ASR_BASE

Change-Id: Icf3719cc0afe3eeb3edc7fa80a2eb5199ca9dda1
diff --git a/target/linux/bcm63xx/patches-5.4/045-v5.12-bcm63xx_enet-convert-to-build_skb.patch b/target/linux/bcm63xx/patches-5.4/045-v5.12-bcm63xx_enet-convert-to-build_skb.patch
new file mode 100644
index 0000000..80d44ec
--- /dev/null
+++ b/target/linux/bcm63xx/patches-5.4/045-v5.12-bcm63xx_enet-convert-to-build_skb.patch
@@ -0,0 +1,385 @@
+From d27de0ef5ef995df2cc5f5c006c0efcf0a62b6af Mon Sep 17 00:00:00 2001
+From: Sieng Piaw Liew <liew.s.piaw@gmail.com>
+Date: Wed, 6 Jan 2021 22:42:07 +0800
+Subject: [PATCH 6/7] bcm63xx_enet: convert to build_skb
+
+We can increase the efficiency of the rx path by receiving packets into
+raw buffers and building SKBs around them just before passing them into
+the network stack. In contrast, preallocating SKBs too early reduces
+CPU cache efficiency.
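+
+On the receive side this amounts to roughly the following (a minimal
+sketch: DMA sync, the copybreak path and descriptor rearming are
+elided, and rx_buf/rx_buf_size/rx_buf_offset/rx_frag_size are the
+fields introduced by this patch):
+
+	void *buf = priv->rx_buf[desc_idx];
+
+	dma_unmap_single(kdev, desc->address, priv->rx_buf_size,
+			 DMA_FROM_DEVICE);
+	priv->rx_buf[desc_idx] = NULL;
+
+	skb = build_skb(buf, priv->rx_frag_size);	/* no copy, no early skb */
+	if (unlikely(!skb)) {
+		skb_free_frag(buf);			/* return the fragment */
+		dev->stats.rx_dropped++;
+	} else {
+		skb_reserve(skb, priv->rx_buf_offset);	/* skip the headroom */
+		skb_put(skb, len);			/* bytes written by hw */
+	}
+
+Note that build_skb() expects the fragment to reserve room for the
+skb_shared_info tail; that is why rx_frag_size is computed as
+SKB_DATA_ALIGN(rx_buf_offset + rx_buf_size) +
+SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).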
+
+Check whether we're in NAPI context when refilling RX; normally we
+almost always are. In that case, dispatch to napi_alloc_frag directly
+instead of relying on netdev_alloc_frag, which does the same but with
+the overhead of a local_bh_disable/enable pair.
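+
+Concretely, the refill path picks its allocator by context (a sketch
+matching the bcm_enet_refill_rx() hunk below):
+
+	if (likely(napi_mode))
+		buf = napi_alloc_frag(priv->rx_frag_size);
+	else
+		buf = netdev_alloc_frag(priv->rx_frag_size);
+	if (unlikely(!buf))
+		break;	/* the refill timer will retry later */
+
+The refill timer and open() callers pass napi_mode = false; the NAPI
+poll path passes true.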
+
+Tested on a 320 MHz BCM6328 with iperf3 -M 512 to measure packet/sec
+performance; the netif_receive_skb_list and NET_IP_ALIGN optimizations
+were included.
+
+Before:
+[ ID] Interval           Transfer     Bandwidth       Retr
+[  4]   0.00-10.00  sec  49.9 MBytes  41.9 Mbits/sec  197         sender
+[  4]   0.00-10.00  sec  49.3 MBytes  41.3 Mbits/sec            receiver
+
+After:
+[ ID] Interval           Transfer     Bandwidth       Retr
+[  4]   0.00-30.00  sec   171 MBytes  47.8 Mbits/sec  272         sender
+[  4]   0.00-30.00  sec   170 MBytes  47.6 Mbits/sec            receiver
+
+Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bcm63xx_enet.c | 111 ++++++++++---------
+ drivers/net/ethernet/broadcom/bcm63xx_enet.h |  14 ++-
+ 2 files changed, 71 insertions(+), 54 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
++++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+@@ -221,7 +221,7 @@ static void bcm_enet_mdio_write_mii(stru
+ /*
+  * refill rx queue
+  */
+-static int bcm_enet_refill_rx(struct net_device *dev)
++static int bcm_enet_refill_rx(struct net_device *dev, bool napi_mode)
+ {
+ 	struct bcm_enet_priv *priv;
+ 
+@@ -229,29 +229,29 @@ static int bcm_enet_refill_rx(struct net
+ 
+ 	while (priv->rx_desc_count < priv->rx_ring_size) {
+ 		struct bcm_enet_desc *desc;
+-		struct sk_buff *skb;
+-		dma_addr_t p;
+ 		int desc_idx;
+ 		u32 len_stat;
+ 
+ 		desc_idx = priv->rx_dirty_desc;
+ 		desc = &priv->rx_desc_cpu[desc_idx];
+ 
+-		if (!priv->rx_skb[desc_idx]) {
+-			if (priv->enet_is_sw)
+-				skb = netdev_alloc_skb_ip_align(dev, priv->rx_skb_size);
++		if (!priv->rx_buf[desc_idx]) {
++			void *buf;
++
++			if (likely(napi_mode))
++				buf = napi_alloc_frag(priv->rx_frag_size);
+ 			else
+-				skb = netdev_alloc_skb(dev, priv->rx_skb_size);
+-			if (!skb)
++				buf = netdev_alloc_frag(priv->rx_frag_size);
++			if (unlikely(!buf))
+ 				break;
+-			priv->rx_skb[desc_idx] = skb;
+-			p = dma_map_single(&priv->pdev->dev, skb->data,
+-					   priv->rx_skb_size,
+-					   DMA_FROM_DEVICE);
+-			desc->address = p;
++			priv->rx_buf[desc_idx] = buf;
++			desc->address = dma_map_single(&priv->pdev->dev,
++						       buf + priv->rx_buf_offset,
++						       priv->rx_buf_size,
++						       DMA_FROM_DEVICE);
+ 		}
+ 
+-		len_stat = priv->rx_skb_size << DMADESC_LENGTH_SHIFT;
++		len_stat = priv->rx_buf_size << DMADESC_LENGTH_SHIFT;
+ 		len_stat |= DMADESC_OWNER_MASK;
+ 		if (priv->rx_dirty_desc == priv->rx_ring_size - 1) {
+ 			len_stat |= (DMADESC_WRAP_MASK >> priv->dma_desc_shift);
+@@ -291,7 +291,7 @@ static void bcm_enet_refill_rx_timer(str
+ 	struct net_device *dev = priv->net_dev;
+ 
+ 	spin_lock(&priv->rx_lock);
+-	bcm_enet_refill_rx(dev);
++	bcm_enet_refill_rx(dev, false);
+ 	spin_unlock(&priv->rx_lock);
+ }
+ 
+@@ -321,6 +321,7 @@ static int bcm_enet_receive_queue(struct
+ 		int desc_idx;
+ 		u32 len_stat;
+ 		unsigned int len;
++		void *buf;
+ 
+ 		desc_idx = priv->rx_curr_desc;
+ 		desc = &priv->rx_desc_cpu[desc_idx];
+@@ -366,16 +367,14 @@ static int bcm_enet_receive_queue(struct
+ 		}
+ 
+ 		/* valid packet */
+-		skb = priv->rx_skb[desc_idx];
++		buf = priv->rx_buf[desc_idx];
+ 		len = (len_stat & DMADESC_LENGTH_MASK) >> DMADESC_LENGTH_SHIFT;
+ 		/* don't include FCS */
+ 		len -= 4;
+ 
+ 		if (len < copybreak) {
+-			struct sk_buff *nskb;
+-
+-			nskb = napi_alloc_skb(&priv->napi, len);
+-			if (!nskb) {
++			skb = napi_alloc_skb(&priv->napi, len);
++			if (unlikely(!skb)) {
+ 				/* forget packet, just rearm desc */
+ 				dev->stats.rx_dropped++;
+ 				continue;
+@@ -383,14 +382,21 @@ static int bcm_enet_receive_queue(struct
+ 
+ 			dma_sync_single_for_cpu(kdev, desc->address,
+ 						len, DMA_FROM_DEVICE);
+-			memcpy(nskb->data, skb->data, len);
++			memcpy(skb->data, buf + priv->rx_buf_offset, len);
+ 			dma_sync_single_for_device(kdev, desc->address,
+ 						   len, DMA_FROM_DEVICE);
+-			skb = nskb;
+ 		} else {
+-			dma_unmap_single(&priv->pdev->dev, desc->address,
+-					 priv->rx_skb_size, DMA_FROM_DEVICE);
+-			priv->rx_skb[desc_idx] = NULL;
++			dma_unmap_single(kdev, desc->address,
++					 priv->rx_buf_size, DMA_FROM_DEVICE);
++			priv->rx_buf[desc_idx] = NULL;
++
++			skb = build_skb(buf, priv->rx_frag_size);
++			if (unlikely(!skb)) {
++				skb_free_frag(buf);
++				dev->stats.rx_dropped++;
++				continue;
++			}
++			skb_reserve(skb, priv->rx_buf_offset);
+ 		}
+ 
+ 		skb_put(skb, len);
+@@ -404,7 +410,7 @@ static int bcm_enet_receive_queue(struct
+ 	netif_receive_skb_list(&rx_list);
+ 
+ 	if (processed || !priv->rx_desc_count) {
+-		bcm_enet_refill_rx(dev);
++		bcm_enet_refill_rx(dev, true);
+ 
+ 		/* kick rx dma */
+ 		enet_dmac_writel(priv, priv->dma_chan_en_mask,
+@@ -861,22 +867,22 @@ static void bcm_enet_adjust_link(struct
+ 		priv->pause_tx ? "tx" : "off");
+ }
+ 
+-static void bcm_enet_free_rx_skb_ring(struct device *kdev, struct bcm_enet_priv *priv)
++static void bcm_enet_free_rx_buf_ring(struct device *kdev, struct bcm_enet_priv *priv)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < priv->rx_ring_size; i++) {
+ 		struct bcm_enet_desc *desc;
+ 
+-		if (!priv->rx_skb[i])
++		if (!priv->rx_buf[i])
+ 			continue;
+ 
+ 		desc = &priv->rx_desc_cpu[i];
+-		dma_unmap_single(kdev, desc->address, priv->rx_skb_size,
++		dma_unmap_single(kdev, desc->address, priv->rx_buf_size,
+ 				 DMA_FROM_DEVICE);
+-		kfree_skb(priv->rx_skb[i]);
++		skb_free_frag(priv->rx_buf[i]);
+ 	}
+-	kfree(priv->rx_skb);
++	kfree(priv->rx_buf);
+ }
+ 
+ /*
+@@ -988,10 +994,10 @@ static int bcm_enet_open(struct net_devi
+ 	priv->tx_curr_desc = 0;
+ 	spin_lock_init(&priv->tx_lock);
+ 
+-	/* init & fill rx ring with skbs */
+-	priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
++	/* init & fill rx ring with buffers */
++	priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
+ 			       GFP_KERNEL);
+-	if (!priv->rx_skb) {
++	if (!priv->rx_buf) {
+ 		ret = -ENOMEM;
+ 		goto out_free_tx_skb;
+ 	}
+@@ -1008,8 +1014,8 @@ static int bcm_enet_open(struct net_devi
+ 		enet_dmac_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
+ 				ENETDMAC_BUFALLOC, priv->rx_chan);
+ 
+-	if (bcm_enet_refill_rx(dev)) {
+-		dev_err(kdev, "cannot allocate rx skb queue\n");
++	if (bcm_enet_refill_rx(dev, false)) {
++		dev_err(kdev, "cannot allocate rx buffer queue\n");
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+@@ -1103,7 +1109,7 @@ static int bcm_enet_open(struct net_devi
+ 	return 0;
+ 
+ out:
+-	bcm_enet_free_rx_skb_ring(kdev, priv);
++	bcm_enet_free_rx_buf_ring(kdev, priv);
+ 
+ out_free_tx_skb:
+ 	kfree(priv->tx_skb);
+@@ -1209,8 +1215,8 @@ static int bcm_enet_stop(struct net_devi
+ 	/* force reclaim of all tx buffers */
+ 	bcm_enet_tx_reclaim(dev, 1);
+ 
+-	/* free the rx skb ring */
+-	bcm_enet_free_rx_skb_ring(kdev, priv);
++	/* free the rx buffer ring */
++	bcm_enet_free_rx_buf_ring(kdev, priv);
+ 
+ 	/* free remaining allocated memory */
+ 	kfree(priv->tx_skb);
+@@ -1637,9 +1643,12 @@ static int bcm_enet_change_mtu(struct ne
+ 	 * align rx buffer size to dma burst len, account FCS since
+ 	 * it's appended
+ 	 */
+-	priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN,
++	priv->rx_buf_size = ALIGN(actual_mtu + ETH_FCS_LEN,
+ 				  priv->dma_maxburst * 4);
+ 
++	priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) +
++					    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
++
+ 	dev->mtu = new_mtu;
+ 	return 0;
+ }
+@@ -1725,6 +1734,7 @@ static int bcm_enet_probe(struct platfor
+ 
+ 	priv->enet_is_sw = false;
+ 	priv->dma_maxburst = BCMENET_DMA_MAXBURST;
++	priv->rx_buf_offset = NET_SKB_PAD;
+ 
+ 	ret = bcm_enet_change_mtu(dev, dev->mtu);
+ 	if (ret)
+@@ -2142,7 +2152,7 @@ static int bcm_enetsw_open(struct net_de
+ 	priv->tx_skb = kcalloc(priv->tx_ring_size, sizeof(struct sk_buff *),
+ 			       GFP_KERNEL);
+ 	if (!priv->tx_skb) {
+-		dev_err(kdev, "cannot allocate rx skb queue\n");
++		dev_err(kdev, "cannot allocate tx skb queue\n");
+ 		ret = -ENOMEM;
+ 		goto out_free_tx_ring;
+ 	}
+@@ -2152,11 +2162,11 @@ static int bcm_enetsw_open(struct net_de
+ 	priv->tx_curr_desc = 0;
+ 	spin_lock_init(&priv->tx_lock);
+ 
+-	/* init & fill rx ring with skbs */
+-	priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
++	/* init & fill rx ring with buffers */
++	priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
+ 			       GFP_KERNEL);
+-	if (!priv->rx_skb) {
+-		dev_err(kdev, "cannot allocate rx skb queue\n");
++	if (!priv->rx_buf) {
++		dev_err(kdev, "cannot allocate rx buffer queue\n");
+ 		ret = -ENOMEM;
+ 		goto out_free_tx_skb;
+ 	}
+@@ -2203,8 +2213,8 @@ static int bcm_enetsw_open(struct net_de
+ 	enet_dma_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
+ 			ENETDMA_BUFALLOC_REG(priv->rx_chan));
+ 
+-	if (bcm_enet_refill_rx(dev)) {
+-		dev_err(kdev, "cannot allocate rx skb queue\n");
++	if (bcm_enet_refill_rx(dev, false)) {
++		dev_err(kdev, "cannot allocate rx buffer queue\n");
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+@@ -2303,7 +2313,7 @@ static int bcm_enetsw_open(struct net_de
+ 	return 0;
+ 
+ out:
+-	bcm_enet_free_rx_skb_ring(kdev, priv);
++	bcm_enet_free_rx_buf_ring(kdev, priv);
+ 
+ out_free_tx_skb:
+ 	kfree(priv->tx_skb);
+@@ -2353,8 +2363,8 @@ static int bcm_enetsw_stop(struct net_de
+ 	/* force reclaim of all tx buffers */
+ 	bcm_enet_tx_reclaim(dev, 1);
+ 
+-	/* free the rx skb ring */
+-	bcm_enet_free_rx_skb_ring(kdev, priv);
++	/* free the rx buffer ring */
++	bcm_enet_free_rx_buf_ring(kdev, priv);
+ 
+ 	/* free remaining allocated memory */
+ 	kfree(priv->tx_skb);
+@@ -2655,6 +2665,7 @@ static int bcm_enetsw_probe(struct platf
+ 	priv->rx_ring_size = BCMENET_DEF_RX_DESC;
+ 	priv->tx_ring_size = BCMENET_DEF_TX_DESC;
+ 	priv->dma_maxburst = BCMENETSW_DMA_MAXBURST;
++	priv->rx_buf_offset = NET_SKB_PAD + NET_IP_ALIGN;
+ 
+ 	pd = dev_get_platdata(&pdev->dev);
+ 	if (pd) {
+--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h
++++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
+@@ -230,11 +230,17 @@ struct bcm_enet_priv {
+ 	/* next dirty rx descriptor to refill */
+ 	int rx_dirty_desc;
+ 
+-	/* size of allocated rx skbs */
+-	unsigned int rx_skb_size;
++	/* size of allocated rx buffers */
++	unsigned int rx_buf_size;
+ 
+-	/* list of skb given to hw for rx */
+-	struct sk_buff **rx_skb;
++	/* allocated rx buffer offset */
++	unsigned int rx_buf_offset;
++
++	/* size of allocated rx frag */
++	unsigned int rx_frag_size;
++
++	/* list of buffer given to hw for rx */
++	void **rx_buf;
+ 
+ 	/* used when rx skb allocation failed, so we defer rx queue
+ 	 * refill */