Commit 3a1cc23a authored by Rafał Miłecki, committed by Paolo Abeni

net: broadcom: bcm4908_enet: use build_skb()

RX code can be more efficient with build_skb(). Allocating the actual
SKB around the Ethernet packet buffer - right before passing it up -
results in better cache usage.

Without RPS (echo 0 > rps_cpus) BCM4908 NAT masq performance "jumps"
between two speeds: ~900 Mb/s and 940 Mb/s (it's a 4-CPU SoC). This
change bumps the lower speed from 905 Mb/s to 918 Mb/s (tested using
single-stream iperf 2.0.5 traffic).

There are more optimizations to consider. One obvious thing to try is
GRO; however, as BCM4908 doesn't do hw csum, it may actually lower
performance in some cases. Some early testing:

┌─────────────────────────────────┬─────────────────────┬────────────────────┐
│                                 │ netif_receive_skb() │ napi_gro_receive() │
├─────────────────────────────────┼─────────────────────┼────────────────────┤
│ netdev_alloc_skb()              │            905 Mb/s │           892 Mb/s │
│ napi_alloc_frag() + build_skb() │            918 Mb/s │           917 Mb/s │
└─────────────────────────────────┴─────────────────────┴────────────────────┘

Other ideas:
1. napi_build_skb()
2. skb_copy_from_linear_data() for small packets

Those need proper testing first though. That can be done later.
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Link: https://lore.kernel.org/r/20221025132245.22871-1-zajec5@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent c926b4c3
...@@ -36,13 +36,24 @@ ...@@ -36,13 +36,24 @@
#define ENET_MAX_ETH_OVERHEAD (ETH_HLEN + BRCM_MAX_TAG_LEN + VLAN_HLEN + \ #define ENET_MAX_ETH_OVERHEAD (ETH_HLEN + BRCM_MAX_TAG_LEN + VLAN_HLEN + \
ETH_FCS_LEN + 4) /* 32 */ ETH_FCS_LEN + 4) /* 32 */
#define ENET_RX_SKB_BUF_SIZE (NET_SKB_PAD + NET_IP_ALIGN + \
ETH_HLEN + BRCM_MAX_TAG_LEN + VLAN_HLEN + \
ENET_MTU_MAX + ETH_FCS_LEN + 4)
#define ENET_RX_SKB_BUF_ALLOC_SIZE (SKB_DATA_ALIGN(ENET_RX_SKB_BUF_SIZE) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define ENET_RX_BUF_DMA_OFFSET (NET_SKB_PAD + NET_IP_ALIGN)
#define ENET_RX_BUF_DMA_SIZE (ENET_RX_SKB_BUF_SIZE - ENET_RX_BUF_DMA_OFFSET)
struct bcm4908_enet_dma_ring_bd { struct bcm4908_enet_dma_ring_bd {
__le32 ctl; __le32 ctl;
__le32 addr; __le32 addr;
} __packed; } __packed;
struct bcm4908_enet_dma_ring_slot { struct bcm4908_enet_dma_ring_slot {
struct sk_buff *skb; union {
void *buf; /* RX */
struct sk_buff *skb; /* TX */
};
unsigned int len; unsigned int len;
dma_addr_t dma_addr; dma_addr_t dma_addr;
}; };
...@@ -260,22 +271,21 @@ static int bcm4908_enet_dma_alloc_rx_buf(struct bcm4908_enet *enet, unsigned int ...@@ -260,22 +271,21 @@ static int bcm4908_enet_dma_alloc_rx_buf(struct bcm4908_enet *enet, unsigned int
u32 tmp; u32 tmp;
int err; int err;
slot->len = ENET_MTU_MAX + ENET_MAX_ETH_OVERHEAD; slot->buf = napi_alloc_frag(ENET_RX_SKB_BUF_ALLOC_SIZE);
if (!slot->buf)
slot->skb = netdev_alloc_skb(enet->netdev, slot->len);
if (!slot->skb)
return -ENOMEM; return -ENOMEM;
slot->dma_addr = dma_map_single(dev, slot->skb->data, slot->len, DMA_FROM_DEVICE); slot->dma_addr = dma_map_single(dev, slot->buf + ENET_RX_BUF_DMA_OFFSET,
ENET_RX_BUF_DMA_SIZE, DMA_FROM_DEVICE);
err = dma_mapping_error(dev, slot->dma_addr); err = dma_mapping_error(dev, slot->dma_addr);
if (err) { if (err) {
dev_err(dev, "Failed to map DMA buffer: %d\n", err); dev_err(dev, "Failed to map DMA buffer: %d\n", err);
kfree_skb(slot->skb); skb_free_frag(slot->buf);
slot->skb = NULL; slot->buf = NULL;
return err; return err;
} }
tmp = slot->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT; tmp = ENET_RX_BUF_DMA_SIZE << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
tmp |= DMA_CTL_STATUS_OWN; tmp |= DMA_CTL_STATUS_OWN;
if (idx == enet->rx_ring.length - 1) if (idx == enet->rx_ring.length - 1)
tmp |= DMA_CTL_STATUS_WRAP; tmp |= DMA_CTL_STATUS_WRAP;
...@@ -315,11 +325,11 @@ static void bcm4908_enet_dma_uninit(struct bcm4908_enet *enet) ...@@ -315,11 +325,11 @@ static void bcm4908_enet_dma_uninit(struct bcm4908_enet *enet)
for (i = rx_ring->length - 1; i >= 0; i--) { for (i = rx_ring->length - 1; i >= 0; i--) {
slot = &rx_ring->slots[i]; slot = &rx_ring->slots[i];
if (!slot->skb) if (!slot->buf)
continue; continue;
dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_FROM_DEVICE); dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_FROM_DEVICE);
kfree_skb(slot->skb); skb_free_frag(slot->buf);
slot->skb = NULL; slot->buf = NULL;
} }
} }
...@@ -577,6 +587,7 @@ static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight) ...@@ -577,6 +587,7 @@ static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight)
while (handled < weight) { while (handled < weight) {
struct bcm4908_enet_dma_ring_bd *buf_desc; struct bcm4908_enet_dma_ring_bd *buf_desc;
struct bcm4908_enet_dma_ring_slot slot; struct bcm4908_enet_dma_ring_slot slot;
struct sk_buff *skb;
u32 ctl; u32 ctl;
int len; int len;
int err; int err;
...@@ -600,16 +611,24 @@ static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight) ...@@ -600,16 +611,24 @@ static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight)
if (len < ETH_ZLEN || if (len < ETH_ZLEN ||
(ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) { (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) {
kfree_skb(slot.skb); skb_free_frag(slot.buf);
enet->netdev->stats.rx_dropped++; enet->netdev->stats.rx_dropped++;
break; break;
} }
dma_unmap_single(dev, slot.dma_addr, slot.len, DMA_FROM_DEVICE); dma_unmap_single(dev, slot.dma_addr, ENET_RX_BUF_DMA_SIZE, DMA_FROM_DEVICE);
skb = build_skb(slot.buf, ENET_RX_SKB_BUF_ALLOC_SIZE);
if (unlikely(!skb)) {
skb_free_frag(slot.buf);
enet->netdev->stats.rx_dropped++;
break;
}
skb_reserve(skb, ENET_RX_BUF_DMA_OFFSET);
skb_put(skb, len - ETH_FCS_LEN);
skb->protocol = eth_type_trans(skb, enet->netdev);
skb_put(slot.skb, len - ETH_FCS_LEN); netif_receive_skb(skb);
slot.skb->protocol = eth_type_trans(slot.skb, enet->netdev);
netif_receive_skb(slot.skb);
enet->netdev->stats.rx_packets++; enet->netdev->stats.rx_packets++;
enet->netdev->stats.rx_bytes += len; enet->netdev->stats.rx_bytes += len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment