Commit b01867cb authored by Ayaz Abdulla, committed by Jeff Garzik

forcedeth: rx data path optimization

This patch optimizes the rx data paths and cleans up the code.
Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
parent 445583b8
...@@ -1317,9 +1317,9 @@ static int nv_alloc_rx(struct net_device *dev) ...@@ -1317,9 +1317,9 @@ static int nv_alloc_rx(struct net_device *dev)
np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma); np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
wmb(); wmb();
np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
if (np->put_rx.orig++ == np->last_rx.orig) if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
np->put_rx.orig = np->first_rx.orig; np->put_rx.orig = np->first_rx.orig;
if (np->put_rx_ctx++ == np->last_rx_ctx) if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
np->put_rx_ctx = np->first_rx_ctx; np->put_rx_ctx = np->first_rx_ctx;
} else { } else {
return 1; return 1;
...@@ -1349,9 +1349,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev) ...@@ -1349,9 +1349,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF; np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
wmb(); wmb();
np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL); np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
if (np->put_rx.ex++ == np->last_rx.ex) if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
np->put_rx.ex = np->first_rx.ex; np->put_rx.ex = np->first_rx.ex;
if (np->put_rx_ctx++ == np->last_rx_ctx) if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
np->put_rx_ctx = np->first_rx_ctx; np->put_rx_ctx = np->first_rx_ctx;
} else { } else {
return 1; return 1;
...@@ -2046,24 +2046,17 @@ static int nv_rx_process(struct net_device *dev, int limit) ...@@ -2046,24 +2046,17 @@ static int nv_rx_process(struct net_device *dev, int limit)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
u32 flags; u32 flags;
u32 vlanflags = 0; u32 rx_processed_cnt = 0;
int count; struct sk_buff *skb;
int len;
for (count = 0; count < limit; ++count) {
struct sk_buff *skb;
int len;
if (np->get_rx.orig == np->put_rx.orig) while((np->get_rx.orig != np->put_rx.orig) &&
break; /* we scanned the whole ring - do not continue */ !((flags = le32_to_cpu(np->get_rx.orig->flaglen)) & NV_RX_AVAIL) &&
flags = le32_to_cpu(np->get_rx.orig->flaglen); (rx_processed_cnt++ < limit)) {
len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n", dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
dev->name, flags); dev->name, flags);
if (flags & NV_RX_AVAIL)
break; /* still owned by hardware, */
/* /*
* the packet is for us - immediately tear down the pci mapping. * the packet is for us - immediately tear down the pci mapping.
* TODO: check if a prefetch of the first cacheline improves * TODO: check if a prefetch of the first cacheline improves
...@@ -2087,99 +2080,80 @@ static int nv_rx_process(struct net_device *dev, int limit) ...@@ -2087,99 +2080,80 @@ static int nv_rx_process(struct net_device *dev, int limit)
} }
/* look at what we actually got: */ /* look at what we actually got: */
if (np->desc_ver == DESC_VER_1) { if (np->desc_ver == DESC_VER_1) {
if (!(flags & NV_RX_DESCRIPTORVALID)) { if (likely(flags & NV_RX_DESCRIPTORVALID)) {
dev_kfree_skb(skb); len = flags & LEN_MASK_V1;
goto next_pkt; if (unlikely(flags & NV_RX_ERROR)) {
} if (flags & NV_RX_ERROR4) {
len = nv_getlen(dev, skb->data, len);
if (flags & NV_RX_ERROR) { if (len < 0) {
if (flags & NV_RX_MISSEDFRAME) { np->stats.rx_errors++;
np->stats.rx_missed_errors++; dev_kfree_skb(skb);
np->stats.rx_errors++; goto next_pkt;
dev_kfree_skb(skb); }
goto next_pkt; }
} /* framing errors are soft errors */
if (flags & (NV_RX_ERROR1|NV_RX_ERROR2|NV_RX_ERROR3)) { else if (flags & NV_RX_FRAMINGERR) {
np->stats.rx_errors++; if (flags & NV_RX_SUBSTRACT1) {
dev_kfree_skb(skb); len--;
goto next_pkt; }
} }
if (flags & NV_RX_CRCERR) { /* the rest are hard errors */
np->stats.rx_crc_errors++; else {
np->stats.rx_errors++; if (flags & NV_RX_MISSEDFRAME)
dev_kfree_skb(skb); np->stats.rx_missed_errors++;
goto next_pkt; if (flags & NV_RX_CRCERR)
} np->stats.rx_crc_errors++;
if (flags & NV_RX_OVERFLOW) { if (flags & NV_RX_OVERFLOW)
np->stats.rx_over_errors++; np->stats.rx_over_errors++;
np->stats.rx_errors++;
dev_kfree_skb(skb);
goto next_pkt;
}
if (flags & NV_RX_ERROR4) {
len = nv_getlen(dev, skb->data, len);
if (len < 0) {
np->stats.rx_errors++; np->stats.rx_errors++;
dev_kfree_skb(skb); dev_kfree_skb(skb);
goto next_pkt; goto next_pkt;
} }
} }
/* framing errors are soft errors. */ } else {
if (flags & NV_RX_FRAMINGERR) {
if (flags & NV_RX_SUBSTRACT1) {
len--;
}
}
}
} else {
if (!(flags & NV_RX2_DESCRIPTORVALID)) {
dev_kfree_skb(skb); dev_kfree_skb(skb);
goto next_pkt; goto next_pkt;
} }
} else {
if (flags & NV_RX2_ERROR) { if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) { len = flags & LEN_MASK_V2;
np->stats.rx_errors++; if (unlikely(flags & NV_RX2_ERROR)) {
dev_kfree_skb(skb); if (flags & NV_RX2_ERROR4) {
goto next_pkt; len = nv_getlen(dev, skb->data, len);
} if (len < 0) {
if (flags & NV_RX2_CRCERR) { np->stats.rx_errors++;
np->stats.rx_crc_errors++; dev_kfree_skb(skb);
np->stats.rx_errors++; goto next_pkt;
dev_kfree_skb(skb); }
goto next_pkt; }
} /* framing errors are soft errors */
if (flags & NV_RX2_OVERFLOW) { else if (flags & NV_RX2_FRAMINGERR) {
np->stats.rx_over_errors++; if (flags & NV_RX2_SUBSTRACT1) {
np->stats.rx_errors++; len--;
dev_kfree_skb(skb); }
goto next_pkt; }
} /* the rest are hard errors */
if (flags & NV_RX2_ERROR4) { else {
len = nv_getlen(dev, skb->data, len); if (flags & NV_RX2_CRCERR)
if (len < 0) { np->stats.rx_crc_errors++;
if (flags & NV_RX2_OVERFLOW)
np->stats.rx_over_errors++;
np->stats.rx_errors++; np->stats.rx_errors++;
dev_kfree_skb(skb); dev_kfree_skb(skb);
goto next_pkt; goto next_pkt;
} }
} }
/* framing errors are soft errors */ if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
if (flags & NV_RX2_FRAMINGERR) {
if (flags & NV_RX2_SUBSTRACT1) {
len--;
}
}
}
if (np->rx_csum) {
flags &= NV_RX2_CHECKSUMMASK;
if (flags == NV_RX2_CHECKSUMOK1 ||
flags == NV_RX2_CHECKSUMOK2 ||
flags == NV_RX2_CHECKSUMOK3) {
dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
} else { } else {
dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name); if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
(flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
} }
} else {
dev_kfree_skb(skb);
goto next_pkt;
} }
} }
/* got a valid packet - forward it to the network core */ /* got a valid packet - forward it to the network core */
...@@ -2188,29 +2162,21 @@ static int nv_rx_process(struct net_device *dev, int limit) ...@@ -2188,29 +2162,21 @@ static int nv_rx_process(struct net_device *dev, int limit)
dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n", dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
dev->name, len, skb->protocol); dev->name, len, skb->protocol);
#ifdef CONFIG_FORCEDETH_NAPI #ifdef CONFIG_FORCEDETH_NAPI
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) netif_receive_skb(skb);
vlan_hwaccel_receive_skb(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_receive_skb(skb);
#else #else
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) netif_rx(skb);
vlan_hwaccel_rx(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_rx(skb);
#endif #endif
dev->last_rx = jiffies; dev->last_rx = jiffies;
np->stats.rx_packets++; np->stats.rx_packets++;
np->stats.rx_bytes += len; np->stats.rx_bytes += len;
next_pkt: next_pkt:
if (np->get_rx.orig++ == np->last_rx.orig) if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
np->get_rx.orig = np->first_rx.orig; np->get_rx.orig = np->first_rx.orig;
if (np->get_rx_ctx++ == np->last_rx_ctx) if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
np->get_rx_ctx = np->first_rx_ctx; np->get_rx_ctx = np->first_rx_ctx;
} }
return count; return rx_processed_cnt;
} }
static int nv_rx_process_optimized(struct net_device *dev, int limit) static int nv_rx_process_optimized(struct net_device *dev, int limit)
...@@ -2218,24 +2184,17 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) ...@@ -2218,24 +2184,17 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
u32 flags; u32 flags;
u32 vlanflags = 0; u32 vlanflags = 0;
int count; u32 rx_processed_cnt = 0;
struct sk_buff *skb;
for (count = 0; count < limit; ++count) { int len;
struct sk_buff *skb;
int len;
if (np->get_rx.ex == np->put_rx.ex) while((np->get_rx.ex != np->put_rx.ex) &&
break; /* we scanned the whole ring - do not continue */ !((flags = le32_to_cpu(np->get_rx.ex->flaglen)) & NV_RX2_AVAIL) &&
flags = le32_to_cpu(np->get_rx.ex->flaglen); (rx_processed_cnt++ < limit)) {
len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n", dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
dev->name, flags); dev->name, flags);
if (flags & NV_RX_AVAIL)
break; /* still owned by hardware, */
/* /*
* the packet is for us - immediately tear down the pci mapping. * the packet is for us - immediately tear down the pci mapping.
* TODO: check if a prefetch of the first cacheline improves * TODO: check if a prefetch of the first cacheline improves
...@@ -2258,84 +2217,91 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) ...@@ -2258,84 +2217,91 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
dprintk("\n"); dprintk("\n");
} }
/* look at what we actually got: */ /* look at what we actually got: */
if (!(flags & NV_RX2_DESCRIPTORVALID)) { if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
dev_kfree_skb(skb); len = flags & LEN_MASK_V2;
goto next_pkt; if (unlikely(flags & NV_RX2_ERROR)) {
} if (flags & NV_RX2_ERROR4) {
len = nv_getlen(dev, skb->data, len);
if (flags & NV_RX2_ERROR) { if (len < 0) {
if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) { np->stats.rx_errors++;
np->stats.rx_errors++; dev_kfree_skb(skb);
dev_kfree_skb(skb); goto next_pkt;
goto next_pkt; }
} }
if (flags & NV_RX2_CRCERR) { /* framing errors are soft errors */
np->stats.rx_crc_errors++; else if (flags & NV_RX2_FRAMINGERR) {
np->stats.rx_errors++; if (flags & NV_RX2_SUBSTRACT1) {
dev_kfree_skb(skb); len--;
goto next_pkt; }
} }
if (flags & NV_RX2_OVERFLOW) { /* the rest are hard errors */
np->stats.rx_over_errors++; else {
np->stats.rx_errors++; if (flags & NV_RX2_CRCERR)
dev_kfree_skb(skb); np->stats.rx_crc_errors++;
goto next_pkt; if (flags & NV_RX2_OVERFLOW)
} np->stats.rx_over_errors++;
if (flags & NV_RX2_ERROR4) {
len = nv_getlen(dev, skb->data, len);
if (len < 0) {
np->stats.rx_errors++; np->stats.rx_errors++;
dev_kfree_skb(skb); dev_kfree_skb(skb);
goto next_pkt; goto next_pkt;
} }
} }
/* framing errors are soft errors */
if (flags & NV_RX2_FRAMINGERR) { if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
if (flags & NV_RX2_SUBSTRACT1) {
len--;
}
}
}
if (np->rx_csum) {
flags &= NV_RX2_CHECKSUMMASK;
if (flags == NV_RX2_CHECKSUMOK1 ||
flags == NV_RX2_CHECKSUMOK2 ||
flags == NV_RX2_CHECKSUMOK3) {
dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
} else { } else {
dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name); if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
(flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
} }
}
/* got a valid packet - forward it to the network core */ /* got a valid packet - forward it to the network core */
skb_put(skb, len); skb_put(skb, len);
skb->protocol = eth_type_trans(skb, dev); skb->protocol = eth_type_trans(skb, dev);
dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n", prefetch(skb->data);
dev->name, len, skb->protocol);
dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: %d bytes, proto %d accepted.\n",
dev->name, len, skb->protocol);
if (likely(!np->vlangrp)) {
#ifdef CONFIG_FORCEDETH_NAPI #ifdef CONFIG_FORCEDETH_NAPI
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) netif_receive_skb(skb);
vlan_hwaccel_receive_skb(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_receive_skb(skb);
#else #else
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) netif_rx(skb);
vlan_hwaccel_rx(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_rx(skb);
#endif #endif
dev->last_rx = jiffies; } else {
np->stats.rx_packets++; vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
np->stats.rx_bytes += len; if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
#ifdef CONFIG_FORCEDETH_NAPI
vlan_hwaccel_receive_skb(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
#else
vlan_hwaccel_rx(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
#endif
} else {
#ifdef CONFIG_FORCEDETH_NAPI
netif_receive_skb(skb);
#else
netif_rx(skb);
#endif
}
}
dev->last_rx = jiffies;
np->stats.rx_packets++;
np->stats.rx_bytes += len;
} else {
dev_kfree_skb(skb);
}
next_pkt: next_pkt:
if (np->get_rx.ex++ == np->last_rx.ex) if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
np->get_rx.ex = np->first_rx.ex; np->get_rx.ex = np->first_rx.ex;
if (np->get_rx_ctx++ == np->last_rx_ctx) if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
np->get_rx_ctx = np->first_rx_ctx; np->get_rx_ctx = np->first_rx_ctx;
} }
return count; return rx_processed_cnt;
} }
static void set_bufsize(struct net_device *dev) static void set_bufsize(struct net_device *dev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment