Commit 86b22b0d authored by Ayaz Abdulla, committed by Jeff Garzik

forcedeth: optimized routines

This patch breaks up the routines into two versions, one for the legacy
descriptor versions (ver 1 and ver 2) and one for desc ver 3. This makes
the new desc functions leaner, and further reductions will be made in the
next few patches.
Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
parent 658f648a
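For orientation before the diff: the patch's recurring move is to hoist the per-descriptor-version branch out of the hot loops and select the legacy or desc-ver-3 routine once, up front (hard_start_xmit in nv_probe(), the interrupt handler in nv_request_irq(), the rx/tx helpers in the refill and timeout paths). Below is a minimal, self-contained sketch of that dispatch pattern; it is not code from the patch, and the helper names are invented for illustration.

/* Illustrative sketch only -- the helpers below are invented for the example. */
#include <stdio.h>

enum desc_ver { DESC_VER_1, DESC_VER_2, DESC_VER_3 };

static int alloc_rx_legacy(void)    { puts("legacy path (struct ring_desc)");        return 0; }
static int alloc_rx_optimized(void) { puts("desc ver 3 path (struct ring_desc_ex)"); return 0; }

int main(void)
{
	enum desc_ver ver = DESC_VER_3;

	/* Decide once at setup time which routine will be used, instead of
	 * re-testing the descriptor version inside every per-packet loop. */
	int (*alloc_rx)(void) =
		(ver == DESC_VER_1 || ver == DESC_VER_2) ? alloc_rx_legacy
		                                         : alloc_rx_optimized;

	return alloc_rx();
}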
@@ -1307,50 +1307,57 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
static int nv_alloc_rx(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
-union ring_type less_rx;
-
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-less_rx.orig = np->get_rx.orig;
-if (less_rx.orig-- == np->first_rx.orig)
-less_rx.orig = np->last_rx.orig;
-} else {
-less_rx.ex = np->get_rx.ex;
-if (less_rx.ex-- == np->first_rx.ex)
-less_rx.ex = np->last_rx.ex;
-}
-
-while (1) {
-struct sk_buff *skb;
-
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (np->put_rx.orig == less_rx.orig)
-break;
-} else {
-if (np->put_rx.ex == less_rx.ex)
-break;
-}
-
-skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
-if (skb) {
-skb->dev = dev;
-np->put_rx_ctx->skb = skb;
-np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
-skb->end-skb->data, PCI_DMA_FROMDEVICE);
-np->put_rx_ctx->dma_len = skb->end-skb->data;
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
-wmb();
-np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
-if (np->put_rx.orig++ == np->last_rx.orig)
-np->put_rx.orig = np->first_rx.orig;
-} else {
-np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
-np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
-wmb();
-np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
-if (np->put_rx.ex++ == np->last_rx.ex)
-np->put_rx.ex = np->first_rx.ex;
-}
-if (np->put_rx_ctx++ == np->last_rx_ctx)
-np->put_rx_ctx = np->first_rx_ctx;
-} else {
struct ring_desc* less_rx;

less_rx = np->get_rx.orig;
if (less_rx-- == np->first_rx.orig)
less_rx = np->last_rx.orig;

while (np->put_rx.orig != less_rx) {
struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
if (skb) {
skb->dev = dev;
np->put_rx_ctx->skb = skb;
np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
skb->end-skb->data, PCI_DMA_FROMDEVICE);
np->put_rx_ctx->dma_len = skb->end-skb->data;
np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
wmb();
np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
if (np->put_rx.orig++ == np->last_rx.orig)
np->put_rx.orig = np->first_rx.orig;
if (np->put_rx_ctx++ == np->last_rx_ctx)
np->put_rx_ctx = np->first_rx_ctx;
} else {
return 1;
}
}
return 0;
}

static int nv_alloc_rx_optimized(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
struct ring_desc_ex* less_rx;

less_rx = np->get_rx.ex;
if (less_rx-- == np->first_rx.ex)
less_rx = np->last_rx.ex;

while (np->put_rx.ex != less_rx) {
struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
if (skb) {
skb->dev = dev;
np->put_rx_ctx->skb = skb;
np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
skb->end-skb->data, PCI_DMA_FROMDEVICE);
np->put_rx_ctx->dma_len = skb->end-skb->data;
np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
wmb();
np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
if (np->put_rx.ex++ == np->last_rx.ex)
np->put_rx.ex = np->first_rx.ex;
if (np->put_rx_ctx++ == np->last_rx_ctx)
np->put_rx_ctx = np->first_rx_ctx;
} else {
@@ -1374,6 +1381,7 @@ static void nv_do_rx_refill(unsigned long data)
{
struct net_device *dev = (struct net_device *) data;
struct fe_priv *np = netdev_priv(dev);
int retcode;

if (!using_multi_irqs(dev)) {
if (np->msi_flags & NV_MSI_X_ENABLED)
@@ -1383,7 +1391,11 @@ static void nv_do_rx_refill(unsigned long data)
} else {
disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
}
-if (nv_alloc_rx(dev)) {
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
retcode = nv_alloc_rx(dev);
else
retcode = nv_alloc_rx_optimized(dev);
if (retcode) {
spin_lock_irq(&np->lock);
if (!np->in_shutdown)
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
@@ -1456,9 +1468,14 @@ static void nv_init_tx(struct net_device *dev)
static int nv_init_ring(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);

nv_init_tx(dev);
nv_init_rx(dev);
-return nv_alloc_rx(dev);
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
return nv_alloc_rx(dev);
else
return nv_alloc_rx_optimized(dev);
}

static int nv_release_txskb(struct net_device *dev, struct nv_skb_map* tx_skb)
@@ -1554,9 +1571,9 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
u32 empty_slots;
u32 tx_flags_vlan = 0;
-union ring_type put_tx;
-union ring_type start_tx;
-union ring_type prev_tx;
struct ring_desc* put_tx;
struct ring_desc* start_tx;
struct ring_desc* prev_tx;
struct nv_skb_map* prev_tx_ctx;

/* add fragments to entries count */
@@ -1573,10 +1590,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_BUSY;
}

-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-start_tx.orig = put_tx.orig = np->put_tx.orig;
-else
-start_tx.ex = put_tx.ex = np->put_tx.ex;
start_tx = put_tx = np->put_tx.orig;

/* setup the header buffer */
do {
@@ -1586,24 +1600,13 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
PCI_DMA_TODEVICE);
np->put_tx_ctx->dma_len = bcnt;
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
-put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
-} else {
-put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
-put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
-put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
-}
put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma);
put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
tx_flags = np->tx_flags;
offset += bcnt;
size -= bcnt;
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (put_tx.orig++ == np->last_tx.orig)
-put_tx.orig = np->first_tx.orig;
-} else {
-if (put_tx.ex++ == np->last_tx.ex)
-put_tx.ex = np->first_tx.ex;
-}
if (put_tx++ == np->last_tx.orig)
put_tx = np->first_tx.orig;
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
@@ -1622,33 +1625,19 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
PCI_DMA_TODEVICE);
np->put_tx_ctx->dma_len = bcnt;
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
-put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
-} else {
-put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
-put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
-put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
-}
put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma);
put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
offset += bcnt;
size -= bcnt;
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (put_tx.orig++ == np->last_tx.orig)
-put_tx.orig = np->first_tx.orig;
-} else {
-if (put_tx.ex++ == np->last_tx.ex)
-put_tx.ex = np->first_tx.ex;
-}
if (put_tx++ == np->last_tx.orig)
put_tx = np->first_tx.orig;
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
}

/* set last fragment flag */
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-prev_tx.orig->flaglen |= cpu_to_le32(tx_flags_extra);
-else
-prev_tx.ex->flaglen |= cpu_to_le32(tx_flags_extra);
prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);

/* save skb in this slot's context area */
prev_tx_ctx->skb = skb;
@@ -1667,14 +1656,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock_irq(&np->lock);

/* set tx flags */
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-start_tx.orig->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
-np->put_tx.orig = put_tx.orig;
-} else {
-start_tx.ex->txvlan = cpu_to_le32(tx_flags_vlan);
-start_tx.ex->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
-np->put_tx.ex = put_tx.ex;
-}
start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
np->put_tx.orig = put_tx;

spin_unlock_irq(&np->lock);
@@ -1696,6 +1679,130 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
u32 tx_flags = 0;
u32 tx_flags_extra = NV_TX2_LASTPACKET;
unsigned int fragments = skb_shinfo(skb)->nr_frags;
unsigned int i;
u32 offset = 0;
u32 bcnt;
u32 size = skb->len-skb->data_len;
u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
u32 empty_slots;
u32 tx_flags_vlan = 0;
struct ring_desc_ex* put_tx;
struct ring_desc_ex* start_tx;
struct ring_desc_ex* prev_tx;
struct nv_skb_map* prev_tx_ctx;
/* add fragments to entries count */
for (i = 0; i < fragments; i++) {
entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) +
((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
}
empty_slots = nv_get_empty_tx_slots(np);
if ((empty_slots - np->tx_limit_stop) <= entries) {
spin_lock_irq(&np->lock);
netif_stop_queue(dev);
spin_unlock_irq(&np->lock);
return NETDEV_TX_BUSY;
}
start_tx = put_tx = np->put_tx.ex;
/* setup the header buffer */
do {
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
PCI_DMA_TODEVICE);
np->put_tx_ctx->dma_len = bcnt;
put_tx->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
put_tx->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
tx_flags = np->tx_flags;
offset += bcnt;
size -= bcnt;
if (put_tx++ == np->last_tx.ex)
put_tx = np->first_tx.ex;
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
/* setup the fragments */
for (i = 0; i < fragments; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
u32 size = frag->size;
offset = 0;
do {
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
PCI_DMA_TODEVICE);
np->put_tx_ctx->dma_len = bcnt;
put_tx->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
put_tx->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
offset += bcnt;
size -= bcnt;
if (put_tx++ == np->last_tx.ex)
put_tx = np->first_tx.ex;
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
}
/* set last fragment flag */
prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);
/* save skb in this slot's context area */
prev_tx_ctx->skb = skb;
if (skb_is_gso(skb))
tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
else
tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
/* vlan tag */
if (np->vlangrp && vlan_tx_tag_present(skb)) {
tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb);
}
spin_lock_irq(&np->lock);
/* set tx flags */
start_tx->txvlan = cpu_to_le32(tx_flags_vlan);
start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
np->put_tx.ex = put_tx;
spin_unlock_irq(&np->lock);
dprintk(KERN_DEBUG "%s: nv_start_xmit_optimized: entries %d queued for transmission. tx_flags_extra: %x\n",
dev->name, entries, tx_flags_extra);
{
int j;
for (j=0; j<64; j++) {
if ((j%16) == 0)
dprintk("\n%03x:", j);
dprintk(" %02x", ((unsigned char*)skb->data)[j]);
}
dprintk("\n");
}
dev->trans_start = jiffies;
writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
pci_push(get_hwbase(dev));
return NETDEV_TX_OK;
}
/*
 * nv_tx_done: check for completed packets, release the skbs.
 *
@@ -1707,16 +1814,8 @@ static void nv_tx_done(struct net_device *dev)
u32 flags;
struct sk_buff *skb;

-while (1) {
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (np->get_tx.orig == np->put_tx.orig)
-break;
-flags = le32_to_cpu(np->get_tx.orig->flaglen);
-} else {
-if (np->get_tx.ex == np->put_tx.ex)
-break;
-flags = le32_to_cpu(np->get_tx.ex->flaglen);
-}
while (np->get_tx.orig != np->put_tx.orig) {
flags = le32_to_cpu(np->get_tx.orig->flaglen);

dprintk(KERN_DEBUG "%s: nv_tx_done: flags 0x%x.\n",
dev->name, flags);
@@ -1754,13 +1853,45 @@ static void nv_tx_done(struct net_device *dev)
}
}
nv_release_txskb(dev, np->get_tx_ctx);
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (np->get_tx.orig++ == np->last_tx.orig)
-np->get_tx.orig = np->first_tx.orig;
-} else {
-if (np->get_tx.ex++ == np->last_tx.ex)
-np->get_tx.ex = np->first_tx.ex;
-}
if (np->get_tx.orig++ == np->last_tx.orig)
np->get_tx.orig = np->first_tx.orig;
if (np->get_tx_ctx++ == np->last_tx_ctx)
np->get_tx_ctx = np->first_tx_ctx;
}
if (nv_get_empty_tx_slots(np) > np->tx_limit_start)
netif_wake_queue(dev);
}
static void nv_tx_done_optimized(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
u32 flags;
struct sk_buff *skb;
while (np->get_tx.ex != np->put_tx.ex) {
flags = le32_to_cpu(np->get_tx.ex->flaglen);
dprintk(KERN_DEBUG "%s: nv_tx_done_optimized: flags 0x%x.\n",
dev->name, flags);
if (flags & NV_TX_VALID)
break;
if (flags & NV_TX2_LASTPACKET) {
skb = np->get_tx_ctx->skb;
if (flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
if (flags & NV_TX2_UNDERFLOW)
np->stats.tx_fifo_errors++;
if (flags & NV_TX2_CARRIERLOST)
np->stats.tx_carrier_errors++;
np->stats.tx_errors++;
} else {
np->stats.tx_packets++;
np->stats.tx_bytes += skb->len;
}
}
nv_release_txskb(dev, np->get_tx_ctx);
if (np->get_tx.ex++ == np->last_tx.ex)
np->get_tx.ex = np->first_tx.ex;
if (np->get_tx_ctx++ == np->last_tx_ctx)
np->get_tx_ctx = np->first_tx_ctx;
}
@@ -1837,7 +1968,10 @@ static void nv_tx_timeout(struct net_device *dev)
nv_stop_tx(dev);

/* 2) check that the packets were not sent already: */
-nv_tx_done(dev);
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
nv_tx_done(dev);
else
nv_tx_done_optimized(dev);

/* 3) if there are dead entries: clear everything */
if (np->get_tx_ctx != np->put_tx_ctx) {
@@ -1913,22 +2047,14 @@ static int nv_rx_process(struct net_device *dev, int limit)
u32 vlanflags = 0;
int count;

for (count = 0; count < limit; ++count) {
struct sk_buff *skb;
int len;

-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (np->get_rx.orig == np->put_rx.orig)
-break; /* we scanned the whole ring - do not continue */
-flags = le32_to_cpu(np->get_rx.orig->flaglen);
-len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
-} else {
-if (np->get_rx.ex == np->put_rx.ex)
-break; /* we scanned the whole ring - do not continue */
-flags = le32_to_cpu(np->get_rx.ex->flaglen);
-len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
-vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
-}
if (np->get_rx.orig == np->put_rx.orig)
break; /* we scanned the whole ring - do not continue */
flags = le32_to_cpu(np->get_rx.orig->flaglen);
len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);

dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
dev->name, flags);
@@ -2076,13 +2202,133 @@ static int nv_rx_process(struct net_device *dev, int limit)
np->stats.rx_packets++;
np->stats.rx_bytes += len;
next_pkt:
-if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-if (np->get_rx.orig++ == np->last_rx.orig)
-np->get_rx.orig = np->first_rx.orig;
-} else {
-if (np->get_rx.ex++ == np->last_rx.ex)
-np->get_rx.ex = np->first_rx.ex;
-}
if (np->get_rx.orig++ == np->last_rx.orig)
np->get_rx.orig = np->first_rx.orig;
if (np->get_rx_ctx++ == np->last_rx_ctx)
np->get_rx_ctx = np->first_rx_ctx;
}

return count;
}
static int nv_rx_process_optimized(struct net_device *dev, int limit)
{
struct fe_priv *np = netdev_priv(dev);
u32 flags;
u32 vlanflags = 0;
int count;
for (count = 0; count < limit; ++count) {
struct sk_buff *skb;
int len;
if (np->get_rx.ex == np->put_rx.ex)
break; /* we scanned the whole ring - do not continue */
flags = le32_to_cpu(np->get_rx.ex->flaglen);
len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
dev->name, flags);
if (flags & NV_RX_AVAIL)
break; /* still owned by hardware, */
/*
* the packet is for us - immediately tear down the pci mapping.
* TODO: check if a prefetch of the first cacheline improves
* the performance.
*/
pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
np->get_rx_ctx->dma_len,
PCI_DMA_FROMDEVICE);
skb = np->get_rx_ctx->skb;
np->get_rx_ctx->skb = NULL;
{
int j;
dprintk(KERN_DEBUG "Dumping packet (flags 0x%x).",flags);
for (j=0; j<64; j++) {
if ((j%16) == 0)
dprintk("\n%03x:", j);
dprintk(" %02x", ((unsigned char*)skb->data)[j]);
}
dprintk("\n");
}
/* look at what we actually got: */
if (!(flags & NV_RX2_DESCRIPTORVALID)) {
dev_kfree_skb(skb);
goto next_pkt;
}
if (flags & NV_RX2_ERROR) {
if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
np->stats.rx_errors++;
dev_kfree_skb(skb);
goto next_pkt;
}
if (flags & NV_RX2_CRCERR) {
np->stats.rx_crc_errors++;
np->stats.rx_errors++;
dev_kfree_skb(skb);
goto next_pkt;
}
if (flags & NV_RX2_OVERFLOW) {
np->stats.rx_over_errors++;
np->stats.rx_errors++;
dev_kfree_skb(skb);
goto next_pkt;
}
if (flags & NV_RX2_ERROR4) {
len = nv_getlen(dev, skb->data, len);
if (len < 0) {
np->stats.rx_errors++;
dev_kfree_skb(skb);
goto next_pkt;
}
}
/* framing errors are soft errors */
if (flags & NV_RX2_FRAMINGERR) {
if (flags & NV_RX2_SUBSTRACT1) {
len--;
}
}
}
if (np->rx_csum) {
flags &= NV_RX2_CHECKSUMMASK;
if (flags == NV_RX2_CHECKSUMOK1 ||
flags == NV_RX2_CHECKSUMOK2 ||
flags == NV_RX2_CHECKSUMOK3) {
dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else {
dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
}
}
/* got a valid packet - forward it to the network core */
skb_put(skb, len);
skb->protocol = eth_type_trans(skb, dev);
dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
dev->name, len, skb->protocol);
#ifdef CONFIG_FORCEDETH_NAPI
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
vlan_hwaccel_receive_skb(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_receive_skb(skb);
#else
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
vlan_hwaccel_rx(skb, np->vlangrp,
vlanflags & NV_RX3_VLAN_TAG_MASK);
else
netif_rx(skb);
#endif
dev->last_rx = jiffies;
np->stats.rx_packets++;
np->stats.rx_bytes += len;
next_pkt:
if (np->get_rx.ex++ == np->last_rx.ex)
np->get_rx.ex = np->first_rx.ex;
if (np->get_rx_ctx++ == np->last_rx_ctx)
np->get_rx_ctx = np->first_rx_ctx;
}
@@ -2655,6 +2901,117 @@ static irqreturn_t nv_nic_irq(int foo, void *data)
return IRQ_RETVAL(i);
}
static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
{
struct net_device *dev = (struct net_device *) data;
struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev);
u32 events;
int i;
dprintk(KERN_DEBUG "%s: nv_nic_irq_optimized\n", dev->name);
for (i=0; ; i++) {
if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
} else {
events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus);
}
pci_push(base);
dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events);
if (!(events & np->irqmask))
break;
spin_lock(&np->lock);
nv_tx_done_optimized(dev);
spin_unlock(&np->lock);
if (events & NVREG_IRQ_LINK) {
spin_lock(&np->lock);
nv_link_irq(dev);
spin_unlock(&np->lock);
}
if (np->need_linktimer && time_after(jiffies, np->link_timeout)) {
spin_lock(&np->lock);
nv_linkchange(dev);
spin_unlock(&np->lock);
np->link_timeout = jiffies + LINK_TIMEOUT;
}
if (events & (NVREG_IRQ_TX_ERR)) {
dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n",
dev->name, events);
}
if (events & (NVREG_IRQ_UNKNOWN)) {
printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n",
dev->name, events);
}
if (unlikely(events & NVREG_IRQ_RECOVER_ERROR)) {
spin_lock(&np->lock);
/* disable interrupts on the nic */
if (!(np->msi_flags & NV_MSI_X_ENABLED))
writel(0, base + NvRegIrqMask);
else
writel(np->irqmask, base + NvRegIrqMask);
pci_push(base);
if (!np->in_shutdown) {
np->nic_poll_irq = np->irqmask;
np->recover_error = 1;
mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
}
spin_unlock(&np->lock);
break;
}
#ifdef CONFIG_FORCEDETH_NAPI
if (events & NVREG_IRQ_RX_ALL) {
netif_rx_schedule(dev);
/* Disable further receive irq's */
spin_lock(&np->lock);
np->irqmask &= ~NVREG_IRQ_RX_ALL;
if (np->msi_flags & NV_MSI_X_ENABLED)
writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
else
writel(np->irqmask, base + NvRegIrqMask);
spin_unlock(&np->lock);
}
#else
nv_rx_process_optimized(dev, dev->weight);
if (nv_alloc_rx_optimized(dev)) {
spin_lock(&np->lock);
if (!np->in_shutdown)
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
spin_unlock(&np->lock);
}
#endif
if (i > max_interrupt_work) {
spin_lock(&np->lock);
/* disable interrupts on the nic */
if (!(np->msi_flags & NV_MSI_X_ENABLED))
writel(0, base + NvRegIrqMask);
else
writel(np->irqmask, base + NvRegIrqMask);
pci_push(base);
if (!np->in_shutdown) {
np->nic_poll_irq = np->irqmask;
mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
}
printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq.\n", dev->name, i);
spin_unlock(&np->lock);
break;
}
}
dprintk(KERN_DEBUG "%s: nv_nic_irq_optimized completed\n", dev->name);
return IRQ_RETVAL(i);
}
static irqreturn_t nv_nic_irq_tx(int foo, void *data)
{
struct net_device *dev = (struct net_device *) data;
@@ -2675,7 +3032,7 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
break;

spin_lock_irqsave(&np->lock, flags);
-nv_tx_done(dev);
nv_tx_done_optimized(dev);
spin_unlock_irqrestore(&np->lock, flags);

if (events & (NVREG_IRQ_TX_ERR)) {
@@ -2711,7 +3068,10 @@ static int nv_napi_poll(struct net_device *dev, int *budget)
u8 __iomem *base = get_hwbase(dev);
unsigned long flags;

-pkts = nv_rx_process(dev, limit);
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
pkts = nv_rx_process(dev, limit);
else
pkts = nv_rx_process_optimized(dev, limit);

if (nv_alloc_rx(dev)) {
spin_lock_irqsave(&np->lock, flags);
@@ -2782,8 +3142,8 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
if (!(events & np->irqmask))
break;

-nv_rx_process(dev, dev->weight);
-if (nv_alloc_rx(dev)) {
nv_rx_process_optimized(dev, dev->weight);
if (nv_alloc_rx_optimized(dev)) {
spin_lock_irqsave(&np->lock, flags);
if (!np->in_shutdown)
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
@@ -2942,6 +3302,16 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
u8 __iomem *base = get_hwbase(dev);
int ret = 1;
int i;
irqreturn_t (*handler)(int foo, void *data);
if (intr_test) {
handler = nv_nic_irq_test;
} else {
if (np->desc_ver == DESC_VER_3)
handler = nv_nic_irq_optimized;
else
handler = nv_nic_irq;
}
if (np->msi_flags & NV_MSI_X_CAPABLE) {
for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
@@ -2979,10 +3349,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER);
} else {
/* Request irq for all interrupts */
-if ((!intr_test &&
-request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-(intr_test &&
-request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0)) {
if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, handler, IRQF_SHARED, dev->name, dev) != 0) {
printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
pci_disable_msix(np->pci_dev);
np->msi_flags &= ~NV_MSI_X_ENABLED;
@@ -2998,8 +3365,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
if ((ret = pci_enable_msi(np->pci_dev)) == 0) {
np->msi_flags |= NV_MSI_ENABLED;
-if ((!intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-(intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0)) {
if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0) {
printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
pci_disable_msi(np->pci_dev);
np->msi_flags &= ~NV_MSI_ENABLED;
@@ -3014,8 +3380,7 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
}
}
if (ret != 0) {
-if ((!intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq, IRQF_SHARED, dev->name, dev) != 0) ||
-(intr_test && request_irq(np->pci_dev->irq, &nv_nic_irq_test, IRQF_SHARED, dev->name, dev) != 0))
if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0)
goto out_err;
}
@@ -4629,7 +4994,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
dev->open = nv_open;
dev->stop = nv_close;
-dev->hard_start_xmit = nv_start_xmit;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
dev->hard_start_xmit = nv_start_xmit;
else
dev->hard_start_xmit = nv_start_xmit_optimized;
dev->get_stats = nv_get_stats;
dev->change_mtu = nv_change_mtu;
dev->set_mac_address = nv_set_mac_address;