Commit 1b2a7205 authored by Michael Chan's avatar Michael Chan Committed by David S. Miller

[TG3]: Fix tx race condition

Fix a subtle race condition between tg3_start_xmit() and tg3_tx()
discovered by Herbert Xu <herbert@gondor.apana.org.au>:

CPU0					CPU1
tg3_start_xmit()
	if (tx_ring_full) {
		tx_lock
					tg3_tx()
						if (!netif_queue_stopped)
		netif_stop_queue()
		if (!tx_ring_full)
						update_tx_ring 
			netif_wake_queue()
		tx_unlock
	}

Even though tx_ring is updated before the if statement in tg3_tx() in
program order, it can be re-ordered by the CPU as shown above.  This
scenario can cause the tx queue to be stopped forever if tg3_tx() has
just freed up the entire tx_ring.  The possibility of this happening
should be very rare though.

The following changes are made:

1. Add memory barrier to fix the above race condition.

2. Eliminate the private tx_lock altogether and rely solely on
netif_tx_lock.  This eliminates one spinlock in tg3_start_xmit()
when the ring is full.

3. Because of 2, use netif_tx_lock in tg3_tx() before calling
netif_wake_queue().

4. Change TX_BUFFS_AVAIL to an inline function with a memory barrier.
Herbert and David suggested using the memory barrier instead of
volatile.

5. Check for the full wake queue condition before getting
netif_tx_lock in tg3_tx().  This reduces the number of unnecessary
spinlocks when the tx ring is full in a steady-state condition.

6. Update version to 3.65.
Signed-off-by: default avatarMichael Chan <mchan@broadcom.com>
Acked-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent bd37a088
...@@ -68,8 +68,8 @@ ...@@ -68,8 +68,8 @@
#define DRV_MODULE_NAME "tg3" #define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": " #define PFX DRV_MODULE_NAME ": "
#define DRV_MODULE_VERSION "3.64" #define DRV_MODULE_VERSION "3.65"
#define DRV_MODULE_RELDATE "July 31, 2006" #define DRV_MODULE_RELDATE "August 07, 2006"
#define TG3_DEF_MAC_MODE 0 #define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0 #define TG3_DEF_RX_MODE 0
...@@ -123,9 +123,6 @@ ...@@ -123,9 +123,6 @@
TG3_RX_RCB_RING_SIZE(tp)) TG3_RX_RCB_RING_SIZE(tp))
#define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \ #define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \
TG3_TX_RING_SIZE) TG3_TX_RING_SIZE)
#define TX_BUFFS_AVAIL(TP) \
((TP)->tx_pending - \
(((TP)->tx_prod - (TP)->tx_cons) & (TG3_TX_RING_SIZE - 1)))
#define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1)) #define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1))
#define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64) #define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64)
...@@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *tp) ...@@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *tp)
spin_unlock(&tp->lock); spin_unlock(&tp->lock);
} }
static inline u32 tg3_tx_avail(struct tg3 *tp)
{
smp_mb();
return (tp->tx_pending -
((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1)));
}
/* Tigon3 never reports partial packet sends. So we do not /* Tigon3 never reports partial packet sends. So we do not
* need special logic to handle SKBs that have not had all * need special logic to handle SKBs that have not had all
* of their frags sent yet, like SunGEM does. * of their frags sent yet, like SunGEM does.
...@@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp) ...@@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp)
tp->tx_cons = sw_idx; tp->tx_cons = sw_idx;
if (unlikely(netif_queue_stopped(tp->dev))) { /* Need to make the tx_cons update visible to tg3_start_xmit()
spin_lock(&tp->tx_lock); * before checking for netif_queue_stopped(). Without the
* memory barrier, there is a small possibility that tg3_start_xmit()
* will miss it and cause the queue to be stopped forever.
*/
smp_mb();
if (unlikely(netif_queue_stopped(tp->dev) &&
(tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))) {
netif_tx_lock(tp->dev);
if (netif_queue_stopped(tp->dev) && if (netif_queue_stopped(tp->dev) &&
(TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)) (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))
netif_wake_queue(tp->dev); netif_wake_queue(tp->dev);
spin_unlock(&tp->tx_lock); netif_tx_unlock(tp->dev);
} }
} }
...@@ -3795,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -3795,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
* interrupt. Furthermore, IRQ processing runs lockless so we have * interrupt. Furthermore, IRQ processing runs lockless so we have
* no IRQ context deadlocks to worry about either. Rejoice! * no IRQ context deadlocks to worry about either. Rejoice!
*/ */
if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
if (!netif_queue_stopped(dev)) { if (!netif_queue_stopped(dev)) {
netif_stop_queue(dev); netif_stop_queue(dev);
...@@ -3891,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -3891,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
tp->tx_prod = entry; tp->tx_prod = entry;
if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
spin_lock(&tp->tx_lock);
netif_stop_queue(dev); netif_stop_queue(dev);
if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
netif_wake_queue(tp->dev); netif_wake_queue(tp->dev);
spin_unlock(&tp->tx_lock);
} }
out_unlock: out_unlock:
...@@ -3918,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) ...@@ -3918,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb)
struct sk_buff *segs, *nskb; struct sk_buff *segs, *nskb;
/* Estimate the number of fragments in the worst case */ /* Estimate the number of fragments in the worst case */
if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) {
netif_stop_queue(tp->dev); netif_stop_queue(tp->dev);
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
} }
...@@ -3958,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) ...@@ -3958,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
* interrupt. Furthermore, IRQ processing runs lockless so we have * interrupt. Furthermore, IRQ processing runs lockless so we have
* no IRQ context deadlocks to worry about either. Rejoice! * no IRQ context deadlocks to worry about either. Rejoice!
*/ */
if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
if (!netif_queue_stopped(dev)) { if (!netif_queue_stopped(dev)) {
netif_stop_queue(dev); netif_stop_queue(dev);
...@@ -4108,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) ...@@ -4108,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry);
tp->tx_prod = entry; tp->tx_prod = entry;
if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) {
spin_lock(&tp->tx_lock);
netif_stop_queue(dev); netif_stop_queue(dev);
if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)
netif_wake_queue(tp->dev); netif_wake_queue(tp->dev);
spin_unlock(&tp->tx_lock);
} }
out_unlock: out_unlock:
...@@ -11472,7 +11480,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, ...@@ -11472,7 +11480,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
#endif #endif
spin_lock_init(&tp->lock); spin_lock_init(&tp->lock);
spin_lock_init(&tp->tx_lock);
spin_lock_init(&tp->indirect_lock); spin_lock_init(&tp->indirect_lock);
INIT_WORK(&tp->reset_task, tg3_reset_task, tp); INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
......
...@@ -2079,9 +2079,9 @@ struct tg3 { ...@@ -2079,9 +2079,9 @@ struct tg3 {
* lock: Held during reset, PHY access, timer, and when * lock: Held during reset, PHY access, timer, and when
* updating tg3_flags and tg3_flags2. * updating tg3_flags and tg3_flags2.
* *
* tx_lock: Held during tg3_start_xmit and tg3_tx only * netif_tx_lock: Held during tg3_start_xmit. tg3_tx holds
* when calling netif_[start|stop]_queue. * netif_tx_lock when it needs to call
* tg3_start_xmit is protected by netif_tx_lock. * netif_wake_queue.
* *
* Both of these locks are to be held with BH safety. * Both of these locks are to be held with BH safety.
* *
...@@ -2118,8 +2118,6 @@ struct tg3 { ...@@ -2118,8 +2118,6 @@ struct tg3 {
u32 tx_cons; u32 tx_cons;
u32 tx_pending; u32 tx_pending;
spinlock_t tx_lock;
struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_buffer_desc *tx_ring;
struct tx_ring_info *tx_buffers; struct tx_ring_info *tx_buffers;
dma_addr_t tx_desc_mapping; dma_addr_t tx_desc_mapping;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment