Commit e757e3e1 authored by Alexander Duyck, committed by Jeff Kirsher

ixgbevf: Make next_to_watch a pointer and adjust memory barriers to avoid races

This change is meant to address several race issues that become possible
because next_to_watch could be set to a value indicating that the descriptor
is done when it is not.  To correct that, next_to_watch is instead made a
pointer that is set to NULL during cleanup, and set to the eop_desc after the
descriptor ring entries have been written.

To enforce proper ordering, the next_to_watch pointer is not set until after
a wmb() that writes out the values for the last descriptor of a transmit.  To
guarantee that the descriptor contents are not read before the eop_desc
pointer itself, read_barrier_depends() is used; it is only strictly required
on the Alpha architecture.
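
The publish/consume pattern described above can be sketched in stand-alone
user-space C, which may help when reviewing the barrier placement.  This is
an illustration only, not the driver code: the names (fake_tx_desc,
FAKE_STAT_DD, the file name ordering_sketch.c) are invented for the example,
a C11 release store stands in for wmb() plus the next_to_watch assignment in
ixgbevf_tx_queue(), and an acquire load stands in for reading next_to_watch
followed by read_barrier_depends() in ixgbevf_clean_tx_irq() (acquire is used
here because compilers generally promote memory_order_consume to acquire).

/* ordering_sketch.c - illustrative only, not part of the ixgbevf driver */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_tx_desc {
	unsigned int status;		/* stands in for wb.status and its DD bit */
};

#define FAKE_STAT_DD	0x1

static struct fake_tx_desc ring[16];

/* stands in for tx_buffer_info->next_to_watch; NULL means "no work pending" */
static _Atomic(struct fake_tx_desc *) next_to_watch;

/* transmit side: write the descriptor contents, then publish the eop pointer */
static void *producer(void *arg)
{
	struct fake_tx_desc *eop = &ring[0];

	(void)arg;
	eop->status = FAKE_STAT_DD;	/* descriptor contents written first */
	/* release store ~ wmb() followed by the next_to_watch assignment */
	atomic_store_explicit(&next_to_watch, eop, memory_order_release);
	return NULL;
}

/* cleanup side: only dereference the descriptor through the published pointer */
static void *consumer(void *arg)
{
	struct fake_tx_desc *eop;

	(void)arg;
	do {
		/* acquire load ~ reading next_to_watch + read_barrier_depends() */
		eop = atomic_load_explicit(&next_to_watch, memory_order_acquire);
	} while (!eop);			/* pointer not yet set: nothing to clean */

	if (eop->status & FAKE_STAT_DD)
		puts("DD observed only after the descriptor was published");
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&c, NULL, consumer, NULL);
	pthread_create(&p, NULL, producer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}

Built with something like "gcc -O2 -pthread ordering_sketch.c", the point is
simply that the consumer can never observe a non-NULL pointer without also
observing the descriptor contents written before the release store.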
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 7f0e44ac
@@ -44,8 +44,8 @@ struct ixgbevf_tx_buffer {
 	struct sk_buff *skb;
 	dma_addr_t dma;
 	unsigned long time_stamp;
+	union ixgbe_adv_tx_desc *next_to_watch;
 	u16 length;
-	u16 next_to_watch;
 	u16 mapped_as_page;
 };
...
@@ -190,28 +190,37 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	union ixgbe_adv_tx_desc *tx_desc, *eop_desc;
 	struct ixgbevf_tx_buffer *tx_buffer_info;
-	unsigned int i, eop, count = 0;
+	unsigned int i, count = 0;
 	unsigned int total_bytes = 0, total_packets = 0;
 
 	if (test_bit(__IXGBEVF_DOWN, &adapter->state))
 		return true;
 
 	i = tx_ring->next_to_clean;
-	eop = tx_ring->tx_buffer_info[i].next_to_watch;
-	eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
+	tx_buffer_info = &tx_ring->tx_buffer_info[i];
+	eop_desc = tx_buffer_info->next_to_watch;
 
-	while ((eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
+	do {
 		bool cleaned = false;
-		rmb(); /* read buffer_info after eop_desc */
-		/* eop could change between read and DD-check */
-		if (unlikely(eop != tx_ring->tx_buffer_info[i].next_to_watch))
-			goto cont_loop;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer_info->next_to_watch = NULL;
+
 		for ( ; !cleaned; count++) {
 			struct sk_buff *skb;
 			tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
-			tx_buffer_info = &tx_ring->tx_buffer_info[i];
-			cleaned = (i == eop);
+			cleaned = (tx_desc == eop_desc);
 			skb = tx_buffer_info->skb;
 
 			if (cleaned && skb) {
@@ -234,13 +243,13 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 			i++;
 			if (i == tx_ring->count)
 				i = 0;
+
+			tx_buffer_info = &tx_ring->tx_buffer_info[i];
 		}
 
-cont_loop:
-		eop = tx_ring->tx_buffer_info[i].next_to_watch;
-		eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
-	}
+		eop_desc = tx_buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
 
 	tx_ring->next_to_clean = i;
 
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
@@ -2806,8 +2815,7 @@ static bool ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 }
 
 static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
-			  struct sk_buff *skb, u32 tx_flags,
-			  unsigned int first)
+			  struct sk_buff *skb, u32 tx_flags)
 {
 	struct ixgbevf_tx_buffer *tx_buffer_info;
 	unsigned int len;
@@ -2832,7 +2840,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 						     size, DMA_TO_DEVICE);
 		if (dma_mapping_error(tx_ring->dev, tx_buffer_info->dma))
 			goto dma_error;
-		tx_buffer_info->next_to_watch = i;
 		len -= size;
 		total -= size;
@@ -2862,7 +2869,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 					      tx_buffer_info->dma))
 				goto dma_error;
 			tx_buffer_info->mapped_as_page = true;
-			tx_buffer_info->next_to_watch = i;
 			len -= size;
 			total -= size;
@@ -2881,8 +2887,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	else
 		i = i - 1;
 	tx_ring->tx_buffer_info[i].skb = skb;
-	tx_ring->tx_buffer_info[first].next_to_watch = i;
-	tx_ring->tx_buffer_info[first].time_stamp = jiffies;
 
 	return count;
@@ -2891,7 +2895,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	/* clear timestamp and dma mappings for failed tx_buffer_info map */
 	tx_buffer_info->dma = 0;
-	tx_buffer_info->next_to_watch = 0;
 	count--;
 
 	/* clear timestamp and dma mappings for remaining portion of packet */
@@ -2908,7 +2911,8 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 }
 
 static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
-			     int count, u32 paylen, u8 hdr_len)
+			     int count, unsigned int first, u32 paylen,
+			     u8 hdr_len)
 {
 	union ixgbe_adv_tx_desc *tx_desc = NULL;
 	struct ixgbevf_tx_buffer *tx_buffer_info;
@@ -2959,6 +2963,16 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
 	tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd);
 
+	tx_ring->tx_buffer_info[first].time_stamp = jiffies;
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	tx_ring->tx_buffer_info[first].next_to_watch = tx_desc;
+
 	tx_ring->next_to_use = i;
 }
@@ -3050,15 +3064,8 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		tx_flags |= IXGBE_TX_FLAGS_CSUM;
 
 	ixgbevf_tx_queue(tx_ring, tx_flags,
-			 ixgbevf_tx_map(tx_ring, skb, tx_flags, first),
-			 skb->len, hdr_len);
-	/*
-	 * Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64).
-	 */
-	wmb();
+			 ixgbevf_tx_map(tx_ring, skb, tx_flags),
+			 first, skb->len, hdr_len);
 
 	writel(tx_ring->next_to_use, adapter->hw.hw_addr + tx_ring->tail);
...