Commit 1a557afc authored by Jesse Brandeburg, committed by Jeff Kirsher

i40e: Refactor receive routine

This is part 1 of the Rx refactor series, just including
changes to i40e.

This refactor aligns the receive routine with the one in
ixgbe, which was highly optimized.  This reduces the code
we have to maintain and allows for a (hopefully) more readable
and maintainable Rx hot path.

In order to do this:
- consolidate the receive path into a single function that doesn't
  use packet split but *does* use pages for Rx buffers (see the
  sketch after this list).
- remove the old _1buf routine
- consolidate several routines into helper functions
- remove ethtool control over packet split
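
As a rough illustration of the buffer model this moves to, here is a minimal,
self-contained user-space sketch of a page-backed, single-buffer Rx ring with
half-page reuse. All names (rx_ring, rx_buffer, rx_post_buffer,
rx_clean_buffer) are hypothetical and only model the idea; they are not the
driver's actual structures or helpers.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE  4096
#define RX_BUF_LEN 2048   /* the 2K default buffer size */
#define RING_SIZE  8

struct rx_buffer {
	unsigned char *page;       /* backing "page" (plain malloc here) */
	unsigned int page_offset;  /* which half of the page is posted */
};

struct rx_ring {
	struct rx_buffer bufs[RING_SIZE];
	unsigned int next_to_use;    /* next slot to hand to "hardware" */
	unsigned int next_to_clean;  /* next completed slot to reclaim */
};

/* Post one buffer: allocate a page only on first use, otherwise reuse it. */
static int rx_post_buffer(struct rx_ring *ring)
{
	struct rx_buffer *buf = &ring->bufs[ring->next_to_use];

	if (!buf->page) {
		buf->page = malloc(PAGE_SIZE);
		if (!buf->page)
			return -1;
		buf->page_offset = 0;
	}
	ring->next_to_use = (ring->next_to_use + 1) % RING_SIZE;
	return 0;
}

/* Reclaim one completed buffer and flip to the other half of the page,
 * so the same page can be reposted instead of freed per packet. */
static void rx_clean_buffer(struct rx_ring *ring)
{
	struct rx_buffer *buf = &ring->bufs[ring->next_to_clean];

	buf->page_offset ^= RX_BUF_LEN;
	ring->next_to_clean = (ring->next_to_clean + 1) % RING_SIZE;
}

int main(void)
{
	struct rx_ring ring;
	int i;

	memset(&ring, 0, sizeof(ring));
	for (i = 0; i < RING_SIZE; i++)
		rx_post_buffer(&ring);
	for (i = 0; i < 4; i++)
		rx_clean_buffer(&ring);

	printf("next_to_use=%u next_to_clean=%u slot0 offset=%u\n",
	       ring.next_to_use, ring.next_to_clean, ring.bufs[0].page_offset);

	for (i = 0; i < RING_SIZE; i++)
		free(ring.bufs[i].page);
	return 0;
}

The point of the model is that a consumed half of a page can simply be
flipped and reposted, so the hot path rarely has to allocate.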

Change-ID: I5ca100721de65992aa0114f8b4bac844b84758e0
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 04b3b779
@@ -531,7 +531,6 @@ struct i40e_vsi {
 	u8  *rss_lut_user;	/* User configured lookup table entries */
 	u16 max_frame;
-	u16 rx_hdr_len;
 	u16 rx_buf_len;
 	u8  dtype;
...
@@ -268,9 +268,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 rx_ring->queue_index,
 			 rx_ring->reg_idx);
 		dev_info(&pf->pdev->dev,
-			 " rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d\n",
-			 i, rx_ring->rx_hdr_len,
-			 rx_ring->rx_buf_len);
+			 " rx_rings[%i]: rx_buf_len = %d\n",
+			 i, rx_ring->rx_buf_len);
 		dev_info(&pf->pdev->dev,
 			 " rx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
 			 i,
@@ -361,8 +360,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		 " work_limit = %d\n",
 		 vsi->work_limit);
 	dev_info(&pf->pdev->dev,
-		 " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n",
-		 vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype);
+		 " max_frame = %d, rx_buf_len = %d dtype = %d\n",
+		 vsi->max_frame, vsi->rx_buf_len, vsi->dtype);
 	dev_info(&pf->pdev->dev,
 		 " num_q_vectors = %i, base_vector = %i\n",
 		 vsi->num_q_vectors, vsi->base_vector);
...
@@ -235,7 +235,6 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
 	"LinkPolling",
 	"flow-director-atr",
 	"veb-stats",
-	"packet-split",
 	"hw-atr-eviction",
 };
...
@@ -2855,10 +2855,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
 	ring->rx_buf_len = vsi->rx_buf_len;
-	ring->rx_hdr_len = vsi->rx_hdr_len;
 
 	rx_ctx.dbuff = ring->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
-	rx_ctx.hbuff = ring->rx_hdr_len >> I40E_RXQ_CTX_HBUFF_SHIFT;
 
 	rx_ctx.base = (ring->dma / 128);
 	rx_ctx.qlen = ring->count;
@@ -2910,7 +2908,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
-	i40e_alloc_rx_buffers_1buf(ring, I40E_DESC_UNUSED(ring));
+	i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
 
 	return 0;
 }
@@ -2949,15 +2947,13 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
 	else
 		vsi->max_frame = I40E_RXBUFFER_2048;
 
-	vsi->rx_hdr_len = 0;
-	vsi->rx_buf_len = vsi->max_frame;
+	vsi->rx_buf_len = I40E_RXBUFFER_2048;
 	vsi->dtype = I40E_RX_DTYPE_NO_SPLIT;
 
 #ifdef I40E_FCOE
 	/* setup rx buffer for FCoE */
 	if ((vsi->type == I40E_VSI_FCOE) &&
 	    (vsi->back->flags & I40E_FLAG_FCOE_ENABLED)) {
-		vsi->rx_hdr_len = 0;
 		vsi->rx_buf_len = I40E_RXBUFFER_3072;
 		vsi->max_frame = I40E_RXBUFFER_3072;
 		vsi->dtype = I40E_RX_DTYPE_NO_SPLIT;
@@ -2965,8 +2961,6 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
 #endif /* I40E_FCOE */
 
 	/* round up for the chip's needs */
-	vsi->rx_hdr_len = ALIGN(vsi->rx_hdr_len,
-				BIT_ULL(I40E_RXQ_CTX_HBUFF_SHIFT));
 	vsi->rx_buf_len = ALIGN(vsi->rx_buf_len,
 				BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
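
For reference, the rounding above clamps rx_buf_len to the granularity the Rx
queue context uses. Below is a tiny standalone check of that arithmetic;
ALIGN() is re-implemented locally, and the shift value of 7 (128-byte units)
is an assumption consistent with the "(a multiple of 128)" comment added in
the header, not something stated in this hunk.

#include <stdint.h>
#include <stdio.h>

/* assumed: hardware counts Rx buffer sizes in 128-byte units (shift of 7) */
#define DBUFF_SHIFT 7
/* local re-implementation of the kernel's ALIGN() for a power-of-two 'a' */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t buf_len = 3000;  /* deliberately unaligned size */
	uint64_t rounded = ALIGN(buf_len, 1ULL << DBUFF_SHIFT);

	/* 3000 rounds up to 3072; the dbuff context field would hold 24 */
	printf("%llu -> %llu (dbuff = %llu)\n",
	       (unsigned long long)buf_len,
	       (unsigned long long)rounded,
	       (unsigned long long)(rounded >> DBUFF_SHIFT));
	return 0;
}
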
@@ -10661,11 +10655,9 @@ static void i40e_print_features(struct i40e_pf *pf)
 #ifdef CONFIG_PCI_IOV
 	i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
 #endif
-	i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s",
+	i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
 		      pf->hw.func_caps.num_vsis,
-		      pf->vsi[pf->lan_vsi]->num_queue_pairs,
-		      "1BUF");
+		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
 	if (pf->flags & I40E_FLAG_RSS_ENABLED)
 		i += snprintf(&buf[i], REMAIN(i), " RSS");
 	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
...
This diff is collapsed.
@@ -102,8 +102,8 @@ enum i40e_dyn_idx_t {
 	(((pf)->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
 	  I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
 
-/* Supported Rx Buffer Sizes */
-#define I40E_RXBUFFER_512   512    /* Used for packet split */
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define I40E_RXBUFFER_256   256
 #define I40E_RXBUFFER_2048  2048
 #define I40E_RXBUFFER_3072  3072   /* For FCoE MTU of 2158 */
 #define I40E_RXBUFFER_4096  4096
@@ -114,9 +114,28 @@ enum i40e_dyn_idx_t {
  * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
  * this adds up to 512 bytes of extra data meaning the smallest allocation
  * we could have is 1K.
- * i.e. RXBUFFER_512 --> size-1024 slab
+ * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
  */
-#define I40E_RX_HDR_SIZE I40E_RXBUFFER_512
+#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define i40e_rx_desc i40e_32byte_rx_desc
+
+/**
+ * i40e_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
+				     const u64 stat_err_bits)
+{
+	return !!(rx_desc->wb.qword1.status_error_len &
+		  cpu_to_le64(stat_err_bits));
+}
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define I40E_RX_BUFFER_WRITE	16	/* Must be power of 2 */
@@ -142,8 +161,6 @@ enum i40e_dyn_idx_t {
 		prefetch((n));				\
 	} while (0)
 
-#define i40e_rx_desc i40e_32byte_rx_desc
-
 #define I40E_MAX_BUFFER_TXD	8
 #define I40E_MIN_TX_LEN		17
@@ -213,10 +230,8 @@ struct i40e_tx_buffer {
 struct i40e_rx_buffer {
 	struct sk_buff *skb;
-	void *hdr_buf;
 	dma_addr_t dma;
 	struct page *page;
-	dma_addr_t page_dma;
 	unsigned int page_offset;
 };
@@ -280,7 +295,6 @@ struct i40e_ring {
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
-	u16 rx_hdr_len;
 	u16 rx_buf_len;
 #define I40E_RX_DTYPE_NO_SPLIT      0
 #define I40E_RX_DTYPE_HEADER_SPLIT  1
@@ -322,6 +336,7 @@ struct i40e_ring {
 	struct i40e_q_vector *q_vector;	/* Backreference to associated vector */
 
 	struct rcu_head rcu;		/* to avoid race on free */
+	u16 next_to_alloc;
 } ____cacheline_internodealigned_in_smp;
 
 enum i40e_latency_range {
@@ -345,9 +360,7 @@ struct i40e_ring_container {
 #define i40e_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos != NULL; pos = pos->next)
 
-bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count);
-bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count);
-void i40e_alloc_rx_headers(struct i40e_ring *rxr);
+bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
...
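
The i40e_test_staterr() helper added above simply masks the descriptor's
status/error/length qword; because the status bits begin at bit 0, no shift is
needed before the test. Below is a standalone sketch of that bit test,
simplified to use a plain uint64_t instead of the descriptor union and endian
helpers, with the descriptor-done (DD) bit assumed to sit at bit 0.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* assumed: descriptor-done (DD) lives at bit 0 of the status/error/len qword */
#define RX_DESC_STATUS_DD_SHIFT 0

/* same shape as i40e_test_staterr(): mask, then collapse to a boolean */
static bool test_staterr(uint64_t status_error_len, uint64_t stat_err_bits)
{
	return !!(status_error_len & stat_err_bits);
}

int main(void)
{
	uint64_t done = 1ULL << RX_DESC_STATUS_DD_SHIFT;  /* DD bit set */

	printf("DD set:   %d\n", test_staterr(done, 1ULL << RX_DESC_STATUS_DD_SHIFT));
	printf("DD clear: %d\n", test_staterr(0,    1ULL << RX_DESC_STATUS_DD_SHIFT));
	return 0;
}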