Commit 90912f9f authored by Alexander Lobakin's avatar Alexander Lobakin Committed by Tony Nguyen

idpf: convert header split mode to libeth + napi_build_skb()

Currently, idpf uses the following model for the header buffers:

* buffers are allocated via dma_alloc_coherent();
* when receiving, napi_alloc_skb() is called and then the header is
  copied to the newly allocated linear part.

This is far from optimal as DMA coherent zone is slow on many systems
and memcpy() neutralizes the idea and benefits of the header split. Not
speaking of that XDP can't be run on DMA coherent buffers, but at the
same time the idea of allocating an skb to run XDP program is ill.
Instead, use libeth to create page_pools for the header buffers, allocate
them dynamically and then build an skb via napi_build_skb() around them
with no memory copy. With one exception...

When you enable header split, you expect you'll always have a separate
header buffer, so that you could reserve headroom and tailroom only
there and then use full buffers for the data. For example, this is how
TCP zerocopy works -- you have to have the payload aligned to PAGE_SIZE.
The current hardware running idpf does *not* guarantee that you'll
always have headers placed separately. For example, on my setup, even
ICMP packets are written as one piece to the data buffers. You can't
build a valid skb around a data buffer in this case.
To not complicate things and not lose TCP zerocopy etc., when such thing
happens, use the empty header buffer and pull either full frame (if it's
short) or the Ethernet header there and build an skb around it. GRO
layer will pull more from the data buffer later. This W/A will hopefully
be removed one day.
Signed-off-by: default avatarAlexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
parent 5aaac1ae
......@@ -827,6 +827,7 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
}
idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
skb_record_rx_queue(skb, rx_q->idx);
}
/**
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
#include <linux/dim.h>
#include <net/libeth/cache.h>
#include <net/page_pool/helpers.h>
#include <net/libeth/rx.h>
#include <net/tcp.h>
#include <net/netdev_queues.h>
......@@ -103,8 +103,6 @@ do { \
#define IDPF_RX_BUF_STRIDE 32
#define IDPF_RX_BUF_POST_STRIDE 16
#define IDPF_LOW_WATERMARK 64
/* Size of header buffer specifically for header split */
#define IDPF_HDR_BUF_SIZE 256
#define IDPF_PACKET_HDR_PAD \
(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
#define IDPF_TX_TSO_MIN_MSS 88
......@@ -300,14 +298,7 @@ struct idpf_rx_extracted {
#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)
#define IDPF_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
struct idpf_rx_buf {
struct page *page;
unsigned int page_offset;
u16 truesize;
};
#define idpf_rx_buf libeth_fqe
#define IDPF_RX_MAX_PTYPE_PROTO_IDS 32
#define IDPF_RX_MAX_PTYPE_SZ (sizeof(struct virtchnl2_ptype) + \
......@@ -751,14 +742,14 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
/**
* struct idpf_buf_queue - software structure representing a buffer queue
* @split_buf: buffer descriptor array
* @rx_buf: Struct with RX buffer related members
* @rx_buf.buf: See struct idpf_rx_buf
* @rx_buf.hdr_buf_pa: DMA handle
* @rx_buf.hdr_buf_va: Virtual address
* @pp: Page pool pointer
* @hdr_buf: &libeth_fqe for header buffers
* @hdr_pp: &page_pool for header buffers
* @buf: &idpf_rx_buf for data buffers
* @pp: &page_pool for data buffers
* @tail: Tail offset
* @flags: See enum idpf_queue_flags_t
* @desc_count: Number of descriptors
* @hdr_truesize: truesize for buffer headers
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: RX buffer to allocate at
......@@ -773,16 +764,16 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
struct idpf_buf_queue {
__cacheline_group_begin_aligned(read_mostly);
struct virtchnl2_splitq_rx_buf_desc *split_buf;
struct {
struct libeth_fqe *hdr_buf;
struct page_pool *hdr_pp;
struct idpf_rx_buf *buf;
dma_addr_t hdr_buf_pa;
void *hdr_buf_va;
} rx_buf;
struct page_pool *pp;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u32 desc_count;
u32 hdr_truesize;
__cacheline_group_end_aligned(read_mostly);
__cacheline_group_begin_aligned(read_write);
......@@ -976,6 +967,18 @@ struct idpf_txq_group {
u32 num_completions_pending;
};
static inline int idpf_q_vector_to_mem(const struct idpf_q_vector *q_vector)
{
u32 cpu;
if (!q_vector)
return NUMA_NO_NODE;
cpu = cpumask_first(q_vector->affinity_mask);
return cpu < nr_cpu_ids ? cpu_to_mem(cpu) : NUMA_NO_NODE;
}
/**
* idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
* @size: transmit request size in bytes
......@@ -1044,7 +1047,7 @@ static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
unsigned int buf_size)
{
if (buf_size == IDPF_RX_BUF_2048)
buf->page = page_pool_dev_alloc_frag(pool, &buf->page_offset,
buf->page = page_pool_dev_alloc_frag(pool, &buf->offset,
buf_size);
else
buf->page = page_pool_dev_alloc_pages(pool);
......@@ -1054,7 +1057,7 @@ static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
buf->truesize = buf_size;
return page_pool_get_dma_addr(buf->page) + buf->page_offset +
return page_pool_get_dma_addr(buf->page) + buf->offset +
pool->p.offset;
}
......@@ -1081,7 +1084,7 @@ static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len)
dma_sync_single_range_for_cpu(pp->p.dev,
page_pool_get_dma_addr(page),
rx_buf->page_offset + pp->p.offset, len,
rx_buf->offset + pp->p.offset, len,
page_pool_get_dma_dir(pp));
}
......@@ -1110,6 +1113,7 @@ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size);
struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size);
void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
bool xmit_more);
unsigned int idpf_size_to_txd_count(unsigned int size);
......
......@@ -1604,32 +1604,38 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++, k++) {
const struct idpf_bufq_set *sets;
struct idpf_rx_queue *rxq;
if (!idpf_is_queue_model_split(vport->rxq_model)) {
rxq = rx_qgrp->singleq.rxqs[j];
goto common_qi_fields;
}
rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
qi[k].rx_bufq1_id =
cpu_to_le16(rxq->bufq_sets[0].bufq.q_id);
sets = rxq->bufq_sets;
qi[k].rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id);
if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
qi[k].rx_bufq2_id =
cpu_to_le16(rxq->bufq_sets[1].bufq.q_id);
cpu_to_le16(sets[1].bufq.q_id);
}
qi[k].rx_buffer_low_watermark =
cpu_to_le16(rxq->rx_buffer_low_watermark);
if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
common_qi_fields:
rxq->rx_hbuf_size = sets[0].bufq.rx_hbuf_size;
if (idpf_queue_has(HSPLIT_EN, rxq)) {
qi[k].qflags |=
cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT);
qi[k].hdr_buffer_size =
cpu_to_le16(rxq->rx_hbuf_size);
}
common_qi_fields:
qi[k].queue_id = cpu_to_le32(rxq->q_id);
qi[k].model = cpu_to_le16(vport->rxq_model);
qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment