Commit 4c9f0937 authored by Daniel Borkmann

Merge branch 'bpf-xsk-rx-batch'

Magnus Karlsson says:

====================
This patch set introduces a batched interface for Rx buffer allocation
in the AF_XDP buffer pool. Instead of using xsk_buff_alloc(*pool),
drivers can now use xsk_buff_alloc_batch(*pool, **xdp_buff_array,
max). Instead of returning a pointer to a single xdp_buff, it returns
the number of xdp_buffs it managed to allocate, up to the max
parameter supplied in the call. Pointers to the allocated xdp_buffs
are placed in the xdp_buff_array passed to the call. This can be a SW
ring that already exists in the driver or a new structure that the
driver has allocated.

  u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool,
                           struct xdp_buff **xdp,
                           u32 max);

When using this interface, the driver should also use the new helper
below to set the relevant fields in the struct xdp_buff. This is
necessary because xsk_buff_alloc_batch() does not fill in the data and
data_meta fields for you, as xsk_buff_alloc() does, so it is no longer
sufficient for the driver to set only data_end (effectively the
size). This is done for performance reasons, as explained in detail in
the commit message.

  void xsk_buff_set_size(struct xdp_buff *xdp, u32 size);
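
As a rough illustration of how the two helpers are meant to be used
together, here is a minimal sketch of a zero-copy driver Rx path. It is
not taken from the patches: the my_ring struct and the my_fill_hw_desc()
helper are made up for the example, while the xsk_* calls are the real
ones. The actual i40e and ice conversions below follow the same pattern.

  /* Hypothetical refill path: allocate up to "count" buffers in one call.
   * Assumes ring->xdp_buf is the driver's SW ring of xdp_buff pointers and
   * that my_fill_hw_desc() writes one hardware Rx descriptor.
   */
  static u32 my_refill_rx_zc(struct my_ring *ring, u16 count)
  {
          struct xdp_buff **xdp = &ring->xdp_buf[ring->next_to_use];
          u32 nb_buffs, i;

          /* Do not wrap the SW ring within a single batched call. */
          nb_buffs = min_t(u16, count, ring->count - ring->next_to_use);
          nb_buffs = xsk_buff_alloc_batch(ring->xsk_pool, xdp, nb_buffs);

          for (i = 0; i < nb_buffs; i++)
                  my_fill_hw_desc(ring, ring->next_to_use + i,
                                  xsk_buff_xdp_get_dma(xdp[i]));

          ring->next_to_use = (ring->next_to_use + nb_buffs) % ring->count;
          return nb_buffs;
  }

  /* Hypothetical completion path: the driver must now set the size itself,
   * since xsk_buff_alloc_batch() leaves data/data_meta/data_end untouched.
   */
  static void my_clean_rx_buf_zc(struct my_ring *ring, u32 idx, u32 size)
  {
          struct xdp_buff *xdp = ring->xdp_buf[idx];

          xsk_buff_set_size(xdp, size);
          xsk_buff_dma_sync_for_cpu(xdp, ring->xsk_pool);
          /* ... run the XDP program, then recycle or convert to an skb ... */
  }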

Patch 6 also optimizes the buffer allocation in the aligned case. Here
we can skip the reinitialization of most fields in the xdp_buff_xsk
struct at allocation time. Since the number of elements in the heads
array equals the number of possible buffers in the umem, we can
initialize them once and for all at bind time and then just point to
the correct one in the xdp_buff_array returned to the driver. There is
no need for a stack of free head entries. In the unaligned case, the
buffers can reside anywhere in the umem, so this optimization is not
possible, as we still have to fill in the right information in the
xdp_buff every time one is allocated.
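
To make the aligned-case shortcut concrete, here is a small sketch of
the idea. It is not the exact kernel code: the helper name is made up,
but heads, free_heads and chunk_shift are the real pool fields.

  /* Aligned mode: chunk_size is a power of two, so a chunk address maps to
   * a fixed index and the pool can keep one permanently initialized entry
   * per chunk in heads[], set up once at bind time.
   */
  static inline u32 aligned_chunk_idx(u64 addr, u32 chunk_shift)
  {
          return (u32)(addr >> chunk_shift);
  }

  /* Allocation then becomes a lookup rather than a pop plus reinit:
   *
   *     xskb = &pool->heads[aligned_chunk_idx(addr, pool->chunk_shift)];
   *
   * whereas unaligned mode still has to pop a free entry and fill in
   * orig_addr, data_hard_start, dma and frame_dma every time:
   *
   *     xskb = pool->free_heads[--pool->free_heads_cnt];
   */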

I have updated i40e and ice to use this new batched interface.

These are the throughput results on my 2.1 GHz Cascade Lake system:

Aligned mode:
ice: +11% / -9 cycles/pkt
i40e: +12% / -9 cycles/pkt

Unaligned mode:
ice: +1.5% / -1 cycle/pkt
i40e: +1% / -1 cycle/pkt

For the aligned case, batching provides around 40% of the performance
improvement and the aligned optimization the remaining ~60%. I would
have expected a ~4% boost for unaligned mode based on this data, but I
only get around 1%; I do not know why. Note that this patch set also
reduces memory consumption in aligned mode.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents e7d5184b e34087fc
@@ -193,42 +193,40 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
 {
         u16 ntu = rx_ring->next_to_use;
         union i40e_rx_desc *rx_desc;
-        struct xdp_buff **bi, *xdp;
+        struct xdp_buff **xdp;
+        u32 nb_buffs, i;
         dma_addr_t dma;
-        bool ok = true;
 
         rx_desc = I40E_RX_DESC(rx_ring, ntu);
-        bi = i40e_rx_bi(rx_ring, ntu);
-        do {
-                xdp = xsk_buff_alloc(rx_ring->xsk_pool);
-                if (!xdp) {
-                        ok = false;
-                        goto no_buffers;
-                }
-                *bi = xdp;
-                dma = xsk_buff_xdp_get_dma(xdp);
+        xdp = i40e_rx_bi(rx_ring, ntu);
+
+        nb_buffs = min_t(u16, count, rx_ring->count - ntu);
+        nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
+        if (!nb_buffs)
+                return false;
+
+        i = nb_buffs;
+        while (i--) {
+                dma = xsk_buff_xdp_get_dma(*xdp);
                 rx_desc->read.pkt_addr = cpu_to_le64(dma);
                 rx_desc->read.hdr_addr = 0;
 
                 rx_desc++;
-                bi++;
-                ntu++;
-
-                if (unlikely(ntu == rx_ring->count)) {
-                        rx_desc = I40E_RX_DESC(rx_ring, 0);
-                        bi = i40e_rx_bi(rx_ring, 0);
-                        ntu = 0;
-                }
-        } while (--count);
+                xdp++;
+        }
 
-no_buffers:
-        if (rx_ring->next_to_use != ntu) {
-                /* clear the status bits for the next_to_use descriptor */
-                rx_desc->wb.qword1.status_error_len = 0;
-                i40e_release_rx_desc(rx_ring, ntu);
-        }
+        ntu += nb_buffs;
+        if (ntu == rx_ring->count) {
+                rx_desc = I40E_RX_DESC(rx_ring, 0);
+                xdp = i40e_rx_bi(rx_ring, 0);
+                ntu = 0;
+        }
 
-        return ok;
+        /* clear the status bits for the next_to_use descriptor */
+        rx_desc->wb.qword1.status_error_len = 0;
+        i40e_release_rx_desc(rx_ring, ntu);
+
+        return count == nb_buffs ? true : false;
 }
 
 /**
@@ -365,7 +363,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
                         break;
 
                 bi = *i40e_rx_bi(rx_ring, next_to_clean);
-                bi->data_end = bi->data + size;
+                xsk_buff_set_size(bi, size);
                 xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
 
                 xdp_res = i40e_run_xdp_zc(rx_ring, bi);
...
@@ -164,17 +164,10 @@ struct ice_tx_offload_params {
 };
 
 struct ice_rx_buf {
-        union {
-                struct {
-                        dma_addr_t dma;
-                        struct page *page;
-                        unsigned int page_offset;
-                        u16 pagecnt_bias;
-                };
-                struct {
-                        struct xdp_buff *xdp;
-                };
-        };
+        dma_addr_t dma;
+        struct page *page;
+        unsigned int page_offset;
+        u16 pagecnt_bias;
 };
 
 struct ice_q_stats {
@@ -270,6 +263,7 @@ struct ice_ring {
         union {
                 struct ice_tx_buf *tx_buf;
                 struct ice_rx_buf *rx_buf;
+                struct xdp_buff **xdp_buf;
         };
         /* CL2 - 2nd cacheline starts here */
         u16 q_index;            /* Queue number of ring */
...
@@ -364,45 +364,39 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
 {
         union ice_32b_rx_flex_desc *rx_desc;
         u16 ntu = rx_ring->next_to_use;
-        struct ice_rx_buf *rx_buf;
-        bool ok = true;
+        struct xdp_buff **xdp;
+        u32 nb_buffs, i;
         dma_addr_t dma;
 
-        if (!count)
-                return true;
-
         rx_desc = ICE_RX_DESC(rx_ring, ntu);
-        rx_buf = &rx_ring->rx_buf[ntu];
+        xdp = &rx_ring->xdp_buf[ntu];
 
-        do {
-                rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
-                if (!rx_buf->xdp) {
-                        ok = false;
-                        break;
-                }
+        nb_buffs = min_t(u16, count, rx_ring->count - ntu);
+        nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
+        if (!nb_buffs)
+                return false;
 
-                dma = xsk_buff_xdp_get_dma(rx_buf->xdp);
+        i = nb_buffs;
+        while (i--) {
+                dma = xsk_buff_xdp_get_dma(*xdp);
                 rx_desc->read.pkt_addr = cpu_to_le64(dma);
-                rx_desc->wb.status_error0 = 0;
 
                 rx_desc++;
-                rx_buf++;
-                ntu++;
-
-                if (unlikely(ntu == rx_ring->count)) {
-                        rx_desc = ICE_RX_DESC(rx_ring, 0);
-                        rx_buf = rx_ring->rx_buf;
-                        ntu = 0;
-                }
-        } while (--count);
+                xdp++;
+        }
 
-        if (rx_ring->next_to_use != ntu) {
-                /* clear the status bits for the next_to_use descriptor */
-                rx_desc->wb.status_error0 = 0;
-                ice_release_rx_desc(rx_ring, ntu);
-        }
+        ntu += nb_buffs;
+        if (ntu == rx_ring->count) {
+                rx_desc = ICE_RX_DESC(rx_ring, 0);
+                xdp = rx_ring->xdp_buf;
+                ntu = 0;
+        }
 
-        return ok;
+        /* clear the status bits for the next_to_use descriptor */
+        rx_desc->wb.status_error0 = 0;
+        ice_release_rx_desc(rx_ring, ntu);
+
+        return count == nb_buffs ? true : false;
 }
 
 /**
@@ -421,19 +415,19 @@ static void ice_bump_ntc(struct ice_ring *rx_ring)
 /**
  * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
  * @rx_ring: Rx ring
- * @rx_buf: zero-copy Rx buffer
+ * @xdp_arr: Pointer to the SW ring of xdp_buff pointers
  *
  * This function allocates a new skb from a zero-copy Rx buffer.
  *
  * Returns the skb on success, NULL on failure.
  */
 static struct sk_buff *
-ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct xdp_buff **xdp_arr)
 {
-        unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta;
-        unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data;
-        unsigned int datasize_hard = rx_buf->xdp->data_end -
-                                     rx_buf->xdp->data_hard_start;
+        struct xdp_buff *xdp = *xdp_arr;
+        unsigned int metasize = xdp->data - xdp->data_meta;
+        unsigned int datasize = xdp->data_end - xdp->data;
+        unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
         struct sk_buff *skb;
 
         skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
@@ -441,13 +435,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
         if (unlikely(!skb))
                 return NULL;
 
-        skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start);
-        memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize);
+        skb_reserve(skb, xdp->data - xdp->data_hard_start);
+        memcpy(__skb_put(skb, datasize), xdp->data, datasize);
         if (metasize)
                 skb_metadata_set(skb, metasize);
 
-        xsk_buff_free(rx_buf->xdp);
-        rx_buf->xdp = NULL;
+        xsk_buff_free(xdp);
+        *xdp_arr = NULL;
         return skb;
 }
 
@@ -521,7 +515,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
         while (likely(total_rx_packets < (unsigned int)budget)) {
                 union ice_32b_rx_flex_desc *rx_desc;
                 unsigned int size, xdp_res = 0;
-                struct ice_rx_buf *rx_buf;
+                struct xdp_buff **xdp;
                 struct sk_buff *skb;
                 u16 stat_err_bits;
                 u16 vlan_tag = 0;
@@ -544,18 +538,18 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
                 if (!size)
                         break;
 
-                rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
-                rx_buf->xdp->data_end = rx_buf->xdp->data + size;
-                xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);
+                xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean];
+                xsk_buff_set_size(*xdp, size);
+                xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool);
 
-                xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
+                xdp_res = ice_run_xdp_zc(rx_ring, *xdp);
                 if (xdp_res) {
                         if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
                                 xdp_xmit |= xdp_res;
                         else
-                                xsk_buff_free(rx_buf->xdp);
+                                xsk_buff_free(*xdp);
 
-                        rx_buf->xdp = NULL;
+                        *xdp = NULL;
                         total_rx_bytes += size;
                         total_rx_packets++;
                         cleaned_count++;
@@ -565,7 +559,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
                 }
 
                 /* XDP_PASS path */
-                skb = ice_construct_skb_zc(rx_ring, rx_buf);
+                skb = ice_construct_skb_zc(rx_ring, xdp);
                 if (!skb) {
                         rx_ring->rx_stats.alloc_buf_failed++;
                         break;
@@ -813,12 +807,12 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
         u16 i;
 
         for (i = 0; i < rx_ring->count; i++) {
-                struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+                struct xdp_buff **xdp = &rx_ring->xdp_buf[i];
 
-                if (!rx_buf->xdp)
+                if (!xdp)
                         continue;
 
-                rx_buf->xdp = NULL;
+                *xdp = NULL;
         }
 }
...
@@ -77,6 +77,12 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
         return xp_alloc(pool);
 }
 
+/* Returns as many entries as possible up to max. 0 <= N <= max. */
+static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+        return xp_alloc_batch(pool, xdp, max);
+}
+
 static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
 {
         return xp_can_alloc(pool, count);
@@ -89,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
         xp_free(xskb);
 }
 
+static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
+{
+        xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
+        xdp->data_meta = xdp->data;
+        xdp->data_end = xdp->data + size;
+}
+
 static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
                                               u64 addr)
 {
@@ -212,6 +225,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
         return NULL;
 }
 
+static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+        return 0;
+}
+
 static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
 {
         return false;
@@ -221,6 +239,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 {
 }
 
+static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
+{
+}
+
 static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
                                               u64 addr)
 {
...
@@ -7,6 +7,7 @@
 #include <linux/if_xdp.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
+#include <linux/bpf.h>
 #include <net/xdp.h>
 
 struct xsk_buff_pool;
@@ -23,7 +24,6 @@ struct xdp_buff_xsk {
         dma_addr_t dma;
         dma_addr_t frame_dma;
         struct xsk_buff_pool *pool;
-        bool unaligned;
         u64 orig_addr;
         struct list_head free_list_node;
 };
@@ -67,6 +67,7 @@ struct xsk_buff_pool {
         u32 free_heads_cnt;
         u32 headroom;
         u32 chunk_size;
+        u32 chunk_shift;
         u32 frame_len;
         u8 cached_need_wakeup;
         bool uses_need_wakeup;
@@ -81,6 +82,13 @@ struct xsk_buff_pool {
         struct xdp_buff_xsk *free_heads[];
 };
 
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
 /* AF_XDP core. */
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                                                 struct xdp_umem *umem);
@@ -89,7 +97,6 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
                          struct net_device *dev, u16 queue_id);
 void xp_destroy(struct xsk_buff_pool *pool);
-void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
 bool xp_put_pool(struct xsk_buff_pool *pool);
 void xp_clear_dev(struct xsk_buff_pool *pool);
@@ -99,12 +106,28 @@ void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 /* AF_XDP, and XDP core. */
 void xp_free(struct xdp_buff_xsk *xskb);
 
+static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+                                     u64 addr)
+{
+        xskb->orig_addr = addr;
+        xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
+}
+
+static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+                                    dma_addr_t *dma_pages, u64 addr)
+{
+        xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) +
+                          (addr & ~PAGE_MASK);
+        xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM;
+}
+
 /* AF_XDP ZC drivers, via xdp_sock_buff.h */
 void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
                unsigned long attrs, struct page **pages, u32 nr_pages);
 void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
 struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
 bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
 void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
 dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
@@ -180,4 +203,25 @@ static inline u64 xp_unaligned_add_offset_to_addr(u64 addr)
                xp_unaligned_extract_offset(addr);
 }
 
+static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr)
+{
+        return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift;
+}
+
+static inline void xp_release(struct xdp_buff_xsk *xskb)
+{
+        if (xskb->pool->unaligned)
+                xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
+
+static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+        u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+
+        offset += xskb->pool->headroom;
+        if (!xskb->pool->unaligned)
+                return xskb->orig_addr + offset;
+        return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+}
+
 #endif /* XSK_BUFF_POOL_H_ */
@@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
         return 0;
 }
 
-void xp_release(struct xdp_buff_xsk *xskb)
-{
-        xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
-}
-
-static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
-{
-        u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
-
-        offset += xskb->pool->headroom;
-        if (!xskb->pool->unaligned)
-                return xskb->orig_addr + offset;
-        return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-}
-
 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
         struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
...
@@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool)
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                                                 struct xdp_umem *umem)
 {
+        bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
         struct xsk_buff_pool *pool;
         struct xdp_buff_xsk *xskb;
-        u32 i;
+        u32 i, entries;
 
-        pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
-                        GFP_KERNEL);
+        entries = unaligned ? umem->chunks : 0;
+        pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
         if (!pool)
                 goto out;
 
@@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
         pool->free_heads_cnt = umem->chunks;
         pool->headroom = umem->headroom;
         pool->chunk_size = umem->chunk_size;
-        pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+        pool->chunk_shift = ffs(umem->chunk_size) - 1;
+        pool->unaligned = unaligned;
         pool->frame_len = umem->chunk_size - umem->headroom -
                           XDP_PACKET_HEADROOM;
         pool->umem = umem;
@@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                 xskb = &pool->heads[i];
                 xskb->pool = pool;
                 xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
-                pool->free_heads[i] = xskb;
+                if (pool->unaligned)
+                        pool->free_heads[i] = xskb;
+                else
+                        xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
         }
 
         return pool;
@@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 
         if (pool->unaligned)
                 xp_check_dma_contiguity(dma_map);
+        else
+                for (i = 0; i < pool->heads_cnt; i++) {
+                        struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+                        xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+                }
 
         err = xp_init_dma_info(pool, dma_map);
         if (err) {
@@ -448,8 +459,6 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
         if (pool->free_heads_cnt == 0)
                 return NULL;
 
-        xskb = pool->free_heads[--pool->free_heads_cnt];
-
         for (;;) {
                 if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
                         pool->fq->queue_empty_descs++;
@@ -466,17 +475,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
                 }
                 break;
         }
-        xskq_cons_release(pool->fq);
 
-        xskb->orig_addr = addr;
-        xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
-        if (pool->dma_pages_cnt) {
-                xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
-                                   ~XSK_NEXT_PG_CONTIG_MASK) +
-                                  (addr & ~PAGE_MASK);
-                xskb->dma = xskb->frame_dma + pool->headroom +
-                            XDP_PACKET_HEADROOM;
+        if (pool->unaligned) {
+                xskb = pool->free_heads[--pool->free_heads_cnt];
+                xp_init_xskb_addr(xskb, pool, addr);
+                if (pool->dma_pages_cnt)
+                        xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+        } else {
+                xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
         }
+
+        xskq_cons_release(pool->fq);
         return xskb;
 }
 
@@ -507,6 +516,96 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 }
 EXPORT_SYMBOL(xp_alloc);
 
+static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+        u32 i, cached_cons, nb_entries;
+
+        if (max > pool->free_heads_cnt)
+                max = pool->free_heads_cnt;
+        max = xskq_cons_nb_entries(pool->fq, max);
+
+        cached_cons = pool->fq->cached_cons;
+        nb_entries = max;
+        i = max;
+        while (i--) {
+                struct xdp_buff_xsk *xskb;
+                u64 addr;
+                bool ok;
+
+                __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);
+
+                ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
+                        xp_check_aligned(pool, &addr);
+                if (unlikely(!ok)) {
+                        pool->fq->invalid_descs++;
+                        nb_entries--;
+                        continue;
+                }
+
+                if (pool->unaligned) {
+                        xskb = pool->free_heads[--pool->free_heads_cnt];
+                        xp_init_xskb_addr(xskb, pool, addr);
+                        if (pool->dma_pages_cnt)
+                                xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+                } else {
+                        xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+                }
+
+                *xdp = &xskb->xdp;
+                xdp++;
+        }
+
+        xskq_cons_release_n(pool->fq, max);
+        return nb_entries;
+}
+
+static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
+{
+        struct xdp_buff_xsk *xskb;
+        u32 i;
+
+        nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);
+
+        i = nb_entries;
+        while (i--) {
+                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
+                list_del(&xskb->free_list_node);
+
+                *xdp = &xskb->xdp;
+                xdp++;
+        }
+        pool->free_list_cnt -= nb_entries;
+
+        return nb_entries;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+        u32 nb_entries1 = 0, nb_entries2;
+
+        if (unlikely(pool->dma_need_sync)) {
+                /* Slow path */
+                *xdp = xp_alloc(pool);
+                return !!*xdp;
+        }
+
+        if (unlikely(pool->free_list_cnt)) {
+                nb_entries1 = xp_alloc_reused(pool, xdp, max);
+                if (nb_entries1 == max)
+                        return nb_entries1;
+
+                max -= nb_entries1;
+                xdp += nb_entries1;
+        }
+
+        nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
+        if (!nb_entries2)
+                pool->fq->queue_empty_descs++;
+
+        return nb_entries1 + nb_entries2;
+}
+EXPORT_SYMBOL(xp_alloc_batch);
+
 bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
 {
         if (pool->free_list_cnt >= count)
...
@@ -111,14 +111,18 @@ struct xsk_queue {
 
 /* Functions that read and validate content from consumer rings. */
 
-static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr)
 {
         struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+        u32 idx = cached_cons & q->ring_mask;
 
-        if (q->cached_cons != q->cached_prod) {
-                u32 idx = q->cached_cons & q->ring_mask;
+        *addr = ring->desc[idx];
+}
 
-                *addr = ring->desc[idx];
+static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+{
+        if (q->cached_cons != q->cached_prod) {
+                __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr);
                 return true;
         }
 
...
(The diff for one additional file is collapsed and not shown.)
@@ -35,13 +35,13 @@
 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
 #define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 8
+#define BATCH_SIZE 64
 #define POLL_TMOUT 1000
 #define DEFAULT_PKT_CNT (4 * 1024)
 #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
 #define RX_FULL_RXQSIZE 32
-#define DEFAULT_OFFSET 256
+#define UMEM_HEADROOM_TEST_SIZE 128
 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
@@ -55,11 +55,13 @@ enum test_mode {
 enum test_type {
         TEST_TYPE_RUN_TO_COMPLETION,
         TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
+        TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
         TEST_TYPE_POLL,
         TEST_TYPE_UNALIGNED,
         TEST_TYPE_ALIGNED_INV_DESC,
         TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
         TEST_TYPE_UNALIGNED_INV_DESC,
+        TEST_TYPE_HEADROOM,
         TEST_TYPE_TEARDOWN,
         TEST_TYPE_BIDI,
         TEST_TYPE_STATS,
@@ -136,6 +138,7 @@ struct ifobject {
         bool tx_on;
         bool rx_on;
         bool use_poll;
+        bool pacing_on;
         u8 dst_mac[ETH_ALEN];
         u8 src_mac[ETH_ALEN];
 };
@@ -151,5 +154,9 @@ struct test_spec {
 };
 
 pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
+
+u32 pkts_in_flight;
 
 #endif /* XDPXCEIVER_H */