Commit 2239185c authored by Saeed Mahameed, committed by David S. Miller

net/mlx5e: Optimize XDP frame xmit

The XDP SQ uses a fixed-size WQE (MLX5E_XDP_TX_WQEBBS = 1) and posts only
one kind of WQE (MLX5_OPCODE_SEND).

Also, initialize the static fields of the SQ descriptors once in
open_xdpsq, rather than on every transmit in the critical path.

Optimize the code in light of these facts, and prefetch the TX descriptor
as the first step of the XDP xmit function.

Performance improvement:
System: Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz

Test case              Before     Now        improvement
---------------------------------------------------------------
XDP TX   (1 core)      13Mpps    13.7Mpps       5%
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 39e12351
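
The core idea of the patch — write every fixed descriptor field once when the ring is opened, so the xmit hot path only fills the per-packet fields and never memsets the whole WQE — can be illustrated with a small standalone C sketch. All struct, field, and function names below are simplified stand-ins, not the mlx5 driver's definitions, and the opcode value is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 64 /* power of two, as in the real SQ */

/* Simplified stand-in for a TX WQE; field names are illustrative. */
struct fake_wqe {
	uint32_t qpn_ds;           /* static: SQ number | DS count */
	uint16_t inline_hdr_sz;    /* static: inline header size */
	uint32_t lkey;             /* static: memory key */
	uint32_t opmod_idx_opcode; /* per-packet: pc | opcode */
	uint64_t addr;             /* per-packet: DMA address */
	uint32_t byte_count;       /* per-packet: payload length */
};

static struct fake_wqe ring[RING_SIZE];

/* Off the critical path: write all fixed fields once at ring open. */
static void ring_init(uint32_t sqn, uint32_t ds_cnt, uint32_t mkey)
{
	for (int i = 0; i < RING_SIZE; i++) {
		memset(&ring[i], 0, sizeof(ring[i]));
		ring[i].qpn_ds = (sqn << 8) | ds_cnt;
		ring[i].lkey = mkey;
	}
}

/* Hot path: no memset, touch only the fields that change per packet. */
static void ring_xmit(uint16_t *pc, uint64_t dma_addr, uint32_t len)
{
	struct fake_wqe *wqe = &ring[*pc & (RING_SIZE - 1)];

	wqe->opmod_idx_opcode = ((uint32_t)*pc << 8) | 0x0a; /* SEND-like opcode */
	wqe->addr = dma_addr;
	wqe->byte_count = len;
	(*pc)++; /* one WQEBB per frame, so pc advances by exactly 1 */
}

int main(void)
{
	uint16_t pc = 0;

	ring_init(7, 3, 0x1234);
	ring_xmit(&pc, 0xdead0000, 256);
	printf("posted wqe 0: opcode word 0x%x, len %u\n",
	       ring[0].opmod_idx_opcode, ring[0].byte_count);
	return 0;
}

The point of the sketch is that the xmit function touches three fields instead of clearing and rebuilding the whole descriptor on every frame.
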
@@ -116,12 +116,8 @@
 	(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
 
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_IHS_DS_COUNT \
-	DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT \
 	((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
-#define MLX5E_XDP_TX_WQEBBS \
-	DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)
 
 #define MLX5E_NUM_MAIN_GROUPS 9
@@ -352,7 +348,6 @@ struct mlx5e_sq {
 		} txq;
 		struct mlx5e_sq_wqe_info *ico_wqe;
 		struct {
-			struct mlx5e_sq_wqe_info *wqe_info;
 			struct mlx5e_dma_info     *di;
 			bool                       doorbell;
 		} xdp;
...
@@ -894,7 +894,6 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
 static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq)
 {
 	kfree(sq->db.xdp.di);
-	kfree(sq->db.xdp.wqe_info);
 }
 
 static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
@@ -903,9 +902,7 @@ static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
 
 	sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz,
 				     GFP_KERNEL, numa);
-	sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz,
-					   GFP_KERNEL, numa);
-	if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) {
+	if (!sq->db.xdp.di) {
 		mlx5e_free_sq_xdp_db(sq);
 		return -ENOMEM;
 	}
@@ -993,7 +990,7 @@ static int mlx5e_sq_get_max_wqebbs(u8 sq_type)
 	case MLX5E_SQ_ICO:
 		return MLX5E_ICOSQ_MAX_WQEBBS;
 	case MLX5E_SQ_XDP:
-		return MLX5E_XDP_TX_WQEBBS;
+		return 1;
 	}
 	return MLX5_SEND_WQE_MAX_WQEBBS;
 }
@@ -1513,6 +1510,40 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 		      MLX5E_MAX_NUM_CHANNELS);
 }
 
+static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
+			    struct mlx5e_sq_param *param,
+			    struct mlx5e_sq *sq)
+{
+	unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+	unsigned int inline_hdr_sz = 0;
+	int err;
+	int i;
+
+	err = mlx5e_open_sq(c, 0, param, sq);
+	if (err)
+		return err;
+
+	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+		ds_cnt++;
+	}
+
+	/* Pre initialize fixed WQE fields */
+	for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+		struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+		struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+		struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+		struct mlx5_wqe_data_seg *dseg;
+
+		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+		dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+		dseg->lkey = sq->mkey_be;
+	}
+
+	return 0;
+}
+
 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 			      struct mlx5e_channel_param *cparam,
 			      struct mlx5e_channel **cp)
@@ -1587,7 +1618,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 		}
 	}
 
-	err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->rq.xdpsq) : 0;
+	err = c->xdp ? mlx5e_open_xdpsq(c, &cparam->xdp_sq, &c->rq.xdpsq) : 0;
 	if (err)
 		goto err_close_sqs;
...
@@ -641,7 +641,7 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_tx_wqe *wqe;
-	u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
+	u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */
 
 	wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
@@ -657,17 +657,17 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 	struct mlx5_wq_cyc       *wq   = &sq->wq;
 	u16                       pi   = sq->pc & wq->sz_m1;
 	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-	struct mlx5e_sq_wqe_info *wi   = &sq->db.xdp.wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
 	struct mlx5_wqe_data_seg *dseg;
-	u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT;
 
 	ptrdiff_t data_offset = xdp->data - xdp->data_hard_start;
 	dma_addr_t dma_addr  = di->addr + data_offset;
 	unsigned int dma_len = xdp->data_end - xdp->data;
 
+	prefetchw(wqe);
+
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
 		     MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) {
 		rq->stats.xdp_drop++;
@@ -675,7 +675,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 		return false;
 	}
 
-	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
+	if (unlikely(!mlx5e_sq_has_room_for(sq, 1))) {
 		if (sq->db.xdp.doorbell) {
 			/* SQ is full, ring doorbell */
 			mlx5e_xmit_xdp_doorbell(sq);
@@ -686,35 +686,29 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 		return false;
 	}
 
-	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
-				   PCI_DMA_TODEVICE);
+	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE);
 
-	memset(wqe, 0, sizeof(*wqe));
+	cseg->fm_ce_se = 0;
 
 	dseg = (struct mlx5_wqe_data_seg *)eseg + 1;
 
 	/* copy the inline part if required */
 	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
 		memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
 		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
 		dma_len  -= MLX5E_XDP_MIN_INLINE;
 		dma_addr += MLX5E_XDP_MIN_INLINE;
-		ds_cnt   += MLX5E_XDP_IHS_DS_COUNT;
 		dseg++;
 	}
 
 	/* write the dma part */
 	dseg->addr       = cpu_to_be64(dma_addr);
 	dseg->byte_count = cpu_to_be32(dma_len);
-	dseg->lkey       = sq->mkey_be;
 
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
-	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
 	sq->db.xdp.di[pi] = *di;
-	wi->opcode     = MLX5_OPCODE_SEND;
-	wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
-	sq->pc += MLX5E_XDP_TX_WQEBBS;
+	sq->pc++;
 
 	sq->db.xdp.doorbell = true;
 	rq->stats.xdp_tx++;
@@ -1023,7 +1017,6 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
 		do {
-			struct mlx5e_sq_wqe_info *wi;
 			struct mlx5e_dma_info *di;
 			u16 ci;
 
@@ -1031,14 +1024,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 			ci = sqcc & sq->wq.sz_m1;
 			di = &sq->db.xdp.di[ci];
-			wi = &sq->db.xdp.wqe_info[ci];
-
-			if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) {
-				sqcc++;
-				continue;
-			}
 
-			sqcc += wi->num_wqebbs;
+			sqcc++;
 
 			/* Recycle RX page */
 			mlx5e_page_release(rq, di, true);
 		} while (!last_wqe);
@@ -1056,21 +1043,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 void mlx5e_free_xdpsq_descs(struct mlx5e_sq *sq)
 {
 	struct mlx5e_rq *rq = container_of(sq, struct mlx5e_rq, xdpsq);
-	struct mlx5e_sq_wqe_info *wi;
 	struct mlx5e_dma_info *di;
 	u16 ci;
 
 	while (sq->cc != sq->pc) {
 		ci = sq->cc & sq->wq.sz_m1;
 		di = &sq->db.xdp.di[ci];
-		wi = &sq->db.xdp.wqe_info[ci];
-
-		if (wi->opcode == MLX5_OPCODE_NOP) {
-			sq->cc++;
-			continue;
-		}
-
-		sq->cc += wi->num_wqebbs;
+		sq->cc++;
 
 		mlx5e_page_release(rq, di, false);
 	}
...
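
Since every XDP frame now occupies exactly one WQEBB, the ring accounting degenerates to simple counter increments: the last posted slot is always (pc - 1) & sz_m1, and completions advance the consumer counter by one per descriptor instead of walking variable-size WQEs. Below is a minimal, hypothetical C sketch of that producer/consumer bookkeeping with batched doorbells; the names, the room check, and the doorbell action are simplified stand-ins, not the driver's API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_sq {
	uint16_t pc;    /* producer counter: 1 WQEBB per frame */
	uint16_t cc;    /* consumer (completion) counter */
	uint16_t sz_m1; /* ring size - 1; size is a power of two */
	bool doorbell;  /* frames posted but not yet doorbelled */
};

static bool sq_has_room(const struct fake_sq *sq)
{
	/* outstanding WQEBBs must not exceed the ring size */
	return (uint16_t)(sq->pc - sq->cc) <= sq->sz_m1;
}

static void sq_ring_doorbell(struct fake_sq *sq)
{
	/* fixed-size WQEs make the last posted slot simply pc - 1 */
	uint16_t last_pi = (uint16_t)(sq->pc - 1) & sq->sz_m1;

	printf("doorbell: last pi = %u\n", last_pi);
	sq->doorbell = false;
}

static bool sq_xmit(struct fake_sq *sq)
{
	if (!sq_has_room(sq)) {
		if (sq->doorbell)
			sq_ring_doorbell(sq); /* SQ full: flush what is queued */
		return false;
	}
	sq->pc++;            /* exactly one WQEBB consumed per frame */
	sq->doorbell = true; /* batched: ring once per poll cycle */
	return true;
}

int main(void)
{
	struct fake_sq sq = { .sz_m1 = 7 };

	while (sq_xmit(&sq))
		;              /* fill the 8-slot ring */
	sq.cc = sq.pc;         /* completion path: cc advances by 1 per frame */
	printf("posted %u frames\n", sq.pc);
	return 0;
}
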