Commit cd02a1a2 authored by David S. Miller

Merge branch 'mlx5e-xdp-extend'

Tariq Toukan says:

====================
net/mlx5e: Extend XDP multi-buffer capabilities

This series extends the XDP multi-buffer support in the mlx5e driver.

Patchset breakdown:
- Infrastructural changes and preparations.
- Add XDP multi-buffer support for XDP redirect-in.
- Use TX MPWQE (multi-packet WQE) HW feature for non-linear
  single-segmented XDP frames.
- Add XDP multi-buffer support for striding RQ.

In Striding RQ, we overcome the lack of headroom and tailroom between
the RQ strides by allocating a side page per packet and using it for the
xdp_buff descriptor. We structure the xdp_buff so that it contains
nothing in the linear part, and the whole packet resides in the
fragments.

Performance highlight:

Packet rate test, 64 bytes, 32 channels, MTU 9000 bytes.
CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz.
NIC: ConnectX-6 Dx, at 100 Gbps.

+----------+-------------+-------------+---------+
| Test     | Legacy RQ   | Striding RQ | Speedup |
+----------+-------------+-------------+---------+
| XDP_DROP | 101,615,544 | 117,191,020 | +15%    |
+----------+-------------+-------------+---------+
| XDP_TX   |  95,608,169 | 117,043,422 | +22%    |
+----------+-------------+-------------+---------+

Series generated against net commit:
e61caf04 Merge branch 'page_pool-allow-caching-from-safely-localized-napi'

I'm submitting this directly as Saeed is traveling.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 14cac662 f52ac702
......@@ -475,59 +475,18 @@ struct mlx5e_txqsq {
cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
/* XDP packets can be transmitted in different ways. On completion, we need to
* distinguish between them to clean up things in a proper way.
*/
enum mlx5e_xdp_xmit_mode {
/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
* device or XDP_TX from an XSK RQ. The frame has to be unmapped and
* returned.
*/
MLX5E_XDP_XMIT_MODE_FRAME,
/* The xdp_frame was created in place as a result of XDP_TX from a
* regular RQ. No DMA remapping happened, and the page belongs to us.
*/
MLX5E_XDP_XMIT_MODE_PAGE,
/* No xdp_frame was created at all, the transmit happened from a UMEM
* page. The UMEM Completion Ring producer pointer has to be increased.
*/
MLX5E_XDP_XMIT_MODE_XSK,
};
struct mlx5e_xdp_info {
enum mlx5e_xdp_xmit_mode mode;
union {
struct {
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
} frame;
struct {
struct mlx5e_rq *rq;
struct page *page;
} page;
};
};
struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
};
struct mlx5e_xdp_info_fifo {
struct mlx5e_xdp_info *xi;
union mlx5e_xdp_info *xi;
u32 *cc;
u32 *pc;
u32 mask;
};
struct mlx5e_xdpsq;
struct mlx5e_xmit_data;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xmit_data *,
struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
......@@ -628,6 +587,7 @@ union mlx5e_alloc_units {
/* Per multi-packet RX WQE bookkeeping. */
struct mlx5e_mpw_info {
u16 consumed_strides;
/* One bit per page of the WQE, sized by MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
/* NOTE(review): presumably the side page that backs the xdp_buff linear
 * part when striding RQ runs XDP in non-linear mode (mlx5e_alloc_rq doubles
 * the pool size "for the linear part" in that case) - confirm against the
 * RX path, which is not visible here.
 */
struct mlx5e_frag_page linear_page;
union mlx5e_alloc_units alloc_units;
};
......
......@@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
u32 sz;
/* XSK frames are mapped as individual pages, because frames may come in
* an arbitrary order from random locations in the UMEM.
*/
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
/* XDP in mlx5e doesn't support multiple packets per page. */
if (params->xdp_prog)
return PAGE_SIZE;
sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
/* XDP in mlx5e doesn't support multiple packets per page.
* Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
*/
return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz;
}
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
......@@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
}
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
log_wqe_num_of_strides,
page_shift, umr_mode);
}
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
......@@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
/* XDP in mlx5e doesn't support multiple packets per page. */
if (params->xdp_prog)
return PAGE_SHIFT;
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
......@@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
return -EOPNOTSUPP;
if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
return -EINVAL;
return 0;
}
......
......@@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
......
......@@ -77,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
}
/* TX */
/* Describes the linear buffer of one packet handed to the XDP SQ xmit
 * routines (mlx5e_xmit_xdp_frame / mlx5e_xmit_xdp_frame_mpwqe).
 */
struct mlx5e_xmit_data {
/* DMA address of the buffer; callers fill it from dma_map_single() or from
 * the page_pool DMA address of the backing page.
 */
dma_addr_t dma_addr;
/* CPU address of the same buffer; the inline-header copy reads from it. */
void *data;
/* Buffer length in bytes; shares one u32 with has_frags. */
u32 len : 31;
/* Set when the packet carries fragments. The xmit routines then recover the
 * enclosing struct mlx5e_xmit_data_frags with container_of(), so a caller
 * that sets this bit must have embedded this object in one.
 */
u32 has_frags : 1;
};
/* Extended xmit descriptor for a packet that may carry fragments. Callers
 * pass &xd to the xmit routines, which get back to this struct through
 * container_of() when xd.has_frags is set.
 */
struct mlx5e_xmit_data_frags {
/* Linear part of the packet. */
struct mlx5e_xmit_data xd;
/* Shared info of the xdp_frame, the source of nr_frags and frags[]. */
struct skb_shared_info *sinfo;
/* Per-fragment DMA addresses, indexed like sinfo->frags[]. NULL means the
 * xmit routines compute each address from the page_pool DMA address of the
 * fragment page plus the fragment offset.
 */
dma_addr_t *dma_arr;
};
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
......
......@@ -61,9 +61,8 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct xdp_buff *xdp)
{
struct page *page = virt_to_page(xdp->data);
struct skb_shared_info *sinfo = NULL;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct mlx5e_xmit_data_frags xdptxdf = {};
struct mlx5e_xmit_data *xdptxd;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
int i;
......@@ -72,8 +71,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
if (unlikely(!xdpf))
return false;
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
xdptxd = &xdptxdf.xd;
xdptxd->data = xdpf->data;
xdptxd->len = xdpf->len;
xdptxd->has_frags = xdp_frame_has_frags(xdpf);
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
......@@ -88,24 +89,29 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
*/
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
if (unlikely(xdptxd->has_frags))
return false;
dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
DMA_TO_DEVICE);
if (dma_mapping_error(sq->pdev, dma_addr)) {
xdp_return_frame(xdpf);
return false;
}
xdptxd.dma_addr = dma_addr;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = dma_addr;
xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .frame.xdpf = xdpf });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
return true;
}
......@@ -115,17 +121,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
* mode.
*/
xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
xdpi.page.rq = rq;
dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);
if (unlikely(xdp_frame_has_frags(xdpf))) {
sinfo = xdp_get_shared_info_from_frame(xdpf);
if (xdptxd->has_frags) {
xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
xdptxdf.dma_arr = NULL;
for (i = 0; i < sinfo->nr_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
dma_addr_t addr;
u32 len;
......@@ -137,22 +141,34 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
}
}
xdptxd.dma_addr = dma_addr;
xdptxd->dma_addr = dma_addr;
if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
return false;
xdpi.page.page = page;
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
if (unlikely(xdp_frame_has_frags(xdpf))) {
for (i = 0; i < sinfo->nr_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
xdpi.page.page = skb_frag_page(frag);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
/* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });
if (xdptxd->has_frags) {
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info)
{ .page.num = 1 + xdptxdf.sinfo->nr_frags });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .page.page = page });
for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info)
{ .page.page = skb_frag_page(frag) });
}
} else {
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .page.num = 1 });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .page.page = page });
}
return true;
......@@ -381,26 +397,43 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo, int check_result);
int check_result);
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo, int check_result)
int check_result)
{
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5e_xmit_data *p = xdptxd;
struct mlx5e_xmit_data tmp;
if (xdptxd->has_frags) {
struct mlx5e_xmit_data_frags *xdptxdf =
container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
if (unlikely(sinfo)) {
if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
/* MPWQE is enabled, but a multi-buffer packet is queued for
* transmission. MPWQE can't send fragmented packets, so close
* the current session and fall back to a regular WQE.
*/
if (unlikely(sq->mpwqe.wqe))
mlx5e_xdp_mpwqe_complete(sq);
return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
}
if (!xdptxd->len) {
skb_frag_t *frag = &xdptxdf->sinfo->frags[0];
if (unlikely(xdptxd->len > sq->hw_mtu)) {
tmp.data = skb_frag_address(frag);
tmp.len = skb_frag_size(frag);
tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
page_pool_get_dma_addr(skb_frag_page(frag)) +
skb_frag_off(frag);
p = &tmp;
}
}
if (unlikely(p->len > sq->hw_mtu)) {
stats->err++;
return false;
}
......@@ -418,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_session_start(sq);
}
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);
if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
......@@ -446,8 +479,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo, int check_result)
int check_result)
{
struct mlx5e_xmit_data_frags *xdptxdf =
container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_data_seg *dseg;
......@@ -459,26 +494,34 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
u16 ds_cnt, inline_hdr_sz;
u8 num_wqebbs = 1;
int num_frags = 0;
bool inline_ok;
bool linear;
u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
dma_len >= MLX5E_XDP_MIN_INLINE;
if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
inline_hdr_sz = 0;
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
ds_cnt++;
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
linear = !!(dma_len - inline_hdr_sz);
ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;
/* check_result must be 0 if xdptxd->has_frags is set. */
if (!check_result) {
int stop_room = 1;
if (unlikely(sinfo)) {
ds_cnt += sinfo->nr_frags;
num_frags = sinfo->nr_frags;
if (xdptxd->has_frags) {
ds_cnt += xdptxdf->sinfo->nr_frags;
num_frags = xdptxdf->sinfo->nr_frags;
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
* enough to hold all fragments.
......@@ -499,53 +542,53 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
eseg = &wqe->eth;
dseg = wqe->data;
inline_hdr_sz = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
if (inline_hdr_sz) {
memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
inline_hdr_sz - sizeof(eseg->inline_hdr.start));
dma_len -= inline_hdr_sz;
dma_addr += inline_hdr_sz;
dseg++;
}
/* write the dma part */
if (linear) {
dseg->addr = cpu_to_be64(dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
dseg++;
}
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
u8 num_pkts = 1 + num_frags;
if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
int i;
memset(&cseg->trailer, 0, sizeof(cseg->trailer));
memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
dseg->lkey = sq->mkey_be;
for (i = 0; i < num_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
dma_addr_t addr;
addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
page_pool_get_dma_addr(skb_frag_page(frag)) +
skb_frag_off(frag);
dseg++;
dseg->addr = cpu_to_be64(addr);
dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
dseg->lkey = sq->mkey_be;
dseg++;
}
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = num_wqebbs,
.num_pkts = num_pkts,
.num_pkts = 1,
};
sq->pc += num_wqebbs;
......@@ -570,20 +613,61 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
u16 i;
for (i = 0; i < wi->num_pkts; i++) {
struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
switch (xdpi.mode) {
case MLX5E_XDP_XMIT_MODE_FRAME:
case MLX5E_XDP_XMIT_MODE_FRAME: {
/* XDP_TX from the XSK RQ and XDP_REDIRECT */
dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
xdpi.frame.xdpf->len, DMA_TO_DEVICE);
xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
xdpf = xdpi.frame.xdpf;
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
dma_addr = xdpi.frame.dma_addr;
dma_unmap_single(sq->pdev, dma_addr,
xdpf->len, DMA_TO_DEVICE);
if (xdp_frame_has_frags(xdpf)) {
struct skb_shared_info *sinfo;
int j;
sinfo = xdp_get_shared_info_from_frame(xdpf);
for (j = 0; j < sinfo->nr_frags; j++) {
skb_frag_t *frag = &sinfo->frags[j];
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
dma_addr = xdpi.frame.dma_addr;
dma_unmap_single(sq->pdev, dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
}
}
xdp_return_frame_bulk(xdpf, bq);
break;
case MLX5E_XDP_XMIT_MODE_PAGE:
}
case MLX5E_XDP_XMIT_MODE_PAGE: {
/* XDP_TX from the regular RQ */
page_pool_put_defragged_page(xdpi.page.rq->page_pool,
xdpi.page.page, -1, true);
u8 num, n = 0;
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
num = xdpi.page.num;
do {
struct page *page;
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
page = xdpi.page.page;
/* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
* as we know this is a page_pool page.
*/
page_pool_put_defragged_page(page->pp,
page, -1, true);
} while (++n < num);
break;
}
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
(*xsk_frames)++;
......@@ -717,34 +801,79 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
sq = &priv->channels.c[sq_num]->xdpsq;
for (i = 0; i < n; i++) {
struct mlx5e_xmit_data_frags xdptxdf = {};
struct xdp_frame *xdpf = frames[i];
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
dma_addr_t dma_arr[MAX_SKB_FRAGS];
struct mlx5e_xmit_data *xdptxd;
bool ret;
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
xdptxd.len, DMA_TO_DEVICE);
xdptxd = &xdptxdf.xd;
xdptxd->data = xdpf->data;
xdptxd->len = xdpf->len;
xdptxd->has_frags = xdp_frame_has_frags(xdpf);
xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
xdptxd->len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
break;
xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = xdptxd.dma_addr;
if (xdptxd->has_frags) {
int j;
xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
xdptxdf.dma_arr = dma_arr;
for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
skb_frag_t *frag = &xdptxdf.sinfo->frags[j];
dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
skb_frag_size(frag), DMA_TO_DEVICE);
if (!dma_mapping_error(sq->pdev, dma_arr[j]))
continue;
/* mapping error */
while (--j >= 0)
dma_unmap_single(sq->pdev, dma_arr[j],
skb_frag_size(&xdptxdf.sinfo->frags[j]),
DMA_TO_DEVICE);
goto out;
}
}
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
if (unlikely(!ret)) {
dma_unmap_single(sq->pdev, xdptxd.dma_addr,
xdptxd.len, DMA_TO_DEVICE);
int j;
dma_unmap_single(sq->pdev, xdptxd->dma_addr,
xdptxd->len, DMA_TO_DEVICE);
if (!xdptxd->has_frags)
break;
for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
dma_unmap_single(sq->pdev, dma_arr[j],
skb_frag_size(&xdptxdf.sinfo->frags[j]),
DMA_TO_DEVICE);
break;
}
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .frame.xdpf = xdpf });
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
if (xdptxd->has_frags) {
int j;
for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
(union mlx5e_xdp_info)
{ .frame.dma_addr = dma_arr[j] });
}
nxmit++;
}
out:
if (flags & XDP_XMIT_FLUSH) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
......
......@@ -50,6 +50,53 @@ struct mlx5e_xdp_buff {
struct mlx5e_rq *rq;
};
/* XDP packets can be transmitted in different ways. On completion, we need to
 * distinguish between them to clean up things in a proper way.
 *
 * The mode is the first entry pushed to the xdpi fifo for every packet, and
 * the first entry popped on completion, where it selects the cleanup path.
 */
enum mlx5e_xdp_xmit_mode {
/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
 * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
 * returned.
 */
MLX5E_XDP_XMIT_MODE_FRAME,
/* The xdp_frame was created in place as a result of XDP_TX from a
 * regular RQ. No DMA remapping happened, and the page belongs to us.
 */
MLX5E_XDP_XMIT_MODE_PAGE,
/* No xdp_frame was created at all, the transmit happened from a UMEM
 * page. The UMEM Completion Ring producer pointer has to be increased.
 */
MLX5E_XDP_XMIT_MODE_XSK,
};
/* One slot of the xdpi fifo. A slot holds exactly one of the values below;
 * which one is implied by its position in the per-packet sequence.
 *
 * An xmit_mode entry is pushed to the fifo per packet, followed by multiple
 * entries, as follows:
 *
 * MLX5E_XDP_XMIT_MODE_FRAME:
 * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num.
 * 'num' is derived from xdpf: dma_addr_1 maps the linear part, and one
 * more dma_addr follows for every fragment of the frame, in order.
 *
 * MLX5E_XDP_XMIT_MODE_PAGE:
 * num, page_1, page_2, ... , page_num.
 * page_1 is the page of the linear part, followed by the fragment pages.
 *
 * MLX5E_XDP_XMIT_MODE_XSK:
 * none.
 */
union mlx5e_xdp_info {
/* First slot of every packet. */
enum mlx5e_xdp_xmit_mode mode;
/* Slots that follow a MLX5E_XDP_XMIT_MODE_FRAME entry. */
union {
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
} frame;
/* Slots that follow a MLX5E_XDP_XMIT_MODE_PAGE entry. */
union {
/* NOTE(review): no push of .page.rq is visible in this view - confirm
 * whether this member is still needed.
 */
struct mlx5e_rq *rq;
u8 num;
struct page *page;
} page;
};
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
......@@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
......@@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
union mlx5e_xdp_info xi)
{
u32 i = (*fifo->pc)++ & fifo->mask;
fifo->xi[i] = *xi;
fifo->xi[i] = xi;
}
static inline struct mlx5e_xdp_info
static inline union mlx5e_xdp_info
mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
{
return fifo->xi[(*fifo->cc)++ & fifo->mask];
......
......@@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
* same.
*/
static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_info *xdpi)
union mlx5e_xdp_info *xdpi)
{
u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
......@@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
wi->num_pkts = 1;
nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi);
sq->doorbell_cseg = &nopwqe->ctrl;
}
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
union mlx5e_xdp_info xdpi;
bool work_done = true;
bool flush = false;
......@@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xmit_xdp_frame_check_mpwqe,
mlx5e_xmit_xdp_frame_check,
sq);
struct mlx5e_xmit_data xdptxd = {};
struct xdp_desc desc;
bool ret;
......@@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
mlx5e_xmit_xdp_frame, sq, &xdptxd,
check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
......@@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xsk_tx_post_err(sq, &xdpi);
} else {
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
}
flush = true;
......
......@@ -803,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog)
pool_size *= 2; /* additional page per packet for the linear part */
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
......@@ -1300,17 +1303,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of
* entries of all xmit_modes.
*/
size_t size;
size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
size = array_size(sizeof(*xdpi_fifo->xi), entries);
xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
return -ENOMEM;
xdpi_fifo->pc = &sq->xdpi_fifo_pc;
xdpi_fifo->cc = &sq->xdpi_fifo_cc;
xdpi_fifo->mask = dsegs_per_wq - 1;
xdpi_fifo->mask = entries - 1;
return 0;
}
......@@ -1860,11 +1865,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
/* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
* supported by upstream, and there is no defined trigger to allow
* transmitting redirected multi-buffer frames.
*/
if (param->is_xdp_mb && !is_redirect)
if (param->is_xdp_mb)
set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
......@@ -1888,7 +1889,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg;
sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = 1,
......@@ -1897,9 +1897,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
dseg->lkey = sq->mkey_be;
}
}
......@@ -4066,9 +4063,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev)
val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_NDO_XMIT;
if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
val |= NETDEV_XDP_ACT_RX_SG;
NETDEV_XDP_ACT_RX_SG |
NETDEV_XDP_ACT_NDO_XMIT |
NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(netdev, val);
}
......@@ -4262,20 +4259,25 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev,
/* No XSK params: AF_XDP can't be enabled yet at the point of setting
* the XDP program.
*/
is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ?
mlx5e_rx_is_linear_skb(mdev, params, NULL) :
mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL);
if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
if (!is_linear) {
if (!params->xdp_prog->aux->xdp_has_frags) {
netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
}
if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
!mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) {
netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
}
}
return true;
}
......@@ -4766,20 +4768,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
queue_work(priv->wq, &priv->tx_timeout_work);
}
static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
struct net_device *netdev = priv->netdev;
struct mlx5e_params new_params;
if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
new_params = priv->channels.params;
new_params.xdp_prog = prog;
if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
if (!mlx5e_params_validate_xdp(netdev, mdev, params))
return -EINVAL;
return 0;
......@@ -4806,8 +4803,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mutex_lock(&priv->state_lock);
new_params = priv->channels.params;
new_params.xdp_prog = prog;
if (prog) {
err = mlx5e_xdp_allowed(priv, prog);
err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params);
if (err)
goto unlock;
}
......@@ -4815,22 +4815,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
new_params = priv->channels.params;
new_params.xdp_prog = prog;
/* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
* supported with the new parameters: if PAGE_SIZE is bigger than
* MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
* the MTU is small enough for the linear mode, because XDP uses strides
* of PAGE_SIZE on regular RQs.
*/
if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
/* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
if (err)
goto unlock;
}
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
......
......@@ -471,6 +471,35 @@ static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
return i;
}
/* Append one RX buffer fragment to the shared info of an xdp_buff.
 *
 * @rq:          RQ the fragment was received on (supplies the DMA device and
 *               mapping direction).
 * @sinfo:       shared info of @xdp that receives the new fragment.
 * @xdp:         xdp_buff being built.
 * @frag_page:   page holding the fragment data.
 * @frag_offset: offset of the data inside the page.
 * @len:         number of bytes in the fragment.
 *
 * The fragment bytes are synced for CPU access, then recorded in the next
 * free slot of @sinfo->frags[] and added to @sinfo->xdp_frags_size.
 */
static void
mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo,
			       struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page,
			       u32 frag_offset, u32 len)
{
	struct page *page = frag_page->page;
	dma_addr_t dma_base = page_pool_get_dma_addr(page);
	skb_frag_t *slot;

	dma_sync_single_for_cpu(rq->pdev, dma_base + frag_offset, len, rq->buff.map_dir);

	if (!xdp_buff_has_frags(xdp)) {
		/* The shared info is initialised lazily, on the first
		 * fragment only, to avoid cold cache access when possible.
		 */
		xdp_buff_set_frags_flag(xdp);
		sinfo->xdp_frags_size = 0;
		sinfo->nr_frags = 0;
	}

	slot = &sinfo->frags[sinfo->nr_frags];
	sinfo->nr_frags++;

	__skb_frag_set_page(slot, page);
	skb_frag_off_set(slot, frag_offset);
	skb_frag_size_set(slot, len);
	sinfo->xdp_frags_size += len;

	if (page_is_pfmemalloc(page))
		xdp_buff_set_frag_pfmemalloc(xdp);
}
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
struct page *page, u32 frag_offset, u32 len,
......@@ -1601,10 +1630,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
}
static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
void *va, u16 headroom, u32 len,
void *va, u16 headroom, u32 frame_sz, u32 len,
struct mlx5e_xdp_buff *mxbuf)
{
xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq);
xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true);
mxbuf->cqe = cqe;
mxbuf->rq = rq;
......@@ -1637,7 +1666,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf))
return NULL; /* page/packet was consumed by XDP */
......@@ -1685,7 +1715,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(va + rx_headroom);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
frag_consumed_bytes, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
truesize = 0;
......@@ -1694,35 +1725,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
wi++;
while (cqe_bcnt) {
skb_frag_t *frag;
frag_page = wi->frag_page;
frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
addr = page_pool_get_dma_addr(frag_page->page);
dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
frag_consumed_bytes, rq->buff.map_dir);
if (!xdp_buff_has_frags(&mxbuf.xdp)) {
/* Init on the first fragment to avoid cold cache access
* when possible.
*/
sinfo->nr_frags = 0;
sinfo->xdp_frags_size = 0;
xdp_buff_set_frags_flag(&mxbuf.xdp);
}
frag = &sinfo->frags[sinfo->nr_frags++];
__skb_frag_set_page(frag, frag_page->page);
skb_frag_off_set(frag, wi->offset);
skb_frag_size_set(frag, frag_consumed_bytes);
if (page_is_pfmemalloc(frag_page->page))
xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
sinfo->xdp_frags_size += frag_consumed_bytes;
mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
wi->offset, frag_consumed_bytes);
truesize += frag_info->frag_stride;
cqe_bcnt -= frag_consumed_bytes;
......@@ -1969,28 +1977,131 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
struct mlx5e_frag_page *head_page = frag_page;
u32 frag_offset = head_offset + headlen;
u32 byte_cnt = cqe_bcnt - headlen;
u32 frag_offset = head_offset;
u32 byte_cnt = cqe_bcnt;
struct skb_shared_info *sinfo;
struct mlx5e_xdp_buff mxbuf;
unsigned int truesize = 0;
struct bpf_prog *prog;
struct sk_buff *skb;
dma_addr_t addr;
u32 linear_frame_sz;
u16 linear_data_len;
u16 linear_hr;
void *va;
prog = rcu_dereference(rq->xdp_prog);
if (prog) {
/* area for bpf_xdp_[store|load]_bytes */
net_prefetchw(page_address(frag_page->page) + frag_offset);
if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) {
rq->stats->buff_alloc_err++;
return NULL;
}
va = page_address(wi->linear_page.page);
net_prefetchw(va); /* xdp_frame data area */
linear_hr = XDP_PACKET_HEADROOM;
linear_data_len = 0;
linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD);
} else {
skb = napi_alloc_skb(rq->cq.napi,
ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
skb_mark_for_recycle(skb);
va = skb->head;
net_prefetchw(va); /* xdp_frame data area */
net_prefetchw(skb->data);
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
frag_offset += headlen;
byte_cnt -= headlen;
linear_hr = skb_headroom(skb);
linear_data_len = headlen;
linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb));
if (unlikely(frag_offset >= PAGE_SIZE)) {
frag_page++;
frag_offset -= PAGE_SIZE;
}
}
mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
while (byte_cnt) {
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
truesize += pg_consumed_bytes;
else
truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
pg_consumed_bytes);
byte_cnt -= pg_consumed_bytes;
frag_offset = 0;
frag_page++;
}
if (prog) {
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
int i;
for (i = 0; i < sinfo->nr_frags; i++)
/* non-atomic */
__set_bit(page_idx + i, wi->skip_release_bitmap);
return NULL;
}
mlx5e_page_release_fragmented(rq, &wi->linear_page);
return NULL; /* page/packet was consumed by XDP */
}
skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
linear_frame_sz,
mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
mxbuf.xdp.data - mxbuf.xdp.data_meta);
if (unlikely(!skb)) {
mlx5e_page_release_fragmented(rq, &wi->linear_page);
return NULL;
}
skb_mark_for_recycle(skb);
mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset);
wi->linear_page.frags++;
mlx5e_page_release_fragmented(rq, &wi->linear_page);
if (xdp_buff_has_frags(&mxbuf.xdp)) {
struct mlx5e_frag_page *pagep;
/* sinfo->nr_frags is reset by build_skb, calculate again. */
xdp_update_skb_shared_info(skb, frag_page - head_page,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
pagep = head_page;
do
pagep->frags++;
while (++pagep < frag_page);
}
__pskb_pull_tail(skb, headlen);
} else {
dma_addr_t addr;
if (xdp_buff_has_frags(&mxbuf.xdp)) {
struct mlx5e_frag_page *pagep;
xdp_update_skb_shared_info(skb, sinfo->nr_frags,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
pagep = frag_page - sinfo->nr_frags;
do
pagep->frags++;
while (++pagep < frag_page);
}
/* copy header */
addr = page_pool_get_dma_addr(head_page->page);
mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
......@@ -1998,6 +2109,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
}
return skb;
}
......@@ -2036,7 +2148,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment