Commit cd02a1a2 authored by David S. Miller

Merge branch 'mlx5e-xdp-extend'

Tariq Toukan says:

====================
net/mlx5e: Extend XDP multi-buffer capabilities

This series extends the XDP multi-buffer support in the mlx5e driver.

Patchset breakdown:
- Infrastructural changes and preparations.
- Add XDP multi-buffer support for XDP redirect-in.
- Use TX MPWQE (multi-packet WQE) HW feature for non-linear
  single-segmented XDP frames.
- Add XDP multi-buffer support for striding RQ.

In Striding RQ, we overcome the lack of headroom and tailroom between
the RQ strides by allocating a side page per packet and using it for the
xdp_buff descriptor. We structure the xdp_buff so that it contains
nothing in the linear part, and the whole packet resides in the
fragments.

Performance highlight:

Packet rate test, 64 bytes, 32 channels, MTU 9000 bytes.
CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz.
NIC: ConnectX-6 Dx, at 100 Gbps.

+----------+-------------+-------------+---------+
| Test     | Legacy RQ   | Striding RQ | Speedup |
+----------+-------------+-------------+---------+
| XDP_DROP | 101,615,544 | 117,191,020 | +15%    |
+----------+-------------+-------------+---------+
| XDP_TX   |  95,608,169 | 117,043,422 | +22%    |
+----------+-------------+-------------+---------+

Series generated against net commit:
e61caf04 Merge branch 'page_pool-allow-caching-from-safely-localized-napi'

I'm submitting this directly as Saeed is traveling.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 14cac662 f52ac702
......@@ -475,59 +475,18 @@ struct mlx5e_txqsq {
cqe_ts_to_ns ptp_cyc2time;
} ____cacheline_aligned_in_smp;
/* XDP packets can be transmitted in different ways. On completion, we need to
* distinguish between them to clean up things in a proper way.
*/
enum mlx5e_xdp_xmit_mode {
/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
* device or XDP_TX from an XSK RQ. The frame has to be unmapped and
* returned.
*/
MLX5E_XDP_XMIT_MODE_FRAME,
/* The xdp_frame was created in place as a result of XDP_TX from a
* regular RQ. No DMA remapping happened, and the page belongs to us.
*/
MLX5E_XDP_XMIT_MODE_PAGE,
/* No xdp_frame was created at all, the transmit happened from a UMEM
* page. The UMEM Completion Ring producer pointer has to be increased.
*/
MLX5E_XDP_XMIT_MODE_XSK,
};
struct mlx5e_xdp_info {
enum mlx5e_xdp_xmit_mode mode;
union {
struct {
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
} frame;
struct {
struct mlx5e_rq *rq;
struct page *page;
} page;
};
};
struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
};
struct mlx5e_xdp_info_fifo {
struct mlx5e_xdp_info *xi;
union mlx5e_xdp_info *xi;
u32 *cc;
u32 *pc;
u32 mask;
};
struct mlx5e_xdpsq;
struct mlx5e_xmit_data;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xmit_data *,
struct skb_shared_info *,
int);
struct mlx5e_xdpsq {
......@@ -628,6 +587,7 @@ union mlx5e_alloc_units {
/* Bookkeeping attached to one multi-packet (striding RQ) receive WQE; this is
 * the 'wi' argument of the mlx5e_skb_from_cqe_mpwrq_*() handlers.
 */
struct mlx5e_mpw_info {
/* NOTE(review): presumably the number of strides of this WQE already
 * consumed by completions - not visible in this diff, confirm.
 */
u16 consumed_strides;
/* One bit per page of the WQE. The RX handlers set the bit of every page
 * that carried a packet for which XDP raised MLX5E_RQ_FLAG_XDP_XMIT.
 * NOTE(review): presumably the release path then leaves those pages alone.
 */
DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
/* Side page allocated per packet by the non-linear striding RQ XDP path.
 * It backs the (empty) linear part of the xdp_buff, i.e. headroom and
 * shared info, while the packet data itself stays in the fragments.
 */
struct mlx5e_frag_page linear_page;
/* Pages backing the WQE; the handlers index frag_pages[] by page index. */
union mlx5e_alloc_units alloc_units;
};
......
......@@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
u32 sz;
/* XSK frames are mapped as individual pages, because frames may come in
* an arbitrary order from random locations in the UMEM.
*/
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
/* XDP in mlx5e doesn't support multiple packets per page. */
if (params->xdp_prog)
return PAGE_SIZE;
sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
/* XDP in mlx5e doesn't support multiple packets per page.
* Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
*/
return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz;
}
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
......@@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
}
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
log_wqe_num_of_strides,
page_shift, umr_mode);
}
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
......@@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
/* XDP in mlx5e doesn't support multiple packets per page. */
if (params->xdp_prog)
return PAGE_SHIFT;
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
......@@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
return -EOPNOTSUPP;
if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
return -EINVAL;
return 0;
}
......
......@@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
......
......@@ -77,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
}
/* TX */
/* Description of one linear buffer handed to the XDP transmit callbacks. */
struct mlx5e_xmit_data {
/* DMA address of the buffer (the XSK path syncs this range for the device). */
dma_addr_t dma_addr;
/* CPU pointer to the same buffer. */
void *data;
/* Buffer length in bytes; 31 bits so that has_frags fits in the same u32. */
u32 len : 31;
/* NOTE(review): presumably set when this object is embedded in a
 * struct mlx5e_xmit_data_frags carrying fragment info - confirm in callers.
 */
u32 has_frags : 1;
};
/* A struct mlx5e_xmit_data extended with fragment information.
 * NOTE(review): this appears to replace the separate skb_shared_info argument
 * that the xmit callbacks used to take - confirm against the callers.
 */
struct mlx5e_xmit_data_frags {
/* Linear part; kept as the first member. */
struct mlx5e_xmit_data xd;
/* Shared info describing the fragments of the frame. */
struct skb_shared_info *sinfo;
/* NOTE(review): presumably one DMA address per fragment - confirm. */
dma_addr_t *dma_arr;
};
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
......
......@@ -50,6 +50,53 @@ struct mlx5e_xdp_buff {
struct mlx5e_rq *rq;
};
/* XDP packets can be transmitted in different ways. On completion, we need to
* distinguish between them to clean up things in a proper way.
*/
/* The mode is the first xdpi fifo entry pushed for every transmitted packet;
 * it determines which entries (if any) follow it in the fifo.
 */
enum mlx5e_xdp_xmit_mode {
/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
* device or XDP_TX from an XSK RQ. The frame has to be unmapped and
* returned.
* Followed in the fifo by the xdp_frame and its DMA addresses.
*/
MLX5E_XDP_XMIT_MODE_FRAME,
/* The xdp_frame was created in place as a result of XDP_TX from a
* regular RQ. No DMA remapping happened, and the page belongs to us.
* Followed in the fifo by a page count and that many pages.
*/
MLX5E_XDP_XMIT_MODE_PAGE,
/* No xdp_frame was created at all, the transmit happened from a UMEM
* page. The UMEM Completion Ring producer pointer has to be increased.
* No further fifo entries follow.
*/
MLX5E_XDP_XMIT_MODE_XSK,
};
/* xmit_mode entry is pushed to the fifo per packet, followed by multiple
* entries, as follows:
*
* MLX5E_XDP_XMIT_MODE_FRAME:
* xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num.
* 'num' is derived from xdpf.
*
* MLX5E_XDP_XMIT_MODE_PAGE:
* num, page_1, page_2, ... , page_num.
*
* MLX5E_XDP_XMIT_MODE_XSK:
* none.
*/
/* One slot of the xdpi fifo. A slot holds exactly one of the members below;
 * which one is implied by its position in the per-packet entry sequence.
 */
union mlx5e_xdp_info {
/* First entry of every packet: how it was transmitted. */
enum mlx5e_xdp_xmit_mode mode;
/* Entries that follow MLX5E_XDP_XMIT_MODE_FRAME: the xdp_frame first,
 * then one dma_addr entry per mapped buffer.
 */
union {
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
} frame;
/* Entries that follow MLX5E_XDP_XMIT_MODE_PAGE: the page count first,
 * then one page entry per page.
 * NOTE(review): 'rq' is not part of the entry sequence documented for
 * this mode - confirm whether it is still pushed anywhere.
 */
union {
struct mlx5e_rq *rq;
u8 num;
struct page *page;
} page;
};
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
......@@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
......@@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
union mlx5e_xdp_info xi)
{
u32 i = (*fifo->pc)++ & fifo->mask;
fifo->xi[i] = *xi;
fifo->xi[i] = xi;
}
static inline struct mlx5e_xdp_info
static inline union mlx5e_xdp_info
mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
{
return fifo->xi[(*fifo->cc)++ & fifo->mask];
......
......@@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
* same.
*/
static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_info *xdpi)
union mlx5e_xdp_info *xdpi)
{
u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
......@@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
wi->num_pkts = 1;
nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi);
sq->doorbell_cseg = &nopwqe->ctrl;
}
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
union mlx5e_xdp_info xdpi;
bool work_done = true;
bool flush = false;
......@@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xmit_xdp_frame_check_mpwqe,
mlx5e_xmit_xdp_frame_check,
sq);
struct mlx5e_xmit_data xdptxd = {};
struct xdp_desc desc;
bool ret;
......@@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
mlx5e_xmit_xdp_frame, sq, &xdptxd,
check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
......@@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xsk_tx_post_err(sq, &xdpi);
} else {
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
}
flush = true;
......
......@@ -803,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog)
pool_size *= 2; /* additional page per packet for the linear part */
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
......@@ -1300,17 +1303,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of
* entries of all xmit_modes.
*/
size_t size;
size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
size = array_size(sizeof(*xdpi_fifo->xi), entries);
xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
return -ENOMEM;
xdpi_fifo->pc = &sq->xdpi_fifo_pc;
xdpi_fifo->cc = &sq->xdpi_fifo_cc;
xdpi_fifo->mask = dsegs_per_wq - 1;
xdpi_fifo->mask = entries - 1;
return 0;
}
......@@ -1860,11 +1865,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
csp.min_inline_mode = sq->min_inline_mode;
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
/* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
* supported by upstream, and there is no defined trigger to allow
* transmitting redirected multi-buffer frames.
*/
if (param->is_xdp_mb && !is_redirect)
if (param->is_xdp_mb)
set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
......@@ -1888,7 +1889,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg;
sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
.num_wqebbs = 1,
......@@ -1897,9 +1897,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
dseg->lkey = sq->mkey_be;
}
}
......@@ -4066,9 +4063,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev)
val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_NDO_XMIT;
if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
val |= NETDEV_XDP_ACT_RX_SG;
NETDEV_XDP_ACT_RX_SG |
NETDEV_XDP_ACT_NDO_XMIT |
NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(netdev, val);
}
......@@ -4262,19 +4259,24 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev,
/* No XSK params: AF_XDP can't be enabled yet at the point of setting
* the XDP program.
*/
is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
}
if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ?
mlx5e_rx_is_linear_skb(mdev, params, NULL) :
mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL);
if (!is_linear) {
if (!params->xdp_prog->aux->xdp_has_frags) {
netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
}
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
!mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) {
netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
params->sw_mtu,
mlx5e_xdp_max_mtu(params, NULL));
return false;
}
}
return true;
......@@ -4766,20 +4768,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
queue_work(priv->wq, &priv->tx_timeout_work);
}
static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
struct net_device *netdev = priv->netdev;
struct mlx5e_params new_params;
if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
new_params = priv->channels.params;
new_params.xdp_prog = prog;
if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
if (!mlx5e_params_validate_xdp(netdev, mdev, params))
return -EINVAL;
return 0;
......@@ -4806,8 +4803,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mutex_lock(&priv->state_lock);
new_params = priv->channels.params;
new_params.xdp_prog = prog;
if (prog) {
err = mlx5e_xdp_allowed(priv, prog);
err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params);
if (err)
goto unlock;
}
......@@ -4815,22 +4815,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
new_params = priv->channels.params;
new_params.xdp_prog = prog;
/* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
* supported with the new parameters: if PAGE_SIZE is bigger than
* MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
* the MTU is small enough for the linear mode, because XDP uses strides
* of PAGE_SIZE on regular RQs.
*/
if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
/* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
if (err)
goto unlock;
}
old_prog = priv->channels.params.xdp_prog;
err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
......
......@@ -471,6 +471,35 @@ static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
return i;
}
/* Append one received page fragment to the shared info of an xdp_buff.
 *
 * @rq:          RQ the data was received on (supplies device and DMA direction)
 * @sinfo:       shared info area of @xdp
 * @xdp:         buffer being assembled
 * @frag_page:   page holding the fragment
 * @frag_offset: offset of the fragment inside the page
 * @len:         fragment length in bytes
 *
 * The fragment range is synced for CPU access first. On the first fragment the
 * frag counters in @sinfo are zeroed and @xdp is flagged as having frags.
 * If the page is a pfmemalloc page, that is recorded on @xdp as well.
 */
static void
mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo,
			       struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page,
			       u32 frag_offset, u32 len)
{
	dma_addr_t dma = page_pool_get_dma_addr(frag_page->page);
	skb_frag_t *slot;

	dma_sync_single_for_cpu(rq->pdev, dma + frag_offset, len, rq->buff.map_dir);

	if (!xdp_buff_has_frags(xdp)) {
		/* sinfo is deliberately left untouched until a fragment
		 * actually shows up, to avoid a cold cache access when
		 * possible.
		 */
		sinfo->xdp_frags_size = 0;
		sinfo->nr_frags = 0;
		xdp_buff_set_frags_flag(xdp);
	}

	/* Claim the next free frag slot and account for its bytes. */
	slot = sinfo->frags + sinfo->nr_frags;
	sinfo->nr_frags++;
	sinfo->xdp_frags_size += len;

	__skb_frag_set_page(slot, frag_page->page);
	skb_frag_size_set(slot, len);
	skb_frag_off_set(slot, frag_offset);

	if (page_is_pfmemalloc(frag_page->page))
		xdp_buff_set_frag_pfmemalloc(xdp);
}
static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
struct page *page, u32 frag_offset, u32 len,
......@@ -1601,10 +1630,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
}
static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
void *va, u16 headroom, u32 len,
void *va, u16 headroom, u32 frame_sz, u32 len,
struct mlx5e_xdp_buff *mxbuf)
{
xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq);
xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true);
mxbuf->cqe = cqe;
mxbuf->rq = rq;
......@@ -1637,7 +1666,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf))
return NULL; /* page/packet was consumed by XDP */
......@@ -1685,7 +1715,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(va + rx_headroom);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
frag_consumed_bytes, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
truesize = 0;
......@@ -1694,35 +1725,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
wi++;
while (cqe_bcnt) {
skb_frag_t *frag;
frag_page = wi->frag_page;
frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
addr = page_pool_get_dma_addr(frag_page->page);
dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
frag_consumed_bytes, rq->buff.map_dir);
if (!xdp_buff_has_frags(&mxbuf.xdp)) {
/* Init on the first fragment to avoid cold cache access
* when possible.
*/
sinfo->nr_frags = 0;
sinfo->xdp_frags_size = 0;
xdp_buff_set_frags_flag(&mxbuf.xdp);
}
frag = &sinfo->frags[sinfo->nr_frags++];
__skb_frag_set_page(frag, frag_page->page);
skb_frag_off_set(frag, wi->offset);
skb_frag_size_set(frag, frag_consumed_bytes);
if (page_is_pfmemalloc(frag_page->page))
xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
sinfo->xdp_frags_size += frag_consumed_bytes;
mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
wi->offset, frag_consumed_bytes);
truesize += frag_info->frag_stride;
cqe_bcnt -= frag_consumed_bytes;
......@@ -1969,35 +1977,139 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
struct mlx5e_frag_page *head_page = frag_page;
u32 frag_offset = head_offset + headlen;
u32 byte_cnt = cqe_bcnt - headlen;
u32 frag_offset = head_offset;
u32 byte_cnt = cqe_bcnt;
struct skb_shared_info *sinfo;
struct mlx5e_xdp_buff mxbuf;
unsigned int truesize = 0;
struct bpf_prog *prog;
struct sk_buff *skb;
dma_addr_t addr;
u32 linear_frame_sz;
u16 linear_data_len;
u16 linear_hr;
void *va;
skb = napi_alloc_skb(rq->cq.napi,
ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
prog = rcu_dereference(rq->xdp_prog);
if (prog) {
/* area for bpf_xdp_[store|load]_bytes */
net_prefetchw(page_address(frag_page->page) + frag_offset);
if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) {
rq->stats->buff_alloc_err++;
return NULL;
}
va = page_address(wi->linear_page.page);
net_prefetchw(va); /* xdp_frame data area */
linear_hr = XDP_PACKET_HEADROOM;
linear_data_len = 0;
linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD);
} else {
skb = napi_alloc_skb(rq->cq.napi,
ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
skb_mark_for_recycle(skb);
va = skb->head;
net_prefetchw(va); /* xdp_frame data area */
net_prefetchw(skb->data);
frag_offset += headlen;
byte_cnt -= headlen;
linear_hr = skb_headroom(skb);
linear_data_len = headlen;
linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb));
if (unlikely(frag_offset >= PAGE_SIZE)) {
frag_page++;
frag_offset -= PAGE_SIZE;
}
}
net_prefetchw(skb->data);
mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
while (byte_cnt) {
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
if (unlikely(frag_offset >= PAGE_SIZE)) {
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
truesize += pg_consumed_bytes;
else
truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
pg_consumed_bytes);
byte_cnt -= pg_consumed_bytes;
frag_offset = 0;
frag_page++;
frag_offset -= PAGE_SIZE;
}
skb_mark_for_recycle(skb);
mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset);
/* copy header */
addr = page_pool_get_dma_addr(head_page->page);
mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
if (prog) {
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
int i;
for (i = 0; i < sinfo->nr_frags; i++)
/* non-atomic */
__set_bit(page_idx + i, wi->skip_release_bitmap);
return NULL;
}
mlx5e_page_release_fragmented(rq, &wi->linear_page);
return NULL; /* page/packet was consumed by XDP */
}
skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
linear_frame_sz,
mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
mxbuf.xdp.data - mxbuf.xdp.data_meta);
if (unlikely(!skb)) {
mlx5e_page_release_fragmented(rq, &wi->linear_page);
return NULL;
}
skb_mark_for_recycle(skb);
wi->linear_page.frags++;
mlx5e_page_release_fragmented(rq, &wi->linear_page);
if (xdp_buff_has_frags(&mxbuf.xdp)) {
struct mlx5e_frag_page *pagep;
/* sinfo->nr_frags is reset by build_skb, calculate again. */
xdp_update_skb_shared_info(skb, frag_page - head_page,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
pagep = head_page;
do
pagep->frags++;
while (++pagep < frag_page);
}
__pskb_pull_tail(skb, headlen);
} else {
dma_addr_t addr;
if (xdp_buff_has_frags(&mxbuf.xdp)) {
struct mlx5e_frag_page *pagep;
xdp_update_skb_shared_info(skb, sinfo->nr_frags,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
pagep = frag_page - sinfo->nr_frags;
do
pagep->frags++;
while (++pagep < frag_page);
}
/* copy header */
addr = page_pool_get_dma_addr(head_page->page);
mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
}
return skb;
}
......@@ -2036,7 +2148,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_xdp_buff mxbuf;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
cqe_bcnt, &mxbuf);
if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment