Commit 13921345 authored by Maxim Mikityanskiy, committed by Jakub Kicinski

net/mlx5e: xsk: Use KLM to protect frame overrun in unaligned mode

XSK RQs support striding RQ linear mode, but the stride size may be
bigger than the XSK frame size, because:

1. The stride size must be a power of two.

2. The stride size must be equal to the UMR page size. Each XSK frame is
treated as a separate page, because XSK frames aren't necessarily adjacent
in physical memory, so the driver can't put more than one stride per page.

3. The minimal MTT page size is 4096 on older firmware.

That means that if XSK frame size is 2048 or not a power of two, the
strides may be bigger than XSK frames. Normally, it's not a problem if
the hardware enforces the MTU. However, traffic between vports skips the
hardware MTU check, and oversized packets may be received.

If an oversized packet is bigger than the XSK frame but not bigger than
the stride, it will overwrite the adjacent UMEM region. If the packet
takes more than one stride, those strides can be recycled for reuse,
so it's not a problem when the XSK frame size matches the stride size.

Work around the above issue by leveraging KLM to make a more
fine-grained mapping. The beginning of each stride is mapped to the
frame memory, and the padding up to the closest power of two is mapped
to the overflow page that doesn't belong to UMEM. This way, application
data corruption won't happen upon receiving packets bigger than MTU.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 9f123f74
...@@ -680,6 +680,7 @@ struct mlx5e_hw_gro_data { ...@@ -680,6 +680,7 @@ struct mlx5e_hw_gro_data {
enum mlx5e_mpwrq_umr_mode { enum mlx5e_mpwrq_umr_mode {
MLX5E_MPWRQ_UMR_MODE_ALIGNED, MLX5E_MPWRQ_UMR_MODE_ALIGNED,
MLX5E_MPWRQ_UMR_MODE_UNALIGNED, MLX5E_MPWRQ_UMR_MODE_UNALIGNED,
MLX5E_MPWRQ_UMR_MODE_OVERSIZED,
}; };
struct mlx5e_rq { struct mlx5e_rq {
......
...@@ -36,8 +36,28 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) ...@@ -36,8 +36,28 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
* 1. MTT - direct mapping in page granularity. * 1. MTT - direct mapping in page granularity.
* 2. KSM - indirect mapping to another MKey to arbitrary addresses, but * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
* all mappings have the same size. * all mappings have the same size.
* 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
* mappings can have different sizes.
*/ */
u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
bool unaligned = xsk ? xsk->unaligned : false; bool unaligned = xsk ? xsk->unaligned : false;
bool oversized = false;
if (xsk) {
oversized = xsk->chunk_size < (1 << page_shift);
WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
}
/* XSK frame size doesn't match the UMR page size, either because the
* frame size is not a power of two, or it's smaller than the minimal
* page size supported by the firmware.
* It's possible to receive packets bigger than MTU in certain setups.
* To avoid writing over the XSK frame boundary, the top region of each
* stride is mapped to a garbage page, resulting in two mappings of
* different sizes per frame.
*/
if (oversized)
return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
/* XSK frames can start at arbitrary unaligned locations, but they all /* XSK frames can start at arbitrary unaligned locations, but they all
* have the same size which is a power of two. It allows to optimize to * have the same size which is a power of two. It allows to optimize to
...@@ -60,6 +80,8 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) ...@@ -60,6 +80,8 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
return sizeof(struct mlx5_mtt); return sizeof(struct mlx5_mtt);
case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
return sizeof(struct mlx5_ksm); return sizeof(struct mlx5_ksm);
case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
return sizeof(struct mlx5_klm) * 2;
} }
WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
return 0; return 0;
...@@ -145,11 +167,21 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, ...@@ -145,11 +167,21 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
enum mlx5e_mpwrq_umr_mode umr_mode) enum mlx5e_mpwrq_umr_mode umr_mode)
{ {
if (umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED) /* Same limits apply to KSMs and KLMs. */
return min(MLX5E_MAX_RQ_NUM_KSMS, u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
switch (umr_mode) {
case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
return MLX5E_MAX_RQ_NUM_MTTS; return MLX5E_MAX_RQ_NUM_MTTS;
case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
return klm_limit;
case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
/* Each entry is two KLMs. */
return klm_limit / 2;
}
WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
return 0;
} }
static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
...@@ -1084,6 +1116,11 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, ...@@ -1084,6 +1116,11 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
xsk.unaligned = true; xsk.unaligned = true;
max_xsk_wqebbs = max(max_xsk_wqebbs, max_xsk_wqebbs = max(max_xsk_wqebbs,
mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
/* XSK unaligned mode, frame size is not equal to stride size. */
xsk.chunk_size -= 1;
max_xsk_wqebbs = max(max_xsk_wqebbs,
mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
} }
wqebbs += max_xsk_wqebbs; wqebbs += max_xsk_wqebbs;
......
...@@ -41,7 +41,15 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) ...@@ -41,7 +41,15 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
for (i = 0; i < batch; i++) {
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
};
}
} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
for (i = 0; i < batch; i++) { for (i = 0; i < batch; i++) {
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
...@@ -51,11 +59,22 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) ...@@ -51,11 +59,22 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
}; };
} }
} else { } else {
__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
rq->xsk_pool->chunk_size);
__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
for (i = 0; i < batch; i++) { for (i = 0; i < batch; i++) {
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
.ptag = cpu_to_be64(addr | MLX5_EN_WR), .key = rq->mkey_be,
.va = cpu_to_be64(addr),
.bcount = frame_size,
};
umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
.key = rq->mkey_be,
.va = cpu_to_be64(rq->wqe_overflow.addr),
.bcount = pad_size,
}; };
} }
} }
...@@ -70,6 +89,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) ...@@ -70,6 +89,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
offset = ix * rq->mpwqe.mtts_per_wqe; offset = ix * rq->mpwqe.mtts_per_wqe;
if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
......
...@@ -299,6 +299,8 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) ...@@ -299,6 +299,8 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
return MLX5_MKC_ACCESS_MODE_MTT; return MLX5_MKC_ACCESS_MODE_MTT;
case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
return MLX5_MKC_ACCESS_MODE_KSM; return MLX5_MKC_ACCESS_MODE_KSM;
case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
return MLX5_MKC_ACCESS_MODE_KLMS;
} }
WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
return 0; return 0;
...@@ -307,10 +309,12 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) ...@@ -307,10 +309,12 @@ static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
u32 npages, u8 page_shift, u32 *umr_mkey, u32 npages, u8 page_shift, u32 *umr_mkey,
dma_addr_t filler_addr, dma_addr_t filler_addr,
enum mlx5e_mpwrq_umr_mode umr_mode) enum mlx5e_mpwrq_umr_mode umr_mode,
u32 xsk_chunk_size)
{ {
struct mlx5_mtt *mtt; struct mlx5_mtt *mtt;
struct mlx5_ksm *ksm; struct mlx5_ksm *ksm;
struct mlx5_klm *klm;
u32 octwords; u32 octwords;
int inlen; int inlen;
void *mkc; void *mkc;
...@@ -347,6 +351,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, ...@@ -347,6 +351,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET64(mkc, mkc, len, npages << page_shift);
MLX5_SET(mkc, mkc, translations_octword_size, octwords); MLX5_SET(mkc, mkc, translations_octword_size, octwords);
if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED)
MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, log_page_size, page_shift);
MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
...@@ -357,6 +362,21 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, ...@@ -357,6 +362,21 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
* to the default page. * to the default page.
*/ */
switch (umr_mode) { switch (umr_mode) {
case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0; i < npages; i++) {
klm[i << 1] = (struct mlx5_klm) {
.va = cpu_to_be64(filler_addr),
.bcount = cpu_to_be32(xsk_chunk_size),
.key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
};
klm[(i << 1) + 1] = (struct mlx5_klm) {
.va = cpu_to_be64(filler_addr),
.bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size),
.key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
};
}
break;
case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0; i < npages; i++) for (i = 0; i < npages; i++)
...@@ -415,6 +435,7 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, ...@@ -415,6 +435,7 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
{ {
u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0;
u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
u32 num_entries, max_num_entries; u32 num_entries, max_num_entries;
u32 umr_mkey; u32 umr_mkey;
...@@ -432,7 +453,7 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq ...@@ -432,7 +453,7 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
&umr_mkey, rq->wqe_overflow.addr, &umr_mkey, rq->wqe_overflow.addr,
rq->mpwqe.umr_mode); rq->mpwqe.umr_mode, xsk_chunk_size);
rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment