Commit 6470d2e7 authored by Maxim Mikityanskiy's avatar Maxim Mikityanskiy Committed by Jakub Kicinski

net/mlx5e: xsk: Use KSM for unaligned XSK

UMR MTTs used in striding RQ have certain alignment requirements. While
it's guaranteed to work when UMR pages are aligned to the UMR page size,
in practice it works then UMR pages are aligned to 8 bytes. However,
it's still not enough flexibility for the unaligned mode of XSK. This
patch leverages KSM to map UMR pages without alignment requirements,
when unaligned XSK is active. The downside is that KSM entries are twice
as big as MTTs, which limits the maximum WQE size, so regular RQs and
aligned XSK continue using MTTs.
Signed-off-by: default avatarMaxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: default avatarTariq Toukan <tariqt@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent c4418f34
......@@ -112,8 +112,10 @@ struct page_pool;
#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8))
#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2)
#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
#define MLX5_KSM_OCTW(ksms) (ksms)
#define MLX5E_MAX_RQ_NUM_MTTS \
(ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
......@@ -265,6 +267,7 @@ struct mlx5e_umr_wqe {
union {
DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms);
};
};
......@@ -708,6 +711,7 @@ struct mlx5e_rq {
u8 pages_per_wqe;
u8 umr_wqebbs;
u8 mtts_per_wqe;
u8 unaligned;
struct mlx5e_shampo_hd *shampo;
} mpwqe;
};
......@@ -1007,7 +1011,8 @@ struct mlx5e_profile {
void mlx5e_build_ptys2ethtool_map(void);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
bool unaligned);
void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
......
......@@ -9,6 +9,7 @@
struct mlx5e_xsk_param {
u16 headroom;
u16 chunk_size;
bool unaligned;
};
struct mlx5e_cq_param {
......@@ -87,12 +88,13 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
/* Striding RQ dynamic parameters */
u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift);
u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift);
u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift);
u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift);
u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift);
u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift);
u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, bool unaligned);
u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned);
/* Parameter calculations */
......
......@@ -72,6 +72,7 @@ void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *x
{
xsk->headroom = xsk_pool_get_headroom(pool);
xsk->chunk_size = xsk_pool_get_chunk_size(pool);
xsk->unaligned = pool->unaligned;
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
......
......@@ -7,8 +7,6 @@
#include "en.h"
#include <net/xdp_sock_drv.h>
#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
/* RX data path */
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
......@@ -23,7 +21,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
retry:
dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
if (!dma_info->xsk)
return -ENOMEM;
......@@ -35,17 +32,6 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
*/
dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
/* MTT page mapping has alignment requirements. If they are not
* satisfied, leak the descriptor so that it won't come again, and try
* to allocate a new one.
*/
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
xsk_buff_discard(dma_info->xsk);
goto retry;
}
}
return 0;
}
......
......@@ -314,7 +314,7 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
/* Limitation for regular RQ. XSK RQ may clamp the queue length in
* mlx5e_mpwqe_get_log_rq_size.
*/
u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, PAGE_SHIFT);
u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, PAGE_SHIFT, false);
param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
max_log_mpwrq_pkts);
......
......@@ -68,7 +68,8 @@
#include "qos.h"
#include "en/trap.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift)
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
bool unaligned)
{
u16 umr_wqebbs, max_wqebbs;
bool striding_rq_umr;
......@@ -78,7 +79,7 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_
if (!striding_rq_umr)
return false;
umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift);
umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, unaligned);
max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
/* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is
* calculated from mlx5e_get_max_sq_aligned_wqebbs.
......@@ -208,9 +209,11 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
{
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
u16 octowords;
u8 ds_cnt;
ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift),
ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift,
rq->mpwqe.unaligned),
MLX5_SEND_WQE_DS);
cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
......@@ -218,8 +221,9 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
cseg->umr_mkey = rq->mpwqe.umr_mkey_be;
ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
ucseg->xlt_octowords =
cpu_to_be16(MLX5_MTT_OCTW(rq->mpwqe.pages_per_wqe));
octowords = rq->mpwqe.unaligned ? MLX5_KSM_OCTW(rq->mpwqe.pages_per_wqe) :
MLX5_MTT_OCTW(rq->mpwqe.pages_per_wqe);
ucseg->xlt_octowords = cpu_to_be16(octowords);
ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
......@@ -279,39 +283,51 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
return 0;
}
static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
u64 npages, u8 page_shift, u32 *umr_mkey,
dma_addr_t filler_addr)
static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
u32 npages, u8 page_shift, u32 *umr_mkey,
dma_addr_t filler_addr, bool unaligned)
{
struct mlx5_mtt *mtt;
struct mlx5_ksm *ksm;
u32 octwords;
int inlen;
void *mkc;
u32 *in;
int err;
int i;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
if (unaligned && !MLX5_CAP_GEN(mdev, fixed_buffer_size)) {
mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n");
return -EINVAL;
}
inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in),
unaligned ? sizeof(*ksm) : sizeof(*mtt),
npages);
if (inlen < 0)
return inlen;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
octwords = unaligned ? MLX5_KSM_OCTW(npages) : MLX5_MTT_OCTW(npages);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, access_mode_1_0,
unaligned ? MLX5_MKC_ACCESS_MODE_KSM : MLX5_MKC_ACCESS_MODE_MTT);
mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
MLX5_SET64(mkc, mkc, len, npages << page_shift);
MLX5_SET(mkc, mkc, translations_octword_size,
MLX5_MTT_OCTW(npages));
MLX5_SET(mkc, mkc, translations_octword_size, octwords);
MLX5_SET(mkc, mkc, log_page_size, page_shift);
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
MLX5_MTT_OCTW(npages));
MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
/* Initialize the mkey with all MTTs pointing to a default
* page (filler_addr). When the channels are activated, UMR
......@@ -319,9 +335,20 @@ static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
* the RQ's pool, while the gaps (wqe_overflow) remain mapped
* to the default page.
*/
mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0 ; i < npages ; i++)
mtt[i].ptag = cpu_to_be64(filler_addr);
if (unaligned) {
ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0; i < npages; i++)
ksm[i] = (struct mlx5_ksm) {
.key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
.va = cpu_to_be64(filler_addr),
};
} else {
mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0; i < npages; i++)
mtt[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(filler_addr),
};
}
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
......@@ -364,12 +391,24 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
{
u64 num_mtts = mlx5_wq_ll_get_size(&rq->mpwqe.wq) * rq->mpwqe.mtts_per_wqe;
u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
u32 num_entries, max_num_entries;
u32 umr_mkey;
int err;
err = mlx5e_create_umr_mtt_mkey(mdev, num_mtts, rq->mpwqe.page_shift,
&umr_mkey, rq->wqe_overflow.addr);
max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.unaligned);
/* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */
if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe,
&num_entries) ||
num_entries > max_num_entries))
mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n",
__func__, wq_size, rq->mpwqe.mtts_per_wqe,
max_num_entries);
err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
&umr_mkey, rq->wqe_overflow.addr,
rq->mpwqe.unaligned);
rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
return err;
}
......@@ -597,12 +636,16 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
rq->mpwqe.unaligned = xsk ? xsk->unaligned : false;
rq->mpwqe.pages_per_wqe =
mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift);
mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
rq->mpwqe.unaligned);
rq->mpwqe.umr_wqebbs =
mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift);
mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift,
rq->mpwqe.unaligned);
rq->mpwqe.mtts_per_wqe =
mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift);
mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift,
rq->mpwqe.unaligned);
pool_size = rq->mpwqe.pages_per_wqe <<
mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
......@@ -4932,7 +4975,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
!MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
!MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT))
mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, false))
netdev->vlan_features |= NETIF_F_LRO;
netdev->hw_features = netdev->vlan_features;
......
......@@ -673,6 +673,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5e_icosq *sq = rq->icosq;
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u32 offset; /* 17-bit value with MTT. */
u16 pi;
int err;
int i;
......@@ -694,13 +695,27 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, dma_info++) {
err = mlx5e_page_alloc(rq, dma_info);
if (unlikely(err))
goto err_unmap;
umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
if (unlikely(rq->mpwqe.unaligned)) {
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, dma_info++) {
err = mlx5e_page_alloc(rq, dma_info);
if (unlikely(err))
goto err_unmap;
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
.key = rq->mkey_be,
.va = cpu_to_be64(dma_info->addr),
};
}
} else {
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, dma_info++) {
err = mlx5e_page_alloc(rq, dma_info);
if (unlikely(err))
goto err_unmap;
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR),
};
}
}
bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
......@@ -709,8 +724,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe->ctrl.opmod_idx_opcode =
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
MLX5_OPCODE_UMR);
umr_wqe->uctrl.xlt_offset =
cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe));
offset = ix * rq->mpwqe.mtts_per_wqe;
if (!rq->mpwqe.unaligned)
offset = MLX5_ALIGNED_MTTS_OCTW(offset);
umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
......
......@@ -478,6 +478,12 @@ struct mlx5_klm {
__be64 va;
};
struct mlx5_ksm {
__be32 reserved;
__be32 key;
__be64 va;
};
struct mlx5_stride_block_entry {
__be16 stride;
__be16 bcount;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment