Commit 38f8ff5b authored by Jason Gunthorpe

RDMA/mlx5: Reorganize mlx5_ib_reg_user_mr()

This function handles an ODP and regular MR flow all mushed together, even
though the two flows are quite different. Split them into two dedicated
functions.

Link: https://lore.kernel.org/r/20201130075839.278575-5-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent 6e0954b1
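
In outline, the split leaves mlx5_ib_reg_user_mr() as a thin dispatcher: on-demand-paging requests go to the new create_user_odp_mr(), while the regular path pins the umem up front and hands it to create_real_mr(). A condensed sketch of the new entry point, pulled from the diff below (the debug print is omitted and the comments are added here for orientation):

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 iova, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	/* ODP MRs take the dedicated on-demand-paging path. */
	if (access_flags & IB_ACCESS_ON_DEMAND)
		return create_user_odp_mr(pd, start, length, iova,
					  access_flags, udata);

	/* Regular MRs pin the user memory and register it directly. */
	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
	if (IS_ERR(umem))
		return ERR_CAST(umem);
	return create_real_mr(pd, umem, iova, access_flags);
}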
@@ -1340,7 +1340,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
			       enum ib_uverbs_advise_mr_advice advice,
			       u32 flags, struct ib_sge *sg_list, u32 num_sge);
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -1362,7 +1362,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 {
	return -EOPNOTSUPP;
 }
-static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
 {
	return -EOPNOTSUPP;
 }
...
@@ -56,6 +56,10 @@ enum {

 static void
 create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
+				     struct ib_umem *umem, u64 iova,
+				     int access_flags, unsigned int page_size,
+				     bool populate);

 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
					  struct ib_pd *pd)
@@ -875,32 +879,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
	return MLX5_MAX_UMR_SHIFT;
 }

-static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start,
-				   u64 length, int access_flags)
-{
-	struct ib_umem *u;
-
-	if (access_flags & IB_ACCESS_ON_DEMAND) {
-		struct ib_umem_odp *odp;
-
-		odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
-				      &mlx5_mn_ops);
-		if (IS_ERR(odp)) {
-			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
-				    PTR_ERR(odp));
-			return ERR_CAST(odp);
-		}
-		return &odp->umem;
-	}
-
-	u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
-	if (IS_ERR(u)) {
-		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
-		return u;
-	}
-	return u;
-}
-
 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 {
	struct mlx5_ib_umr_context *context =
@@ -957,7 +935,16 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
	return &cache->ent[order];
 }

-static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
-					      struct ib_umem *umem, u64 iova,
-					      int access_flags)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+			  u64 length, int access_flags)
+{
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
+	mr->ibmr.length = length;
+	mr->access_flags = access_flags;
+}
+
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+					     struct ib_umem *umem, u64 iova,
+					     int access_flags)
 {
@@ -971,16 +958,26 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
		return ERR_PTR(-EINVAL);
	ent = mr_cache_ent_from_order(
		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
-	if (!ent)
-		return ERR_PTR(-E2BIG);
-
-	/* Matches access in alloc_cache_mr() */
-	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
-		return ERR_PTR(-EOPNOTSUPP);
+	/*
+	 * Matches access in alloc_cache_mr(). If the MR can't come from the
+	 * cache then synchronously create an uncached one.
+	 */
+	if (!ent || ent->limit == 0 ||
+	    !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+		mutex_lock(&dev->slow_path_mutex);
+		mr = reg_create(NULL, pd, umem, iova, access_flags, page_size,
+				false);
+		mutex_unlock(&dev->slow_path_mutex);
+		return mr;
+	}

	mr = get_cache_mr(ent);
	if (!mr) {
		mr = create_cache_mr(ent);
+		/*
+		 * The above already tried to do the same stuff as reg_create(),
+		 * no reason to try it again.
+		 */
		if (IS_ERR(mr))
			return mr;
	}
@@ -993,6 +990,8 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
	mr->mmkey.size = umem->length;
	mr->mmkey.pd = to_mpd(pd)->pdn;
	mr->page_shift = order_base_2(page_size);
+	mr->umem = umem;
+	set_mr_fields(dev, mr, umem->length, access_flags);

	return mr;
 }
@@ -1279,10 +1278,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
  */
 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     struct ib_umem *umem, u64 iova,
-				     int access_flags, bool populate)
+				     int access_flags, unsigned int page_size,
+				     bool populate)
 {
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	unsigned int page_size;
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
@@ -1291,11 +1290,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

-	page_size =
-		mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
-	if (WARN_ON(!page_size))
-		return ERR_PTR(-EINVAL);
+	if (!page_size) {
+		page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
+						     0, iova);
+		if (!page_size)
+			return ERR_PTR(-EINVAL);
+	}
	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);
@@ -1352,6 +1352,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->dev = dev;
+	mr->umem = umem;
+	set_mr_fields(dev, mr, umem->length, access_flags);

	kvfree(in);
	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1368,15 +1370,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
	return ERR_PTR(err);
 }

-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
-			  u64 length, int access_flags)
-{
-	mr->ibmr.lkey = mr->mmkey.key;
-	mr->ibmr.rkey = mr->mmkey.key;
-	mr->ibmr.length = length;
-	mr->access_flags = access_flags;
-}
-
 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
				       u64 length, int acc, int mode)
 {
@@ -1471,70 +1464,32 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
				 attr->access_flags, mode);
 }

-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				  u64 virt_addr, int access_flags,
-				  struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+				    u64 iova, int access_flags)
 {
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool xlt_with_umr;
-	struct ib_umem *umem;
	int err;

-	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
-		return ERR_PTR(-EOPNOTSUPP);
-
-	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
-		    start, virt_addr, length, access_flags);
-
-	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
-	/* ODP requires xlt update via umr to work. */
-	if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
-		return ERR_PTR(-EINVAL);
-
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
-	    length == U64_MAX) {
-		if (virt_addr != start)
-			return ERR_PTR(-EINVAL);
-		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
-		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-			return ERR_PTR(-EINVAL);
-
-		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
-		if (IS_ERR(mr))
-			return ERR_CAST(mr);
-		return &mr->ibmr;
-	}
-
-	umem = mr_umem_get(dev, start, length, access_flags);
-	if (IS_ERR(umem))
-		return ERR_CAST(umem);
-
+	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
	if (xlt_with_umr) {
-		mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags);
-		if (IS_ERR(mr))
-			mr = NULL;
-	}
-
-	if (!mr) {
+		mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+	} else {
		mutex_lock(&dev->slow_path_mutex);
-		mr = reg_create(NULL, pd, umem, virt_addr, access_flags,
-				!xlt_with_umr);
+		mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true);
		mutex_unlock(&dev->slow_path_mutex);
	}
-
	if (IS_ERR(mr)) {
-		err = PTR_ERR(mr);
-		goto error;
+		ib_umem_release(umem);
+		return ERR_CAST(mr);
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

-	mr->umem = umem;
-	atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
-	set_mr_fields(dev, mr, length, access_flags);
+	atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);

-	if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+	if (xlt_with_umr) {
		/*
		 * If the MR was created with reg_create then it will be
		 * configured properly but left disabled. It is safe to go ahead
@@ -1546,32 +1501,88 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			return ERR_PTR(err);
		}
	}
+	return &mr->ibmr;
+}

-	if (is_odp_mr(mr)) {
-		to_ib_umem_odp(mr->umem)->private = mr;
-		init_waitqueue_head(&mr->q_deferred_work);
-		atomic_set(&mr->num_deferred_work, 0);
-		err = xa_err(xa_store(&dev->odp_mkeys,
-				      mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
-				      GFP_KERNEL));
-		if (err) {
-			dereg_mr(dev, mr);
-			return ERR_PTR(err);
-		}
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+					u64 iova, int access_flags,
+					struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_odp *odp;
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!start && length == U64_MAX) {
+		if (iova != 0)
+			return ERR_PTR(-EINVAL);
+		if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+			return ERR_PTR(-EINVAL);

-		err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
-		if (err) {
-			dereg_mr(dev, mr);
-			return ERR_PTR(err);
-		}
+		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
+		if (IS_ERR(mr))
+			return ERR_CAST(mr);
+		return &mr->ibmr;
	}

+	/* ODP requires xlt update via umr to work. */
+	if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+		return ERR_PTR(-EINVAL);
+
+	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+			      &mlx5_mn_ops);
+	if (IS_ERR(odp))
+		return ERR_CAST(odp);
+
+	mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+	if (IS_ERR(mr)) {
+		ib_umem_release(&odp->umem);
+		return ERR_CAST(mr);
+	}
+
+	odp->private = mr;
+	init_waitqueue_head(&mr->q_deferred_work);
+	atomic_set(&mr->num_deferred_work, 0);
+	err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+			      &mr->mmkey, GFP_KERNEL));
+	if (err)
+		goto err_dereg_mr;
+
+	err = mlx5_ib_init_odp_mr(mr);
+	if (err)
+		goto err_dereg_mr;
	return &mr->ibmr;
-error:
-	ib_umem_release(umem);
+
+err_dereg_mr:
+	dereg_mr(dev, mr);
	return ERR_PTR(err);
 }

+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 iova, int access_flags,
+				  struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem *umem;
+
+	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+		    start, iova, length, access_flags);
+
+	if (access_flags & IB_ACCESS_ON_DEMAND)
+		return create_user_odp_mr(pd, start, length, iova, access_flags,
+					  udata);
+
+	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+	if (IS_ERR(umem))
+		return ERR_CAST(umem);
+	return create_real_mr(pd, umem, iova, access_flags);
+}
+
 /**
  * mlx5_mr_cache_invalidate - Fence all DMA on the MR
  * @mr: The MR to fence
@@ -1661,7 +1672,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
		atomic_sub(ib_umem_num_pages(mr->umem),
			   &dev->mdev->priv.reg_pages);
		ib_umem_release(mr->umem);
-		mr->umem = mr_umem_get(dev, addr, len, access_flags);
+		mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
@@ -1685,7 +1696,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
		if (err)
			goto err;

-		mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true);
+		mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true);
		if (IS_ERR(mr)) {
			err = PTR_ERR(mr);
			mr = to_mmr(ib_mr);
...
@@ -536,6 +536,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
	struct mlx5_ib_mr *imr;
	int err;

+	if (!mlx5_ib_can_load_pas_with_umr(dev,
+					   MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+		return ERR_PTR(-EOPNOTSUPP);
+
	umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
	if (IS_ERR(umem_odp))
		return ERR_CAST(umem_odp);
@@ -831,17 +835,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
				     flags);
 }

-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
 {
-	u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
	int ret;

-	if (enable)
-		flags |= MLX5_PF_FLAGS_ENABLE;
-
-	ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
-				mr->umem->address, mr->umem->length, NULL,
-				flags);
+	ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+				mr->umem->length, NULL,
+				MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
	return ret >= 0 ? 0 : ret;
 }
...