Commit de8f847a authored by Yishai Hadas's avatar Yishai Hadas Committed by Leon Romanovsky

RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct

Add support for DMABUF MR registrations with Data-direct device.

Upon userspace calling to register a DMABUF MR with the data direct bit
set, the below algorithm will be followed.

1) Obtain a pinned DMABUF umem from the IB core using the user input
parameters (FD, offset, length) and the DMA PF device.  The DMA PF
device is needed to allow the IOMMU to enable the DMA PF to access the
user buffer over PCI.

2) Create a KSM MKEY by setting its entries according to the user buffer
VA to IOVA mapping, with the MKEY being the data direct device-crossed
MKEY. This KSM MKEY is umrable and will be used as part of the MR cache.
The PD for creating it is the internal device 'data direct' kernel one.

3) Create a crossing MKEY that points to the KSM MKEY using the crossing
access mode.

4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs
managed on the mlx5_ib device.

5) Return the crossing MKEY to the user, created with its supplied PD.

Upon DMA PF unbind flow, the driver will revoke the KSM entries.
The final deregistration will occur under the hood once the application
deregisters its MKEY.

Notes:
- This version supports only the PINNED UMEM mode, so there is no
  dependency on ODP.
- The IOVA supplied by the application must be system page aligned due to
  HW translations of KSM.
- The crossing MKEY will not be umrable or part of the MR cache, as we
  cannot change its crossed (i.e. KSM) MKEY over UMR.
Signed-off-by: default avatarYishai Hadas <yishaih@nvidia.com>
Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.orgSigned-off-by: default avatarLeon Romanovsky <leon@kernel.org>
parent 3aa73c6b
......@@ -3490,6 +3490,7 @@ static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev)
if (ret)
return ret;
INIT_LIST_HEAD(&dev->data_direct_mr_list);
ret = mlx5_data_direct_ib_reg(dev, vuid);
if (ret)
mlx5_ib_free_data_direct_resources(dev);
......@@ -3882,6 +3883,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
dump_fill_mkey),
UA_MANDATORY));
ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_reg_dmabuf_mr,
UVERBS_OBJECT_MR,
UVERBS_METHOD_REG_DMABUF_MR,
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
enum mlx5_ib_uapi_reg_dmabuf_flags,
UA_OPTIONAL));
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
......@@ -3891,6 +3900,7 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
......@@ -4396,6 +4406,7 @@ void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev)
{
mutex_lock(&ibdev->data_direct_lock);
mlx5_ib_revoke_data_direct_mrs(ibdev);
ibdev->data_direct_dev = NULL;
mutex_unlock(&ibdev->data_direct_lock);
}
......
......@@ -682,6 +682,8 @@ struct mlx5_ib_mr {
struct mlx5_ib_mkey mmkey;
struct ib_umem *umem;
/* The mr is data direct related */
u8 data_direct :1;
union {
/* Used only by kernel MRs (umem == NULL) */
......@@ -719,6 +721,10 @@ struct mlx5_ib_mr {
} odp_destroy;
struct ib_odp_counters odp_stats;
bool is_odp_implicit;
/* The affilated data direct crossed mr */
struct mlx5_ib_mr *dd_crossed_mr;
struct list_head dd_node;
u8 revoked :1;
};
};
};
......@@ -1169,6 +1175,7 @@ struct mlx5_ib_dev {
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct list_head data_direct_mr_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
......@@ -1437,6 +1444,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
struct mlx5_data_direct_dev *dev);
void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev);
void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
......
This diff is collapsed.
......@@ -710,6 +710,9 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
if (mr->data_direct)
err = mlx5r_umr_update_data_direct_ksm_pas(mr, xlt_flags);
else
err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
......
......@@ -632,44 +632,47 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
struct mlx5_ksm *cur_ksm;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
struct mlx5_mtt *mtt;
size_t final_size;
void *curr_entry;
struct ib_sge sg;
void *entry;
u64 offset = 0;
int err = 0;
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
mtt = mlx5r_umr_create_xlt(
dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
sizeof(*mtt), flags);
if (!mtt)
entry = mlx5r_umr_create_xlt(dev, &sg,
ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
ent_size, flags);
if (!entry)
return -ENOMEM;
orig_sg_length = sg.length;
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
if (dd) {
/* Use the data direct internal kernel PD */
MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
cur_ksm = entry;
} else {
cur_mtt = entry;
}
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
cur_mtt = mtt;
curr_entry = entry;
rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
if (curr_entry == entry + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
......@@ -681,23 +684,31 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
cur_mtt = mtt;
if (dd)
cur_ksm = entry;
else
cur_mtt = entry;
}
if (dd) {
cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
cur_ksm++;
curr_entry = cur_ksm;
} else {
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
curr_entry = cur_mtt;
}
}
final_size = (void *)cur_mtt - (void *)mtt;
final_size = curr_entry - entry;
sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
memset(cur_mtt, 0, sg.length - final_size);
memset(curr_entry, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
......@@ -705,10 +716,32 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
err:
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
return err;
}
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
/* No invalidation flow is expected */
if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, true);
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, false);
}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
......
......@@ -95,6 +95,7 @@ int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
......
......@@ -274,6 +274,10 @@ enum mlx5_ib_create_cq_attrs {
MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
};
enum mlx5_ib_reg_dmabuf_mr_attrs {
MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
};
#define MLX5_IB_DW_MATCH_PARAM 0xA0
struct mlx5_ib_match_params {
......
......@@ -54,6 +54,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type {
MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
};
enum mlx5_ib_uapi_reg_dmabuf_flags {
MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0,
};
struct mlx5_ib_uapi_devx_async_cmd_hdr {
__aligned_u64 wr_id;
__u8 out_data[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment