Commit 7d0cc6ed authored by Artemy Kovalyov's avatar Artemy Kovalyov Committed by David S. Miller

IB/mlx5: Add MR cache for large UMR regions

In this change we turn mlx5_ib_update_mtt() into generic
mlx5_ib_update_xlt() to perfrom HCA translation table modifiactions
supporting both atomic and process contexts and not limited by number
of modified entries.
Using this function we increase preallocated MRs up to 16GB.
Signed-off-by: default avatarArtemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c438fde1
......@@ -1112,11 +1112,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
context->upd_xlt_page = __get_free_page(GFP_KERNEL);
if (!context->upd_xlt_page) {
err = -ENOMEM;
goto out_uars;
}
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
goto out_uars;
goto out_page;
}
INIT_LIST_HEAD(&context->vma_private_list);
......@@ -1168,6 +1175,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
......@@ -1195,6 +1205,8 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
free_page(context->upd_xlt_page);
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
......
......@@ -159,7 +159,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
unsigned long umem_page_shift = ilog2(umem->page_size);
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k;
int i, k, idx;
u64 cur = 0;
u64 base;
int len;
......@@ -185,18 +185,36 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
for (k = 0; k < len; k++) {
/* Skip elements below offset */
if (i + len < offset << shift) {
i += len;
continue;
}
/* Skip pages below offset */
if (i < offset << shift) {
k = (offset << shift) - i;
i = offset << shift;
} else {
k = 0;
}
for (; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << umem_page_shift);
cur |= access_flags;
idx = (i >> shift) - offset;
pas[i >> shift] = cpu_to_be64(cur);
pas[idx] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
base + (k << umem_page_shift));
i >> shift, be64_to_cpu(pas[idx]));
}
i++;
/* Stop after num_pages reached */
if (i >> shift >= offset + num_pages)
return;
}
}
}
......
......@@ -125,6 +125,10 @@ struct mlx5_ib_ucontext {
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
unsigned long upd_xlt_page;
/* protect ODP/KSM */
struct mutex upd_xlt_page_mutex;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
......@@ -192,6 +196,13 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
#define MLX5_IB_UPD_XLT_ADDR BIT(3)
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
......@@ -788,8 +799,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
int npages, int zap);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
......
This diff is collapsed.
......@@ -91,16 +91,21 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
mlx5_ib_update_mtt(mr, blk_start_idx,
idx - blk_start_idx, 1);
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
1);
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
......@@ -257,7 +262,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
* this MR, since ib_umem_odp_map_dma_pages already
* checks this.
*/
ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
ret = mlx5_ib_update_xlt(mr, start_idx, npages,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
}
......
......@@ -152,6 +152,26 @@ static struct mlx5_profile profile[] = {
.size = 8,
.limit = 4
},
.mr_cache[16] = {
.size = 8,
.limit = 4
},
.mr_cache[17] = {
.size = 8,
.limit = 4
},
.mr_cache[18] = {
.size = 8,
.limit = 4
},
.mr_cache[19] = {
.size = 4,
.limit = 2
},
.mr_cache[20] = {
.size = 4,
.limit = 2
},
},
};
......
......@@ -959,7 +959,7 @@ enum {
};
enum {
MAX_MR_CACHE_ENTRIES = 16,
MAX_MR_CACHE_ENTRIES = 21,
};
enum {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment