Commit 36f30e48 authored by Yishai Hadas, committed by Jason Gunthorpe

IB/core: Improve ODP to use hmm_range_fault()

Move to use hmm_range_fault() instead of get_user_pages_remote() to improve
performance in a few aspects:

- No more need to allocate and free memory to hold the output pages.

- No more need to use put_page() to unpin the pages.

- Contiguous pages are detected from the returned mapping order, with no
  need to iterate and compare page by page.

In addition, moving to hmm_range_fault() makes it possible to reduce page
faults in the system by using its snapshot mode; this will be introduced in
the next patches of this series.

As part of this, clean up some flows and use the data structures required to
work with hmm_range_fault().

Link: https://lore.kernel.org/r/20200930163828.1336747-2-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 2ee9bf34
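
Before the diff, a quick orientation for readers new to the API: the sketch
below shows the general hmm_range_fault() calling pattern this series moves
to. The pfn array is caller-provided (no per-fault output allocation),
nothing is pinned (so no put_page() on teardown), and contiguity is read
back from the per-pfn mapping order. This is an illustrative sketch under
those assumptions; the helper name and its parameters are invented for the
example and are not code from this commit.

#include <linux/hmm.h>
#include <linux/mm.h>

/*
 * Illustrative only: fault in [start, end) of "mm" and report the mapping
 * order of the first result. "pfns" must hold (end - start) >> PAGE_SHIFT
 * entries; hmm_range_fault() fills it without taking page references.
 */
static int fault_range_sketch(struct mm_struct *mm,
			      struct mmu_interval_notifier *notifier,
			      unsigned long start, unsigned long end,
			      unsigned long *pfns, bool writable)
{
	struct hmm_range range = {
		.notifier	= notifier,
		.start		= start,
		.end		= end,
		.hmm_pfns	= pfns,
		.default_flags	= HMM_PFN_REQ_FAULT |
				  (writable ? HMM_PFN_REQ_WRITE : 0),
	};
	int ret;

	range.notifier_seq = mmu_interval_read_begin(notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);	/* fills pfns, pins nothing */
	mmap_read_unlock(mm);
	if (ret)
		return ret;	/* -EBUSY means the caller should retry */

	/* Contiguity comes from the order, not per-page comparisons. */
	if (pfns[0] & HMM_PFN_VALID)
		pr_debug("first pfn maps %lu pages\n",
			 1UL << hmm_pfn_to_map_order(pfns[0]));
	return 0;
}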
drivers/infiniband/Kconfig
@@ -48,6 +48,7 @@ config INFINIBAND_ON_DEMAND_PAGING
 	depends on INFINIBAND_USER_MEM
 	select MMU_NOTIFIER
 	select INTERVAL_TREE
+	select HMM_MIRROR
 	default y
 	help
 	  On demand paging support for the InfiniBand subsystem.
...
This diff is collapsed (drivers/infiniband/core/umem_odp.c, the bulk of the rework).
drivers/infiniband/hw/mlx5/odp.c
@@ -671,7 +671,6 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 {
 	int page_shift, ret, np;
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
-	unsigned long current_seq;
 	u64 access_mask;
 	u64 start_idx;
@@ -682,25 +681,16 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	if (odp->umem.writable && !downgrade)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-	current_seq = mmu_interval_read_begin(&odp->notifier);
-
-	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
-				       current_seq);
+	np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask);
 	if (np < 0)
 		return np;
 
-	mutex_lock(&odp->umem_mutex);
-	if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
-		/*
-		 * No need to check whether the MTTs really belong to
-		 * this MR, since ib_umem_odp_map_dma_pages already
-		 * checks this.
-		 */
-		ret = mlx5_ib_update_xlt(mr, start_idx, np,
-					 page_shift, MLX5_IB_UPD_XLT_ATOMIC);
-	} else {
-		ret = -EAGAIN;
-	}
+	/*
+	 * No need to check whether the MTTs really belong to this MR, since
+	 * ib_umem_odp_map_dma_and_lock already checks this.
+	 */
+	ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift,
+				 MLX5_IB_UPD_XLT_ATOMIC);
 	mutex_unlock(&odp->umem_mutex);
 
 	if (ret < 0) {
...
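
Why the caller shrinks so much: the mmu_interval_read_begin() /
mmu_interval_read_retry() sequence handling moves out of pagefault_real_mr()
and into the new helper, which returns with umem_mutex held and the mapping
already validated. Below is a rough, hedged sketch of that contract; the
stub in the middle stands in for the real fault-and-map work, and only the
locking shape reflects the hunk above.

#include <linux/mmu_notifier.h>
#include <rdma/ib_umem_odp.h>

/*
 * Illustrative sketch of the ib_umem_odp_map_dma_and_lock() contract, not
 * its actual body: on success it returns with umem_mutex held and the
 * notifier sequence validated, so the caller can program the HW and then
 * simply unlock.
 */
static int map_dma_and_lock_sketch(struct ib_umem_odp *odp, u64 user_va,
				   u64 bcnt, u64 access_mask)
{
	unsigned long seq;
	int np;

retry:
	seq = mmu_interval_read_begin(&odp->notifier);
	/* Stand-in for the real work: hmm_range_fault() + DMA mapping. */
	np = 0;
	if (np < 0)
		return np;

	mutex_lock(&odp->umem_mutex);
	if (mmu_interval_read_retry(&odp->notifier, seq)) {
		/* Range was invalidated while faulting: unlock and retry. */
		mutex_unlock(&odp->umem_mutex);
		goto retry;
	}
	return np;	/* umem_mutex is intentionally left held */
}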
include/rdma/ib_umem_odp.h
@@ -14,17 +14,13 @@ struct ib_umem_odp {
 	struct mmu_interval_notifier notifier;
 	struct pid *tgid;
 
+	/* An array of the pfns included in the on-demand paging umem. */
+	unsigned long *pfn_list;
+
 	/*
-	 * An array of the pages included in the on-demand paging umem.
-	 * Indices of pages that are currently not mapped into the device will
-	 * contain NULL.
+	 * An array with DMA addresses mapped for pfns in pfn_list.
+	 * The lower two bits designate access permissions.
+	 * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
 	 */
-	struct page **page_list;
-	/*
-	 * An array of the same size as page_list, with DMA addresses mapped
-	 * for pages the pages in page_list. The lower two bits designate
-	 * access permissions. See ODP_READ_ALLOWED_BIT and
-	 * ODP_WRITE_ALLOWED_BIT.
-	 */
 	dma_addr_t *dma_list;
 	/*
@@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
 			const struct mmu_interval_notifier_ops *ops);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
-			      u64 bcnt, u64 access_mask,
-			      unsigned long current_seq);
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+				 u64 bcnt, u64 access_mask);
 
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
 				 u64 bound);
...
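
The new dma_list comment is terse, so a short decoding sketch may help. The
ODP_READ_ALLOWED_BIT, ODP_WRITE_ALLOWED_BIT and ODP_DMA_ADDR_MASK macros
come from this same header; the decode helper itself is invented for
illustration.

#include <rdma/ib_umem_odp.h>

/*
 * dma_list entries are page-aligned DMA addresses, so their two low bits
 * are free to carry access permissions.
 */
static void decode_dma_entry(dma_addr_t entry)
{
	dma_addr_t addr = entry & ODP_DMA_ADDR_MASK;
	bool readable = entry & ODP_READ_ALLOWED_BIT;
	bool writable = entry & ODP_WRITE_ALLOWED_BIT;

	pr_debug("dma %pad read:%d write:%d\n", &addr, readable, writable);
}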