Commit 261dc53f authored by Jason Gunthorpe

RDMA/odp: Split creating a umem_odp from ib_umem_get

This is the last creation API that is overloaded for both regular and ODP
umems. There is very little code sharing, and a driver has to be specifically
ready for a umem_odp to be created in order to use the ODP version.

Link: https://lore.kernel.org/r/20190819111710.18440-7-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent f20bef6a
...@@ -184,9 +184,6 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); ...@@ -184,9 +184,6 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
/** /**
* ib_umem_get - Pin and DMA map userspace memory. * ib_umem_get - Pin and DMA map userspace memory.
* *
* If access flags indicate ODP memory, avoid pinning. Instead, stores
* the mm for future page fault handling in conjunction with MMU notifiers.
*
* @udata: userspace context to pin memory for * @udata: userspace context to pin memory for
* @addr: userspace virtual address to start at * @addr: userspace virtual address to start at
* @size: length of region to pin * @size: length of region to pin
...@@ -231,17 +228,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, ...@@ -231,17 +228,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
if (!can_do_mlock()) if (!can_do_mlock())
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
if (access & IB_ACCESS_ON_DEMAND) { if (access & IB_ACCESS_ON_DEMAND)
umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); return ERR_PTR(-EOPNOTSUPP);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->is_odp = 1;
} else {
umem = kzalloc(sizeof(*umem), GFP_KERNEL); umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem) if (!umem)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
}
umem->context = context; umem->context = context;
umem->length = size; umem->length = size;
umem->address = addr; umem->address = addr;
...@@ -249,18 +241,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, ...@@ -249,18 +241,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
umem->owning_mm = mm = current->mm; umem->owning_mm = mm = current->mm;
mmgrab(mm); mmgrab(mm);
if (access & IB_ACCESS_ON_DEMAND) {
if (WARN_ON_ONCE(!context->invalidate_range)) {
ret = -EINVAL;
goto umem_kfree;
}
ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
if (ret)
goto umem_kfree;
return umem;
}
page_list = (struct page **) __get_free_page(GFP_KERNEL); page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) { if (!page_list) {
ret = -ENOMEM; ret = -ENOMEM;
......
...@@ -335,6 +335,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, ...@@ -335,6 +335,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
&per_mm->umem_tree); &per_mm->umem_tree);
up_write(&per_mm->umem_rwsem); up_write(&per_mm->umem_rwsem);
} }
mmgrab(umem_odp->umem.owning_mm);
return 0; return 0;
...@@ -389,9 +390,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, ...@@ -389,9 +390,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
kfree(umem_odp); kfree(umem_odp);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
mmgrab(umem->owning_mm);
return umem_odp; return umem_odp;
} }
EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
...@@ -435,27 +433,51 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root, ...@@ -435,27 +433,51 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
kfree(odp_data); kfree(odp_data);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
mmgrab(umem->owning_mm);
return odp_data; return odp_data;
} }
EXPORT_SYMBOL(ib_umem_odp_alloc_child); EXPORT_SYMBOL(ib_umem_odp_alloc_child);
/** /**
* ib_umem_odp_get - Complete ib_umem_get() * ib_umem_odp_get - Create a umem_odp for a userspace va
* *
* @umem_odp: The partially configured umem from ib_umem_get() * @udata: userspace context to pin memory for
* @addr: The starting userspace VA * @addr: userspace virtual address to start at
* @access: ib_reg_mr access flags * @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*
* The driver should use when the access flags indicate ODP memory. It avoids
* pinning, instead, stores the mm for future page fault handling in
* conjunction with MMU notifiers.
*/ */
int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
size_t size, int access)
{ {
/* struct ib_umem_odp *umem_odp;
* NOTE: This must called in a process context where umem->owning_mm struct ib_ucontext *context;
* == current->mm struct mm_struct *mm;
*/ int ret;
struct mm_struct *mm = umem_odp->umem.owning_mm;
if (!udata)
return ERR_PTR(-EIO);
context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
->context;
if (!context)
return ERR_PTR(-EIO);
if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) ||
WARN_ON_ONCE(!context->invalidate_range))
return ERR_PTR(-EINVAL);
umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
if (!umem_odp)
return ERR_PTR(-ENOMEM);
umem_odp->umem.context = context;
umem_odp->umem.length = size;
umem_odp->umem.address = addr;
umem_odp->umem.writable = ib_access_writable(access);
umem_odp->umem.owning_mm = mm = current->mm;
umem_odp->page_shift = PAGE_SHIFT; umem_odp->page_shift = PAGE_SHIFT;
if (access & IB_ACCESS_HUGETLB) { if (access & IB_ACCESS_HUGETLB) {
...@@ -466,15 +488,24 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) ...@@ -466,15 +488,24 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
vma = find_vma(mm, ib_umem_start(umem_odp)); vma = find_vma(mm, ib_umem_start(umem_odp));
if (!vma || !is_vm_hugetlb_page(vma)) { if (!vma || !is_vm_hugetlb_page(vma)) {
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
return -EINVAL; ret = -EINVAL;
goto err_free;
} }
h = hstate_vma(vma); h = hstate_vma(vma);
umem_odp->page_shift = huge_page_shift(h); umem_odp->page_shift = huge_page_shift(h);
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
} }
return ib_init_umem_odp(umem_odp, NULL); ret = ib_init_umem_odp(umem_odp, NULL);
if (ret)
goto err_free;
return umem_odp;
err_free:
kfree(umem_odp);
return ERR_PTR(ret);
} }
EXPORT_SYMBOL(ib_umem_odp_get);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp) void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{ {
......
...@@ -56,19 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, ...@@ -56,19 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
struct scatterlist *sg; struct scatterlist *sg;
int entry; int entry;
if (umem->is_odp) {
struct ib_umem_odp *odp = to_ib_umem_odp(umem);
unsigned int page_shift = odp->page_shift;
*ncont = ib_umem_odp_num_pages(odp);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
if (order)
*order = ilog2(roundup_pow_of_two(*ncont));
return;
}
addr = addr >> PAGE_SHIFT; addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr; tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG); m = find_first_bit(&tmp, BITS_PER_LONG);
......
...@@ -784,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, ...@@ -784,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int *ncont, int *order) int *ncont, int *order)
{ {
struct ib_umem *u; struct ib_umem *u;
int err;
*umem = NULL; *umem = NULL;
if (access_flags & IB_ACCESS_ON_DEMAND) {
struct ib_umem_odp *odp;
odp = ib_umem_odp_get(udata, start, length, access_flags);
if (IS_ERR(odp)) {
mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
PTR_ERR(odp));
return PTR_ERR(odp);
}
u = &odp->umem;
*page_shift = odp->page_shift;
*ncont = ib_umem_odp_num_pages(odp);
*npages = *ncont << (*page_shift - PAGE_SHIFT);
if (order)
*order = ilog2(roundup_pow_of_two(*ncont));
} else {
u = ib_umem_get(udata, start, length, access_flags, 0); u = ib_umem_get(udata, start, length, access_flags, 0);
err = PTR_ERR_OR_ZERO(u); if (IS_ERR(u)) {
if (err) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); return PTR_ERR(u);
return err;
} }
mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
page_shift, ncont, order); page_shift, ncont, order);
}
if (!*npages) { if (!*npages) {
mlx5_ib_warn(dev, "avoid zero region\n"); mlx5_ib_warn(dev, "avoid zero region\n");
ib_umem_release(u); ib_umem_release(u);
......
...@@ -139,7 +139,8 @@ struct ib_ucontext_per_mm { ...@@ -139,7 +139,8 @@ struct ib_ucontext_per_mm {
struct rcu_head rcu; struct rcu_head rcu;
}; };
int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
size_t size, int access);
struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
int access); int access);
struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
...@@ -199,9 +200,11 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, ...@@ -199,9 +200,11 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
unsigned long addr,
size_t size, int access)
{ {
return -EINVAL; return ERR_PTR(-EINVAL);
} }
static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment