Commit 8cbe71e7 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "A fairly modest set of bug fixes, nothing abnormal from the merge
  window

  The ucma patch is a bit on the larger side, but given the regression
  was recently added I've opted to forward it to the rc stream.

   - Fix a ucma memory leak introduced in v5.9 while fixing the
     Syzkaller bugs

   - Don't fail when the xarray wraps for user verbs objects

   - User triggerable oops regression from the umem page size rework

   - Error unwind bugs in usnic, ocrdma, mlx5 and cma"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/cma: Fix error flow in default_roce_mode_store
  RDMA/mlx5: Fix wrong free of blue flame register on error
  IB/mlx5: Fix error unwinding when set_has_smi_cap fails
  RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two()
  RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd()
  RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp
  RDMA/restrack: Don't treat as an error allocation ID wrapping
  RDMA/ucma: Do not miss ctx destruction steps in some cases
parents 0bc9bc1d 7c7b3e5d
@@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item,
 		return ret;
 
 	gid_type = ib_cache_gid_parse_type_str(buf);
-	if (gid_type < 0)
+	if (gid_type < 0) {
+		cma_configfs_params_put(cma_dev);
 		return -EINVAL;
+	}
 
 	ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
...
@@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
 	} else {
 		ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
 				      &rt->next_id, GFP_KERNEL);
+		ret = (ret < 0) ? ret : 0;
 	}
 
 out:
...
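Reviewer note on the restrack hunk: xa_alloc_cyclic() returns 0 when an ID is allocated without wrapping, 1 when it succeeds after the counter wraps back to the lower limit, and a negative errno only on a real failure, so a positive return must not be propagated as an error. A minimal kernel-style sketch of that calling convention (the xarray and helper names below are illustrative, not taken from restrack.c):

```c
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/xarray.h>

/* Illustrative only: a cyclic ID allocator that treats a wrapped
 * allocation (return value 1) as success, mirroring the restrack fix.
 */
static DEFINE_XARRAY_ALLOC(example_ids);
static u32 example_next_id;

static int example_track(void *entry, u32 *id)
{
	int ret;

	ret = xa_alloc_cyclic(&example_ids, id, entry, xa_limit_32b,
			      &example_next_id, GFP_KERNEL);
	if (ret < 0)
		return ret;	/* -ENOMEM or -EBUSY: a real failure */

	/* ret == 1 only means the ID space wrapped around; not an error. */
	return 0;
}
```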
@@ -95,8 +95,6 @@ struct ucma_context {
 	u64 uid;
 
 	struct list_head list;
-	/* sync between removal event and id destroy, protected by file mut */
-	int destroying;
 	struct work_struct close_work;
 };
@@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table);
 static DEFINE_XARRAY_ALLOC(multicast_table);
 
 static const struct file_operations ucma_fops;
-static int __destroy_id(struct ucma_context *ctx);
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);
 
 static inline struct ucma_context *_ucma_find_context(int id,
 						       struct ucma_file *file)
@@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work)
 	/* once all inflight tasks are finished, we close all underlying
 	 * resources. The context is still alive till its explicit destryoing
-	 * by its creator.
+	 * by its creator. This puts back the xarray's reference.
 	 */
 	ucma_put_ctx(ctx);
 	wait_for_completion(&ctx->comp);
 	/* No new events will be generated after destroying the id. */
 	rdma_destroy_id(ctx->cm_id);
 
-	/*
-	 * At this point ctx->ref is zero so the only place the ctx can be is in
-	 * a uevent or in __destroy_id(). Since the former doesn't touch
-	 * ctx->cm_id and the latter sync cancels this, there is no races with
-	 * this store.
-	 */
+	/* Reading the cm_id without holding a positive ref is not allowed */
 	ctx->cm_id = NULL;
 }
@@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 		return NULL;
 
 	INIT_WORK(&ctx->close_work, ucma_close_id);
-	refcount_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
 	/* So list_del() will work if we don't do ucma_finish_ctx() */
 	INIT_LIST_HEAD(&ctx->list);
@@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	return ctx;
 }
 
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+			       struct rdma_cm_id *cm_id)
+{
+	refcount_set(&ctx->ref, 1);
+	ctx->cm_id = cm_id;
+}
+
 static void ucma_finish_ctx(struct ucma_context *ctx)
 {
 	lockdep_assert_held(&ctx->file->mut);
@@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
 	ctx = ucma_alloc_ctx(listen_ctx->file);
 	if (!ctx)
 		goto err_backlog;
-	ctx->cm_id = cm_id;
+	ucma_set_ctx_cm_id(ctx, cm_id);
 
 	uevent = ucma_create_uevent(listen_ctx, event);
 	if (!uevent)
@@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
 	return 0;
 
 err_alloc:
-	xa_erase(&ctx_table, ctx->id);
-	kfree(ctx);
+	ucma_destroy_private_ctx(ctx);
 err_backlog:
 	atomic_inc(&listen_ctx->backlog);
 	/* Returning error causes the new ID to be destroyed */
@@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 		wake_up_interruptible(&ctx->file->poll_wait);
 	}
 
-	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
-		queue_work(system_unbound_wq, &ctx->close_work);
+	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+		xa_lock(&ctx_table);
+		if (xa_load(&ctx_table, ctx->id) == ctx)
+			queue_work(system_unbound_wq, &ctx->close_work);
+		xa_unlock(&ctx_table);
+	}
 	return 0;
 }
@@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 		ret = PTR_ERR(cm_id);
 		goto err1;
 	}
-	ctx->cm_id = cm_id;
+	ucma_set_ctx_cm_id(ctx, cm_id);
 
 	resp.id = ctx->id;
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp))) {
-		xa_erase(&ctx_table, ctx->id);
-		__destroy_id(ctx);
+		ucma_destroy_private_ctx(ctx);
 		return -EFAULT;
 	}
@@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 	return 0;
 
 err1:
-	xa_erase(&ctx_table, ctx->id);
-	kfree(ctx);
+	ucma_destroy_private_ctx(ctx);
 	return ret;
 }
@@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 	rdma_unlock_handler(mc->ctx->cm_id);
 }
 
-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
 {
 	int events_reported;
 	struct ucma_event *uevent, *tmp;
 	LIST_HEAD(list);
 
-	ucma_cleanup_multicast(ctx);
-
-	/* Cleanup events not yet reported to the user. */
+	/* Cleanup events not yet reported to the user.*/
 	mutex_lock(&ctx->file->mut);
 	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
-		if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
+		if (uevent->ctx != ctx)
+			continue;
+		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
+			       GFP_KERNEL) == uevent->conn_req_ctx) {
 			list_move_tail(&uevent->list, &list);
+			continue;
+		}
+		list_del(&uevent->list);
+		kfree(uevent);
 	}
 	list_del(&ctx->list);
 	events_reported = ctx->events_reported;
 	mutex_unlock(&ctx->file->mut);
 
 	/*
-	 * If this was a listening ID then any connections spawned from it
-	 * that have not been delivered to userspace are cleaned up too.
-	 * Must be done outside any locks.
+	 * If this was a listening ID then any connections spawned from it that
+	 * have not been delivered to userspace are cleaned up too. Must be done
+	 * outside any locks.
 	 */
 	list_for_each_entry_safe(uevent, tmp, &list, list) {
-		list_del(&uevent->list);
-		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
-		    uevent->conn_req_ctx != ctx)
-			__destroy_id(uevent->conn_req_ctx);
+		ucma_destroy_private_ctx(uevent->conn_req_ctx);
 		kfree(uevent);
 	}
-
-	mutex_destroy(&ctx->mutex);
-	kfree(ctx);
 	return events_reported;
 }
 
-static int __destroy_id(struct ucma_context *ctx)
+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
+ * the ctx is not public to the user). This either because:
+ * - ucma_finish_ctx() hasn't been called
+ * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
 {
+	int events_reported;
+
 	/*
-	 * If the refcount is already 0 then ucma_close_id() has already
-	 * destroyed the cm_id, otherwise holding the refcount keeps cm_id
-	 * valid. Prevent queue_work() from being called.
+	 * Destroy the underlying cm_id. New work queuing is prevented now by
+	 * the removal from the xarray. Once the work is cancled ref will either
+	 * be 0 because the work ran to completion and consumed the ref from the
+	 * xarray, or it will be positive because we still have the ref from the
+	 * xarray. This can also be 0 in cases where cm_id was never set
 	 */
-	if (refcount_inc_not_zero(&ctx->ref)) {
-		rdma_lock_handler(ctx->cm_id);
-		ctx->destroying = 1;
-		rdma_unlock_handler(ctx->cm_id);
-		ucma_put_ctx(ctx);
-	}
-
 	cancel_work_sync(&ctx->close_work);
-	/* At this point it's guaranteed that there is no inflight closing task */
-	if (ctx->cm_id)
+	if (refcount_read(&ctx->ref))
 		ucma_close_id(&ctx->close_work);
-	return ucma_free_ctx(ctx);
+
+	events_reported = ucma_cleanup_ctx_events(ctx);
+	ucma_cleanup_multicast(ctx);
+
+	WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+			   GFP_KERNEL) != NULL);
+	mutex_destroy(&ctx->mutex);
+	kfree(ctx);
+	return events_reported;
 }
 
 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
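Reviewer note: the comment added above is the heart of the ucma rework. Exactly one path may swap the ctx_table entry from the ctx pointer to XA_ZERO_ENTRY, and only that winner is allowed to call ucma_destroy_private_ctx(), so the racing destroy paths (ucma_destroy_id(), FD release, the event handler) can no longer skip teardown steps or run them twice. A small userspace analogy of that claim-once pattern using C11 atomics (this is not the xarray API, just an illustration of the idea):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for "is this ctx still published in the table?" */
struct ctx {
	atomic_bool claimed;
};

/* Returns true for exactly one caller, however many threads race. */
static bool claim_for_destroy(struct ctx *c)
{
	bool expected = false;

	return atomic_compare_exchange_strong(&c->claimed, &expected, true);
}

int main(void)
{
	struct ctx c = { .claimed = false };

	printf("first caller wins:   %d\n", claim_for_destroy(&c));	/* 1 */
	printf("second caller loses: %d\n", claim_for_destroy(&c));	/* 0 */
	return 0;
}
```

In the real code the xarray slot plays the role of the flag, and xa_cmpxchg()/__xa_cmpxchg() is the compare-and-swap that picks the winner.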
@@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 	xa_lock(&ctx_table);
 	ctx = _ucma_find_context(cmd.id, file);
-	if (!IS_ERR(ctx))
-		__xa_erase(&ctx_table, ctx->id);
+	if (!IS_ERR(ctx)) {
+		if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+				 GFP_KERNEL) != ctx)
+			ctx = ERR_PTR(-ENOENT);
+	}
 	xa_unlock(&ctx_table);
 
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	resp.events_reported = __destroy_id(ctx);
+	resp.events_reported = ucma_destroy_private_ctx(ctx);
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
@@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp)
 	 * prevented by this being a FD release function. The list_add_tail() in
 	 * ucma_connect_event_handler() can run concurrently, however it only
 	 * adds to the list *after* a listening ID. By only reading the first of
-	 * the list, and relying on __destroy_id() to block
+	 * the list, and relying on ucma_destroy_private_ctx() to block
 	 * ucma_connect_event_handler(), no additional locking is needed.
 	 */
 	while (!list_empty(&file->ctx_list)) {
 		struct ucma_context *ctx = list_first_entry(
 			&file->ctx_list, struct ucma_context, list);
 
-		xa_erase(&ctx_table, ctx->id);
-		__destroy_id(ctx);
+		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+				   GFP_KERNEL) != ctx);
+		ucma_destroy_private_ctx(ctx);
 	}
 	kfree(file);
 	return 0;
...
@@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	 */
 	if (mask)
 		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
-	return rounddown_pow_of_two(pgsz_bitmap);
+	return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
 }
 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
...
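Reviewer note on the umem hunk: rounddown_pow_of_two() is documented as undefined for an input of 0, so if pgsz_bitmap ends up empty (no supported page size satisfies the alignment constraints), the function has to return 0 explicitly rather than hand 0 to the helper. A small userspace sketch of the same guard, using an illustrative stand-in rather than the kernel macro:

```c
#include <stdio.h>

/*
 * Illustrative stand-in for the kernel's rounddown_pow_of_two(): keep only
 * the highest set bit. For n == 0 the shift count would come from
 * __builtin_clzl(0), which is undefined -- the same reason the kernel
 * helper documents its result as undefined for 0.
 */
static unsigned long rounddown_pow2(unsigned long n)
{
	return 1UL << (sizeof(long) * 8 - 1 - __builtin_clzl(n));
}

/* Mirror of the ib_umem_find_best_pgsz() fix: 0 means "no usable page size". */
static unsigned long best_pgsz(unsigned long pgsz_bitmap)
{
	return pgsz_bitmap ? rounddown_pow2(pgsz_bitmap) : 0;
}

int main(void)
{
	printf("0x%lx\n", best_pgsz(0x5000));	/* 0x4000 */
	printf("0x%lx\n", best_pgsz(0));	/* 0, instead of undefined behavior */
	return 0;
}
```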
@@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 
 	err = set_has_smi_cap(dev);
 	if (err)
-		return err;
+		goto err_mp;
 
 	if (!mlx5_core_mp_enabled(mdev)) {
 		for (i = 1; i <= dev->num_ports; i++) {
@@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 
 	err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
 	if (err)
-		mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+		mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 
 	return err;
 }
...
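Reviewer note: both mlx5 hunks are error-unwind fixes. The first makes a failing set_has_smi_cap() jump to the existing err_mp teardown label instead of returning with earlier initialization still live; the second frees the blue flame register that was actually allocated earlier in the function (dev->bfreg) rather than the fp_bfreg whose allocation just failed. A generic sketch of the goto-unwind idiom involved (the names are made up for illustration, not mlx5's):

```c
#include <errno.h>
#include <stdlib.h>

/* Hypothetical two-step init used only to illustrate the unwind rule. */
struct dev_state {
	void *ports;
	void *bfreg;
};

static int example_init(struct dev_state *d)
{
	int err;

	d->ports = malloc(64);
	if (!d->ports)
		return -ENOMEM;		/* nothing acquired yet: plain return is fine */

	d->bfreg = malloc(64);
	if (!d->bfreg) {
		err = -ENOMEM;
		goto err_ports;		/* later failures must unwind, like "goto err_mp" */
	}
	return 0;

err_ports:
	free(d->ports);			/* release what was acquired, in reverse order */
	d->ports = NULL;
	return err;
}

int main(void)
{
	struct dev_state d = { 0 };

	if (example_init(&d))
		return 1;
	free(d.bfreg);
	free(d.ports);
	return 0;
}
```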
@@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
 		       __func__, dev->id, pd->id);
 	}
-	kfree(uctx->cntxt_pd);
 	uctx->cntxt_pd = NULL;
 	_ocrdma_dealloc_pd(dev, pd);
+	kfree(pd);
 }
 
 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
...
@@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 		}
 
 		usnic_uiom_free_dev_list(dev_list);
+		dev_list = NULL;
 	}
 
 	/* Try to find resources on an unused vf */
@@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 qp_grp_check:
 	if (IS_ERR_OR_NULL(qp_grp)) {
 		usnic_err("Failed to allocate qp_grp\n");
+		if (usnic_ib_share_vf)
+			usnic_uiom_free_dev_list(dev_list);
 		return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
 	}
 	return qp_grp;
...