Commit 889d916b authored by Shay Drory's avatar Shay Drory Committed by Jason Gunthorpe

RDMA/core: Don't access cm_id after its destruction

restrack should only be attached to a cm_id while the ID has a valid
device pointer. It is set up when the device is first loaded, but not
cleared when the device is removed. There is also two copies of the device
pointer, one private and one in the public API, and these were left out of
sync.

Make everything go to NULL together and manipulate restrack right around
the device assignments.

Found by syzcaller:
BUG: KASAN: wild-memory-access in __list_del include/linux/list.h:112 [inline]
BUG: KASAN: wild-memory-access in __list_del_entry include/linux/list.h:135 [inline]
BUG: KASAN: wild-memory-access in list_del include/linux/list.h:146 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
Write of size 8 at addr dead000000000108 by task syz-executor716/334

CPU: 0 PID: 334 Comm: syz-executor716 Not tainted 5.11.0+ #271
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0xbe/0xf9 lib/dump_stack.c:120
 __kasan_report mm/kasan/report.c:400 [inline]
 kasan_report.cold+0x5f/0xd5 mm/kasan/report.c:413
 __list_del include/linux/list.h:112 [inline]
 __list_del_entry include/linux/list.h:135 [inline]
 list_del include/linux/list.h:146 [inline]
 cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
 cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
 cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
 _destroy_id+0x29/0x460 drivers/infiniband/core/cma.c:1862
 ucma_close_id+0x36/0x50 drivers/infiniband/core/ucma.c:185
 ucma_destroy_private_ctx+0x58d/0x5b0 drivers/infiniband/core/ucma.c:576
 ucma_close+0x91/0xd0 drivers/infiniband/core/ucma.c:1797
 __fput+0x169/0x540 fs/file_table.c:280
 task_work_run+0xb7/0x100 kernel/task_work.c:140
 exit_task_work include/linux/task_work.h:30 [inline]
 do_exit+0x7da/0x17f0 kernel/exit.c:825
 do_group_exit+0x9e/0x190 kernel/exit.c:922
 __do_sys_exit_group kernel/exit.c:933 [inline]
 __se_sys_exit_group kernel/exit.c:931 [inline]
 __x64_sys_exit_group+0x2d/0x30 kernel/exit.c:931
 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 255d0c14 ("RDMA/cma: rdma_bind_addr() leaks a cma_dev reference count")
Link: https://lore.kernel.org/r/3352ee288fe34f2b44220457a29bfc0548686363.1620711734.git.leonro@nvidia.comSigned-off-by: default avatarShay Drory <shayd@nvidia.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@nvidia.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent dc07628b
...@@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv) ...@@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
list_del(&id_priv->list); list_del(&id_priv->list);
cma_dev_put(id_priv->cma_dev); cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL; id_priv->cma_dev = NULL;
id_priv->id.device = NULL;
if (id_priv->id.route.addr.dev_addr.sgid_attr) { if (id_priv->id.route.addr.dev_addr.sgid_attr) {
rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
id_priv->id.route.addr.dev_addr.sgid_attr = NULL; id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
...@@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, ...@@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
iw_destroy_cm_id(id_priv->cm_id.iw); iw_destroy_cm_id(id_priv->cm_id.iw);
} }
cma_leave_mc_groups(id_priv); cma_leave_mc_groups(id_priv);
rdma_restrack_del(&id_priv->res);
cma_release_dev(id_priv); cma_release_dev(id_priv);
} }
...@@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, ...@@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
kfree(id_priv->id.route.path_rec); kfree(id_priv->id.route.path_rec);
put_net(id_priv->id.route.addr.dev_addr.net); put_net(id_priv->id.route.addr.dev_addr.net);
rdma_restrack_del(&id_priv->res);
kfree(id_priv); kfree(id_priv);
} }
...@@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ...@@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
} }
id_priv->backlog = backlog; id_priv->backlog = backlog;
if (id->device) { if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id->device, 1)) { if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv); ret = cma_ib_listen(id_priv);
if (ret) if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment