Commit c55b2b98 authored by Yu Kuai, committed by Jens Axboe

nbd: fix race between nbd_alloc_config() and module removal

When the nbd module is being removed, nbd_alloc_config() may be
called concurrently by nbd_genl_connect(). In that case try_module_get()
returns false, but nbd_alloc_config() doesn't handle the failure.

The race may lead to a leak of nbd_config and its related
resources (e.g., recv_workq) and to an oops in nbd_read_stat() due
to the unloading of the nbd module, as shown below:

  BUG: kernel NULL pointer dereference, address: 0000000000000040
  Oops: 0000 [#1] SMP PTI
  CPU: 5 PID: 13840 Comm: kworker/u17:33 Not tainted 5.14.0+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
  Workqueue: knbd16-recv recv_work [nbd]
  RIP: 0010:nbd_read_stat.cold+0x130/0x1a4 [nbd]
  Call Trace:
   recv_work+0x3b/0xb0 [nbd]
   process_one_work+0x1ed/0x390
   worker_thread+0x4a/0x3d0
   kthread+0x12a/0x150
   ret_from_fork+0x22/0x30

Fix it by checking the return value of try_module_get()
in nbd_alloc_config(). As nbd_alloc_config() may now return ERR_PTR(-ENODEV),
assign nbd->config only when nbd_alloc_config() succeeds, to ensure
that the value of nbd->config is binary (valid or NULL).

Also add a debug message to check the reference counter
of nbd_config during module removal.
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20220521073749.3146892-3-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 06c4da89
...@@ -1518,15 +1518,20 @@ static struct nbd_config *nbd_alloc_config(void) ...@@ -1518,15 +1518,20 @@ static struct nbd_config *nbd_alloc_config(void)
{ {
struct nbd_config *config; struct nbd_config *config;
if (!try_module_get(THIS_MODULE))
return ERR_PTR(-ENODEV);
config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
if (!config) if (!config) {
return NULL; module_put(THIS_MODULE);
return ERR_PTR(-ENOMEM);
}
atomic_set(&config->recv_threads, 0); atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq); init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait); init_waitqueue_head(&config->conn_wait);
config->blksize_bits = NBD_DEF_BLKSIZE_BITS; config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0); atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config; return config;
} }
...@@ -1553,12 +1558,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) ...@@ -1553,12 +1558,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
goto out; goto out;
} }
config = nbd->config = nbd_alloc_config(); config = nbd_alloc_config();
if (!config) { if (IS_ERR(config)) {
ret = -ENOMEM; ret = PTR_ERR(config);
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
goto out; goto out;
} }
nbd->config = config;
refcount_set(&nbd->config_refs, 1); refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs); refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
...@@ -1964,13 +1970,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) ...@@ -1964,13 +1970,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
nbd_put(nbd); nbd_put(nbd);
return -EINVAL; return -EINVAL;
} }
config = nbd->config = nbd_alloc_config(); config = nbd_alloc_config();
if (!nbd->config) { if (IS_ERR(config)) {
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
nbd_put(nbd); nbd_put(nbd);
printk(KERN_ERR "nbd: couldn't allocate config\n"); printk(KERN_ERR "nbd: couldn't allocate config\n");
return -ENOMEM; return PTR_ERR(config);
} }
nbd->config = config;
refcount_set(&nbd->config_refs, 1); refcount_set(&nbd->config_refs, 1);
set_bit(NBD_RT_BOUND, &config->runtime_flags); set_bit(NBD_RT_BOUND, &config->runtime_flags);
...@@ -2543,6 +2550,9 @@ static void __exit nbd_cleanup(void) ...@@ -2543,6 +2550,9 @@ static void __exit nbd_cleanup(void)
while (!list_empty(&del_list)) { while (!list_empty(&del_list)) {
nbd = list_first_entry(&del_list, struct nbd_device, list); nbd = list_first_entry(&del_list, struct nbd_device, list);
list_del_init(&nbd->list); list_del_init(&nbd->list);
if (refcount_read(&nbd->config_refs))
printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
refcount_read(&nbd->config_refs));
if (refcount_read(&nbd->refs) != 1) if (refcount_read(&nbd->refs) != 1)
printk(KERN_ERR "nbd: possibly leaking a device\n"); printk(KERN_ERR "nbd: possibly leaking a device\n");
nbd_put(nbd); nbd_put(nbd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment