Commit f3db22fe, authored by Keith Busch, committed by Matthew Wilcox

NVMe: Fix hot cpu notification deadlock

There is a potential deadlock if a CPU event occurs during nvme probe,
since probe registers each device for hot CPU notification. This fixes
the race by having the module register for notification once, outside of
probe, rather than having each device register.

The actual work is done in a scheduled work queue instead of in the
notifier since assigning IO queues has the potential to block if the
driver creates additional queues.
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
parent bd67608a
...@@ -73,6 +73,7 @@ static LIST_HEAD(dev_list); ...@@ -73,6 +73,7 @@ static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread; static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq; static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait; static wait_queue_head_t nvme_kthread_wait;
static struct notifier_block nvme_nb;
static void nvme_reset_failed_dev(struct work_struct *ws); static void nvme_reset_failed_dev(struct work_struct *ws);
...@@ -2115,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) ...@@ -2115,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
} }
/*
 * Work handler for a device's cpu_work item, which the CPU hotplug
 * notifier schedules for every device on dev_list.
 *
 * Re-assigns the device's IO queues, but only while the device is marked
 * initialized; otherwise the work item is a no-op. Running this from a
 * work item rather than from the notifier itself lets the queue
 * assignment block if it needs to.
 */
static void nvme_cpu_workfn(struct work_struct *work)
{
	struct nvme_dev *dev;

	/* Recover the owning device from its embedded work item. */
	dev = container_of(work, struct nvme_dev, cpu_work);

	if (!dev->initialized)
		return;

	nvme_assign_io_queues(dev);
}
static int nvme_cpu_notify(struct notifier_block *self, static int nvme_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu) unsigned long action, void *hcpu)
{ {
struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); struct nvme_dev *dev;
switch (action) { switch (action) {
case CPU_ONLINE: case CPU_ONLINE:
case CPU_DEAD: case CPU_DEAD:
nvme_assign_io_queues(dev); spin_lock(&dev_list_lock);
list_for_each_entry(dev, &dev_list, node)
schedule_work(&dev->cpu_work);
spin_unlock(&dev_list_lock);
break; break;
} }
return NOTIFY_OK; return NOTIFY_OK;
...@@ -2191,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2191,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_free_queues(dev, nr_io_queues + 1); nvme_free_queues(dev, nr_io_queues + 1);
nvme_assign_io_queues(dev); nvme_assign_io_queues(dev);
dev->nb.notifier_call = &nvme_cpu_notify;
result = register_hotcpu_notifier(&dev->nb);
if (result)
goto free_queues;
return 0; return 0;
free_queues: free_queues:
...@@ -2495,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) ...@@ -2495,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
int i; int i;
dev->initialized = 0; dev->initialized = 0;
unregister_hotcpu_notifier(&dev->nb);
nvme_dev_list_remove(dev); nvme_dev_list_remove(dev);
if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
...@@ -2767,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2767,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&dev->namespaces); INIT_LIST_HEAD(&dev->namespaces);
dev->reset_workfn = nvme_reset_failed_dev; dev->reset_workfn = nvme_reset_failed_dev;
INIT_WORK(&dev->reset_work, nvme_reset_workfn); INIT_WORK(&dev->reset_work, nvme_reset_workfn);
INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
dev->pci_dev = pdev; dev->pci_dev = pdev;
pci_set_drvdata(pdev, dev); pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev); result = nvme_set_instance(dev);
...@@ -2836,6 +2842,7 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2836,6 +2842,7 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
flush_work(&dev->reset_work); flush_work(&dev->reset_work);
flush_work(&dev->cpu_work);
misc_deregister(&dev->miscdev); misc_deregister(&dev->miscdev);
nvme_dev_remove(dev); nvme_dev_remove(dev);
nvme_dev_shutdown(dev); nvme_dev_shutdown(dev);
...@@ -2923,11 +2930,18 @@ static int __init nvme_init(void) ...@@ -2923,11 +2930,18 @@ static int __init nvme_init(void)
else if (result > 0) else if (result > 0)
nvme_major = result; nvme_major = result;
result = pci_register_driver(&nvme_driver); nvme_nb.notifier_call = &nvme_cpu_notify;
result = register_hotcpu_notifier(&nvme_nb);
if (result) if (result)
goto unregister_blkdev; goto unregister_blkdev;
result = pci_register_driver(&nvme_driver);
if (result)
goto unregister_hotcpu;
return 0; return 0;
unregister_hotcpu:
unregister_hotcpu_notifier(&nvme_nb);
unregister_blkdev: unregister_blkdev:
unregister_blkdev(nvme_major, "nvme"); unregister_blkdev(nvme_major, "nvme");
kill_workq: kill_workq:
...@@ -2938,6 +2952,7 @@ static int __init nvme_init(void) ...@@ -2938,6 +2952,7 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void) static void __exit nvme_exit(void)
{ {
pci_unregister_driver(&nvme_driver); pci_unregister_driver(&nvme_driver);
unregister_hotcpu_notifier(&nvme_nb);
unregister_blkdev(nvme_major, "nvme"); unregister_blkdev(nvme_major, "nvme");
destroy_workqueue(nvme_workq); destroy_workqueue(nvme_workq);
BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
......
...@@ -90,7 +90,7 @@ struct nvme_dev { ...@@ -90,7 +90,7 @@ struct nvme_dev {
struct miscdevice miscdev; struct miscdevice miscdev;
work_func_t reset_workfn; work_func_t reset_workfn;
struct work_struct reset_work; struct work_struct reset_work;
struct notifier_block nb; struct work_struct cpu_work;
char name[12]; char name[12];
char serial[20]; char serial[20];
char model[40]; char model[40];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment