Commit 2d55cd5f authored by Christoph Hellwig, committed by Jens Axboe

nvme: replace the kthread with a per-device watchdog timer

The only work left in the kthread is the periodic health check for each
controller.  There is no need to run this from process context or keep
a thread context around for it, so replace it with a simpler timer.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 79f2b358
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/kdev_t.h> #include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -39,6 +38,7 @@ ...@@ -39,6 +38,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/t10-pi.h> #include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
...@@ -64,11 +64,7 @@ static bool use_cmb_sqes = true; ...@@ -64,11 +64,7 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644); module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
static LIST_HEAD(dev_list);
static DEFINE_SPINLOCK(dev_list_lock);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq; static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
struct nvme_dev; struct nvme_dev;
struct nvme_queue; struct nvme_queue;
...@@ -82,7 +78,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); ...@@ -82,7 +78,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
* Represents an NVM Express device. Each nvme_dev is a PCI function. * Represents an NVM Express device. Each nvme_dev is a PCI function.
*/ */
struct nvme_dev { struct nvme_dev {
struct list_head node;
struct nvme_queue **queues; struct nvme_queue **queues;
struct blk_mq_tag_set tagset; struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset; struct blk_mq_tag_set admin_tagset;
...@@ -101,6 +96,7 @@ struct nvme_dev { ...@@ -101,6 +96,7 @@ struct nvme_dev {
struct work_struct scan_work; struct work_struct scan_work;
struct work_struct remove_work; struct work_struct remove_work;
struct work_struct async_work; struct work_struct async_work;
struct timer_list watchdog_timer;
struct mutex shutdown_lock; struct mutex shutdown_lock;
bool subsystem; bool subsystem;
void __iomem *cmb; void __iomem *cmb;
...@@ -1334,36 +1330,26 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) ...@@ -1334,36 +1330,26 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result; return result;
} }
static int nvme_kthread(void *data) static void nvme_watchdog_timer(unsigned long data)
{ {
struct nvme_dev *dev, *next; struct nvme_dev *dev = (struct nvme_dev *)data;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
while (!kthread_should_stop()) { /*
set_current_state(TASK_INTERRUPTIBLE); * Skip controllers currently under reset.
spin_lock(&dev_list_lock); */
list_for_each_entry_safe(dev, next, &dev_list, node) { if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
u32 csts = readl(dev->bar + NVME_REG_CSTS); ((csts & NVME_CSTS_CFS) ||
(dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
/* if (queue_work(nvme_workq, &dev->reset_work)) {
* Skip controllers currently under reset. dev_warn(dev->dev,
*/ "Failed status: 0x%x, reset controller.\n",
if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work)) csts);
continue;
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
if (queue_work(nvme_workq, &dev->reset_work)) {
dev_warn(dev->ctrl.device,
"Failed status: %x, reset controller\n",
readl(dev->bar + NVME_REG_CSTS));
}
continue;
}
} }
spin_unlock(&dev_list_lock); return;
schedule_timeout(round_jiffies_relative(HZ));
} }
return 0;
mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
} }
static int nvme_create_io_queues(struct nvme_dev *dev) static int nvme_create_io_queues(struct nvme_dev *dev)
...@@ -1777,56 +1763,12 @@ static void nvme_dev_unmap(struct nvme_dev *dev) ...@@ -1777,56 +1763,12 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
} }
} }
static int nvme_dev_list_add(struct nvme_dev *dev)
{
bool start_thread = false;
spin_lock(&dev_list_lock);
if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
start_thread = true;
nvme_thread = NULL;
}
list_add(&dev->node, &dev_list);
spin_unlock(&dev_list_lock);
if (start_thread) {
nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
wake_up_all(&nvme_kthread_wait);
} else
wait_event_killable(nvme_kthread_wait, nvme_thread);
if (IS_ERR_OR_NULL(nvme_thread))
return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
return 0;
}
/*
* Remove the node from the device list and check
* for whether or not we need to stop the nvme_thread.
*/
static void nvme_dev_list_remove(struct nvme_dev *dev)
{
struct task_struct *tmp = NULL;
spin_lock(&dev_list_lock);
list_del_init(&dev->node);
if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) {
tmp = nvme_thread;
nvme_thread = NULL;
}
spin_unlock(&dev_list_lock);
if (tmp)
kthread_stop(tmp);
}
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{ {
int i; int i;
u32 csts = -1; u32 csts = -1;
nvme_dev_list_remove(dev); del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock); mutex_lock(&dev->shutdown_lock);
if (dev->bar) { if (dev->bar) {
...@@ -1927,9 +1869,7 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -1927,9 +1869,7 @@ static void nvme_reset_work(struct work_struct *work)
dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
queue_work(nvme_workq, &dev->async_work); queue_work(nvme_workq, &dev->async_work);
result = nvme_dev_list_add(dev); mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
if (result)
goto remove;
/* /*
* Keep the controller around but remove all namespaces if we don't have * Keep the controller around but remove all namespaces if we don't have
...@@ -1946,8 +1886,6 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -1946,8 +1886,6 @@ static void nvme_reset_work(struct work_struct *work)
clear_bit(NVME_CTRL_RESETTING, &dev->flags); clear_bit(NVME_CTRL_RESETTING, &dev->flags);
return; return;
remove:
nvme_dev_list_remove(dev);
free_tags: free_tags:
nvme_dev_remove_admin(dev); nvme_dev_remove_admin(dev);
blk_put_queue(dev->ctrl.admin_q); blk_put_queue(dev->ctrl.admin_q);
...@@ -2055,11 +1993,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2055,11 +1993,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->dev = get_device(&pdev->dev); dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev); pci_set_drvdata(pdev, dev);
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->reset_work, nvme_reset_work); INIT_WORK(&dev->reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
INIT_WORK(&dev->async_work, nvme_async_event_work); INIT_WORK(&dev->async_work, nvme_async_event_work);
setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
(unsigned long)dev);
mutex_init(&dev->shutdown_lock); mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait); init_completion(&dev->ioq_wait);
...@@ -2108,9 +2047,7 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2108,9 +2047,7 @@ static void nvme_remove(struct pci_dev *pdev)
{ {
struct nvme_dev *dev = pci_get_drvdata(pdev); struct nvme_dev *dev = pci_get_drvdata(pdev);
spin_lock(&dev_list_lock); del_timer_sync(&dev->watchdog_timer);
list_del_init(&dev->node);
spin_unlock(&dev_list_lock);
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
flush_work(&dev->async_work); flush_work(&dev->async_work);
...@@ -2223,8 +2160,6 @@ static int __init nvme_init(void) ...@@ -2223,8 +2160,6 @@ static int __init nvme_init(void)
{ {
int result; int result;
init_waitqueue_head(&nvme_kthread_wait);
nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!nvme_workq) if (!nvme_workq)
return -ENOMEM; return -ENOMEM;
...@@ -2239,7 +2174,6 @@ static void __exit nvme_exit(void) ...@@ -2239,7 +2174,6 @@ static void __exit nvme_exit(void)
{ {
pci_unregister_driver(&nvme_driver); pci_unregister_driver(&nvme_driver);
destroy_workqueue(nvme_workq); destroy_workqueue(nvme_workq);
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size(); _nvme_check_size();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment