Commit 07836e65 authored by Keith Busch

NVMe: Fix potential corruption during shutdown

The driver has to end unreturned commands at some point even if the
controller has not provided a completion. The driver tried to be safe by
deleting IO queues prior to ending all unreturned commands. That should
cause the controller to internally abort inflight commands, but an IO queue
deletion request does not have to be successful, so all bets are off. We
still have to make progress, so to be extra safe, this patch doesn't
clear a queue to release the dma mapping for a command until after the
pci device has been disabled.

This patch removes the special handling during device initialization
so controller recovery can be done all the time. This is possible since
initialization is not inlined with pci probe anymore.
Reported-by: Nilish Choudhury <nilesh.choudhury@oracle.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
parent 2e1d8448
...@@ -1274,29 +1274,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) ...@@ -1274,29 +1274,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = cmd->nvmeq; struct nvme_queue *nvmeq = cmd->nvmeq;
/*
* The aborted req will be completed on receiving the abort req.
* We enable the timer again. If hit twice, it'll cause a device reset,
* as the device then is in a faulty state.
*/
int ret = BLK_EH_RESET_TIMER;
dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
nvmeq->qid); nvmeq->qid);
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
if (!nvmeq->dev->initialized) {
/*
* Force cancelled command frees the request, which requires we
* return BLK_EH_NOT_HANDLED.
*/
nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
ret = BLK_EH_NOT_HANDLED;
} else
nvme_abort_req(req); nvme_abort_req(req);
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
return ret; /*
* The aborted req will be completed on receiving the abort req.
* We enable the timer again. If hit twice, it'll cause a device reset,
* as the device then is in a faulty state.
*/
return BLK_EH_RESET_TIMER;
} }
static void nvme_free_queue(struct nvme_queue *nvmeq) static void nvme_free_queue(struct nvme_queue *nvmeq)
...@@ -1349,7 +1338,6 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq) ...@@ -1349,7 +1338,6 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
struct blk_mq_hw_ctx *hctx = nvmeq->hctx; struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
if (hctx && hctx->tags) if (hctx && hctx->tags)
blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
...@@ -1372,7 +1360,10 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) ...@@ -1372,7 +1360,10 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
} }
if (!qid && dev->admin_q) if (!qid && dev->admin_q)
blk_mq_freeze_queue_start(dev->admin_q); blk_mq_freeze_queue_start(dev->admin_q);
nvme_clear_queue(nvmeq);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
} }
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
...@@ -2121,8 +2112,7 @@ static int nvme_kthread(void *data) ...@@ -2121,8 +2112,7 @@ static int nvme_kthread(void *data)
spin_lock(&dev_list_lock); spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) { list_for_each_entry_safe(dev, next, &dev_list, node) {
int i; int i;
if (readl(&dev->bar->csts) & NVME_CSTS_CFS && if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
dev->initialized) {
if (work_busy(&dev->reset_work)) if (work_busy(&dev->reset_work))
continue; continue;
list_del_init(&dev->node); list_del_init(&dev->node);
...@@ -2525,8 +2515,6 @@ static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq) ...@@ -2525,8 +2515,6 @@ static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
static void nvme_del_queue_end(struct nvme_queue *nvmeq) static void nvme_del_queue_end(struct nvme_queue *nvmeq)
{ {
struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
nvme_clear_queue(nvmeq);
nvme_put_dq(dq); nvme_put_dq(dq);
} }
...@@ -2669,7 +2657,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) ...@@ -2669,7 +2657,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
int i; int i;
u32 csts = -1; u32 csts = -1;
dev->initialized = 0;
nvme_dev_list_remove(dev); nvme_dev_list_remove(dev);
if (dev->bar) { if (dev->bar) {
...@@ -2680,7 +2667,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) ...@@ -2680,7 +2667,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
for (i = dev->queue_count - 1; i >= 0; i--) { for (i = dev->queue_count - 1; i >= 0; i--) {
struct nvme_queue *nvmeq = dev->queues[i]; struct nvme_queue *nvmeq = dev->queues[i];
nvme_suspend_queue(nvmeq); nvme_suspend_queue(nvmeq);
nvme_clear_queue(nvmeq);
} }
} else { } else {
nvme_disable_io_queues(dev); nvme_disable_io_queues(dev);
...@@ -2688,6 +2674,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) ...@@ -2688,6 +2674,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
nvme_disable_queue(dev, 0); nvme_disable_queue(dev, 0);
} }
nvme_dev_unmap(dev); nvme_dev_unmap(dev);
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_clear_queue(dev->queues[i]);
} }
static void nvme_dev_remove(struct nvme_dev *dev) static void nvme_dev_remove(struct nvme_dev *dev)
...@@ -2955,7 +2944,6 @@ static int nvme_dev_resume(struct nvme_dev *dev) ...@@ -2955,7 +2944,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
nvme_unfreeze_queues(dev); nvme_unfreeze_queues(dev);
nvme_set_irq_hints(dev); nvme_set_irq_hints(dev);
} }
dev->initialized = 1;
return 0; return 0;
} }
...@@ -3063,11 +3051,12 @@ static void nvme_async_probe(struct work_struct *work) ...@@ -3063,11 +3051,12 @@ static void nvme_async_probe(struct work_struct *work)
goto reset; goto reset;
nvme_set_irq_hints(dev); nvme_set_irq_hints(dev);
dev->initialized = 1;
return; return;
reset: reset:
if (!work_busy(&dev->reset_work)) {
dev->reset_workfn = nvme_reset_failed_dev; dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work); queue_work(nvme_workq, &dev->reset_work);
}
} }
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
......
...@@ -103,7 +103,6 @@ struct nvme_dev { ...@@ -103,7 +103,6 @@ struct nvme_dev {
u16 abort_limit; u16 abort_limit;
u8 event_limit; u8 event_limit;
u8 vwc; u8 vwc;
u8 initialized;
}; };
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment