Commit 5110f402 authored by Sagi Grimberg

nvme-rdma: serialize controller teardown sequences

In the timeout handler we may need to complete a request because the
request that timed out may be an I/O that is part of a serial sequence
of controller teardown or initialization. In order to complete the
request, we need to fence any other context that may compete with us
and complete the request that is timing out.

In this case, we could have a potential double completion in case
a hard-irq or a different competing context triggered error recovery
and is running inflight request cancellation concurrently with the
timeout handler.

Protect using a ctrl teardown_lock to serialize contexts that may
complete a cancelled request due to error recovery or a reset.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
parent e5c01f4f
@@ -122,6 +122,7 @@ struct nvme_rdma_ctrl {
 	struct sockaddr_storage src_addr;
 
 	struct nvme_ctrl	ctrl;
+	struct mutex		teardown_lock;
 	bool			use_inline_data;
 	u32			io_queues[HCTX_MAX_TYPES];
 };
@@ -997,6 +998,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
+	mutex_lock(&ctrl->teardown_lock);
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	if (ctrl->ctrl.admin_tagset) {
@@ -1007,11 +1009,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	if (remove)
 		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl, remove);
+	mutex_unlock(&ctrl->teardown_lock);
 }
 
 static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
+	mutex_lock(&ctrl->teardown_lock);
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_start_freeze(&ctrl->ctrl);
 		nvme_stop_queues(&ctrl->ctrl);
@@ -1025,6 +1029,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 			nvme_start_queues(&ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, remove);
 	}
+	mutex_unlock(&ctrl->teardown_lock);
 }
 
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -2278,6 +2283,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		return ERR_PTR(-ENOMEM);
 	ctrl->ctrl.opts = opts;
 	INIT_LIST_HEAD(&ctrl->list);
+	mutex_init(&ctrl->teardown_lock);
 
 	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
 		opts->trsvcid =
...
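
Note: the diff is truncated above, before the timeout-handler change the commit message refers to. As a minimal sketch of the fencing pattern described there, a timed-out request would be completed only while holding the same teardown_lock, after stopping the queue, so that concurrent error recovery cannot complete it a second time. The helper name and exact body below are illustrative assumptions, not code from this excerpt:

	/* Sketch only: helper name and exact logic are assumptions. */
	static void nvme_rdma_complete_timed_out(struct request *rq)
	{
		struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
		struct nvme_rdma_queue *queue = req->queue;
		struct nvme_rdma_ctrl *ctrl = queue->ctrl;

		/* fence error recovery/reset, which take the same lock */
		mutex_lock(&ctrl->teardown_lock);
		nvme_rdma_stop_queue(queue);
		/* complete only if no competing context already did */
		if (!blk_mq_request_completed(rq)) {
			nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
			blk_mq_complete_request(rq);
		}
		mutex_unlock(&ctrl->teardown_lock);
	}

With this, the contexts that may complete a cancelled request (timeout handling, error recovery, reset-driven teardown) are mutually exclusive, closing the double-completion window the commit message describes.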