Commit 70f437fb, authored by Keith Busch, committed by Christoph Hellwig

nvme-tcp: fix io_work priority inversion

Dispatching requests inline with the .queue_rq() call may block while
holding the send_mutex. If the tcp io_work also happens to schedule, it
may see the req_list is non-empty, leaving "pending" true and remaining
in TASK_RUNNING. Since io_work is of higher scheduling priority, the
.queue_rq task may not get a chance to run, blocking forward progress
and leading to io timeouts.

Instead of checking for pending requests within io_work, let the queueing
restart io_work outside the send_mutex lock if there is more work to be
done.

Fixes: a0fdd141 ("nvme-tcp: rerun io_work if req_list is not empty")
Reported-by: Samuel Jones <sjones@kalrayinc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 9817d763
...@@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) ...@@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
} while (ret > 0); } while (ret > 0);
} }
/*
 * Report whether @queue still has outstanding send-side state.
 *
 * Returns true when any of the following holds:
 *   - queue->send_list is non-empty,
 *   - queue->req_list (a lock-less llist) is non-empty,
 *   - queue->more_requests is set.
 * Returns false only when all three are clear.
 *
 * The check takes no lock itself.
 */
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list) || queue->more_requests;
}
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync, bool last) bool sync, bool last)
{ {
...@@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, ...@@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
nvme_tcp_send_all(queue); nvme_tcp_send_all(queue);
queue->more_requests = false; queue->more_requests = false;
mutex_unlock(&queue->send_mutex); mutex_unlock(&queue->send_mutex);
} else if (last) {
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
} }
if (last && nvme_tcp_queue_more(queue))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
} }
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
...@@ -906,12 +913,6 @@ static void nvme_tcp_state_change(struct sock *sk) ...@@ -906,12 +913,6 @@ static void nvme_tcp_state_change(struct sock *sk)
read_unlock_bh(&sk->sk_callback_lock); read_unlock_bh(&sk->sk_callback_lock);
} }
/*
 * Report whether @queue still has outstanding send-side state.
 *
 * Returns true when queue->send_list is non-empty, when queue->req_list
 * is non-empty, or when queue->more_requests is set; false otherwise.
 */
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list) || queue->more_requests;
}
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{ {
queue->request = NULL; queue->request = NULL;
...@@ -1145,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w) ...@@ -1145,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
pending = true; pending = true;
else if (unlikely(result < 0)) else if (unlikely(result < 0))
break; break;
} else }
pending = !llist_empty(&queue->req_list);
result = nvme_tcp_try_recv(queue); result = nvme_tcp_try_recv(queue);
if (result > 0) if (result > 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment