Commit 8407879c authored by Sagi Grimberg, committed by Christoph Hellwig

nvmet-rdma: fix possible bogus dereference under heavy load

Currently we always repost the recv buffer before we send a response
capsule back to the host. Since ordering is not guaranteed for send
and recv completions, it is possible that we will receive a new request
from the host before we get a send completion for the response capsule.

Today, we pre-allocate twice the queue length of rsps, but in reality,
under heavy load nothing really prevents the gap from growing until we
exhaust all our rsps.

To fix this, if we don't have any pre-allocated rsps left, we dynamically
allocate an rsp and make sure to free it when we are done. If, under memory
pressure, we fail to allocate an rsp, we silently drop the command and
wait for the host to retry.
Reported-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
[hch: dropped a superfluous assignment]
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent bc811f05
drivers/nvme/target/rdma.c

@@ -66,6 +66,7 @@ struct nvmet_rdma_rsp {
 
 	struct nvmet_req	req;
 
+	bool			allocated;
 	u8			n_rdma;
 	u32			flags;
 	u32			invalidate_rkey;
@@ -174,11 +175,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->rsps_lock, flags);
-	rsp = list_first_entry(&queue->free_rsps,
+	rsp = list_first_entry_or_null(&queue->free_rsps,
 				struct nvmet_rdma_rsp, free_list);
-	list_del(&rsp->free_list);
+	if (likely(rsp))
+		list_del(&rsp->free_list);
 	spin_unlock_irqrestore(&queue->rsps_lock, flags);
 
+	if (unlikely(!rsp)) {
+		rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+		if (unlikely(!rsp))
+			return NULL;
+		rsp->allocated = true;
+	}
+
 	return rsp;
 }
 
@@ -187,6 +196,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
 {
 	unsigned long flags;
 
+	if (rsp->allocated) {
+		kfree(rsp);
+		return;
+	}
+
 	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
 	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
 	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
@@ -776,6 +790,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	cmd->queue = queue;
 	rsp = nvmet_rdma_get_rsp(queue);
+	if (unlikely(!rsp)) {
+		/*
+		 * we get here only under memory pressure,
+		 * silently drop and have the host retry
+		 * as we can't even fail it.
+		 */
+		nvmet_rdma_post_recv(queue->dev, cmd);
+		return;
+	}
 	rsp->queue = queue;
 	rsp->cmd = cmd;
 	rsp->flags = 0;
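The pattern introduced by this patch can also be sketched in isolation. The userspace C sketch below is illustrative only and is not the driver's code: struct rsp, struct rsp_pool, get_rsp() and put_rsp() are hypothetical stand-ins, a pthread mutex stands in for the queue's rsps_lock spinlock, and malloc()/free() stand in for kmalloc()/kfree(). It shows the same idea as the diff: fall back to a dynamic allocation when the pre-allocated free list is empty, mark such entries, and free them on put instead of returning them to the pool.

/*
 * Illustrative userspace sketch of the fallback pattern above; not the
 * driver's code. struct rsp, struct rsp_pool, get_rsp() and put_rsp()
 * are hypothetical stand-ins, a pthread mutex replaces the rsps_lock
 * spinlock, and malloc()/free() replace kmalloc()/kfree().
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct rsp {
	struct rsp *next;	/* free-list linkage */
	bool allocated;		/* true if not taken from the pre-allocated pool */
};

struct rsp_pool {
	pthread_mutex_t lock;
	struct rsp *free_list;	/* pre-allocated rsps; the (not shown) pool
				 * initializer links them here with
				 * allocated == false */
};

static struct rsp *get_rsp(struct rsp_pool *pool)
{
	struct rsp *rsp;

	pthread_mutex_lock(&pool->lock);
	rsp = pool->free_list;
	if (rsp)			/* common case: take a pre-allocated rsp */
		pool->free_list = rsp->next;
	pthread_mutex_unlock(&pool->lock);

	if (!rsp) {
		/* pool exhausted: fall back to a dynamic allocation */
		rsp = malloc(sizeof(*rsp));
		if (!rsp)
			return NULL;	/* caller drops the command; the host retries */
		rsp->allocated = true;
	}
	return rsp;
}

static void put_rsp(struct rsp_pool *pool, struct rsp *rsp)
{
	if (rsp->allocated) {		/* dynamically allocated: just free it */
		free(rsp);
		return;
	}

	pthread_mutex_lock(&pool->lock);
	rsp->next = pool->free_list;	/* return it to the pre-allocated pool */
	pool->free_list = rsp;
	pthread_mutex_unlock(&pool->lock);
}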