Commit 8f3ea359 authored by Josef Bacik's avatar Josef Bacik Committed by Jens Axboe

nbd: handle unexpected replies better

If the server or network is misbehaving and we get an unexpected reply
we can sometimes miss the request not being started and wait on a
request and never get a response, or even double complete the same
request.  Fix this by replacing the send_complete completion with just a
per command lock.  Add a per command cookie as well so that we can know
if we're getting a double completion for a previous event.  Also check
to make sure we dont have REQUEUED set as that means we raced with the
timeout handler and need to just let the retry occur.
Signed-off-by: default avatarJosef Bacik <josef@toxicpanda.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent d7d94d48
...@@ -116,11 +116,12 @@ struct nbd_device { ...@@ -116,11 +116,12 @@ struct nbd_device {
struct nbd_cmd { struct nbd_cmd {
struct nbd_device *nbd; struct nbd_device *nbd;
struct mutex lock;
int index; int index;
int cookie; int cookie;
struct completion send_complete;
blk_status_t status; blk_status_t status;
unsigned long flags; unsigned long flags;
u32 cmd_cookie;
}; };
#if IS_ENABLED(CONFIG_DEBUG_FS) #if IS_ENABLED(CONFIG_DEBUG_FS)
...@@ -157,6 +158,27 @@ static void nbd_requeue_cmd(struct nbd_cmd *cmd) ...@@ -157,6 +158,27 @@ static void nbd_requeue_cmd(struct nbd_cmd *cmd)
blk_mq_requeue_request(req, true); blk_mq_requeue_request(req, true);
} }
#define NBD_COOKIE_BITS 32
static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
u32 tag = blk_mq_unique_tag(req);
u64 cookie = cmd->cmd_cookie;
return (cookie << NBD_COOKIE_BITS) | tag;
}
static u32 nbd_handle_to_tag(u64 handle)
{
return (u32)handle;
}
static u32 nbd_handle_to_cookie(u64 handle)
{
return (u32)(handle >> NBD_COOKIE_BITS);
}
static const char *nbdcmd_to_ascii(int cmd) static const char *nbdcmd_to_ascii(int cmd)
{ {
switch (cmd) { switch (cmd) {
...@@ -330,6 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, ...@@ -330,6 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
} }
config = nbd->config; config = nbd->config;
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
if (config->num_connections > 1) { if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd), dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n", "Connection timed out, retrying (%d/%d alive)\n",
...@@ -354,6 +379,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, ...@@ -354,6 +379,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
nbd_mark_nsock_dead(nbd, nsock, 1); nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock); mutex_unlock(&nsock->tx_lock);
} }
mutex_unlock(&cmd->lock);
nbd_requeue_cmd(cmd); nbd_requeue_cmd(cmd);
nbd_config_put(nbd); nbd_config_put(nbd);
return BLK_EH_DONE; return BLK_EH_DONE;
...@@ -364,6 +390,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, ...@@ -364,6 +390,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
} }
set_bit(NBD_TIMEDOUT, &config->runtime_flags); set_bit(NBD_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR; cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
sock_shutdown(nbd); sock_shutdown(nbd);
nbd_config_put(nbd); nbd_config_put(nbd);
done: done:
...@@ -441,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -441,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct iov_iter from; struct iov_iter from;
unsigned long size = blk_rq_bytes(req); unsigned long size = blk_rq_bytes(req);
struct bio *bio; struct bio *bio;
u64 handle;
u32 type; u32 type;
u32 nbd_cmd_flags = 0; u32 nbd_cmd_flags = 0;
u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0; int sent = nsock->sent, skip = 0;
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
...@@ -485,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -485,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
goto send_pages; goto send_pages;
} }
iov_iter_advance(&from, sent); iov_iter_advance(&from, sent);
} else {
cmd->cmd_cookie++;
} }
cmd->index = index; cmd->index = index;
cmd->cookie = nsock->cookie; cmd->cookie = nsock->cookie;
...@@ -493,7 +522,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -493,7 +522,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size); request.len = htonl(size);
} }
memcpy(request.handle, &tag, sizeof(tag)); handle = nbd_cmd_handle(cmd);
memcpy(request.handle, &handle, sizeof(handle));
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
req, nbdcmd_to_ascii(type), req, nbdcmd_to_ascii(type),
...@@ -586,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -586,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct nbd_reply reply; struct nbd_reply reply;
struct nbd_cmd *cmd; struct nbd_cmd *cmd;
struct request *req = NULL; struct request *req = NULL;
u64 handle;
u16 hwq; u16 hwq;
u32 tag; u32 tag;
struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
struct iov_iter to; struct iov_iter to;
int ret = 0;
reply.magic = 0; reply.magic = 0;
iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
...@@ -607,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -607,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-EPROTO); return ERR_PTR(-EPROTO);
} }
memcpy(&tag, reply.handle, sizeof(u32)); memcpy(&handle, reply.handle, sizeof(handle));
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag); hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues) if (hwq < nbd->tag_set.nr_hw_queues)
req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
...@@ -619,11 +651,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -619,11 +651,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
} }
cmd = blk_mq_rq_to_pdu(req); cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
ret = -ENOENT;
goto out;
}
if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
req);
ret = -ENOENT;
goto out;
}
if (ntohl(reply.error)) { if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error)); ntohl(reply.error));
cmd->status = BLK_STS_IOERR; cmd->status = BLK_STS_IOERR;
return cmd; goto out;
} }
dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
...@@ -648,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -648,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (nbd_disconnected(config) || if (nbd_disconnected(config) ||
config->num_connections <= 1) { config->num_connections <= 1) {
cmd->status = BLK_STS_IOERR; cmd->status = BLK_STS_IOERR;
return cmd; goto out;
} }
return ERR_PTR(-EIO); ret = -EIO;
goto out;
} }
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
req, bvec.bv_len); req, bvec.bv_len);
} }
} else {
/* See the comment in nbd_queue_rq. */
wait_for_completion(&cmd->send_complete);
} }
return cmd; out:
mutex_unlock(&cmd->lock);
return ret ? ERR_PTR(ret) : cmd;
} }
static void recv_work(struct work_struct *work) static void recv_work(struct work_struct *work)
...@@ -855,7 +901,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -855,7 +901,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* that the server is misbehaving (or there was an error) before we're * that the server is misbehaving (or there was an error) before we're
* done sending everything over the wire. * done sending everything over the wire.
*/ */
init_completion(&cmd->send_complete); mutex_lock(&cmd->lock);
clear_bit(NBD_CMD_REQUEUED, &cmd->flags); clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
/* We can be called directly from the user space process, which means we /* We can be called directly from the user space process, which means we
...@@ -868,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -868,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = BLK_STS_IOERR; ret = BLK_STS_IOERR;
else if (!ret) else if (!ret)
ret = BLK_STS_OK; ret = BLK_STS_OK;
complete(&cmd->send_complete); mutex_unlock(&cmd->lock);
return ret; return ret;
} }
...@@ -1475,6 +1521,7 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, ...@@ -1475,6 +1521,7 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->nbd = set->driver_data; cmd->nbd = set->driver_data;
cmd->flags = 0; cmd->flags = 0;
mutex_init(&cmd->lock);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment