Commit b2750f14 authored by Jens Axboe

Merge tag 'nvme-5.17-2022-02-24' of git://git.infradead.org/nvme into block-5.17

Pull NVMe fixes from Christoph:

"nvme fixes for Linux 5.17

 - send H2CData PDUs based on MAXH2CDATA (Varun Prakash)
 - fix passthrough to namespaces with unsupported features (me)"

* tag 'nvme-5.17-2022-02-24' of git://git.infradead.org/nvme:
  nvme-tcp: send H2CData PDUs based on MAXH2CDATA
  nvme: also mark passthrough-only namespaces ready in nvme_update_ns_info
  nvme: don't return an error from nvme_configure_metadata
parents bb49c6fa c2700d28
...@@ -1723,7 +1723,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -1723,7 +1723,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return 0; return 0;
} }
static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
{ {
struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ctrl *ctrl = ns->ctrl;
...@@ -1739,7 +1739,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1739,7 +1739,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
return 0; return;
if (ctrl->ops->flags & NVME_F_FABRICS) { if (ctrl->ops->flags & NVME_F_FABRICS) {
/* /*
* The NVMe over Fabrics specification only supports metadata as * The NVMe over Fabrics specification only supports metadata as
...@@ -1747,7 +1748,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1747,7 +1748,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
* remap the separate metadata buffer from the block layer. * remap the separate metadata buffer from the block layer.
*/ */
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
return -EINVAL; return;
ns->features |= NVME_NS_EXT_LBAS; ns->features |= NVME_NS_EXT_LBAS;
...@@ -1774,8 +1775,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1774,8 +1775,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
else else
ns->features |= NVME_NS_METADATA_SUPPORTED; ns->features |= NVME_NS_METADATA_SUPPORTED;
} }
return 0;
} }
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
...@@ -1916,9 +1915,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1916,9 +1915,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
ns->lba_shift = id->lbaf[lbaf].ds; ns->lba_shift = id->lbaf[lbaf].ds;
nvme_set_queue_limits(ns->ctrl, ns->queue); nvme_set_queue_limits(ns->ctrl, ns->queue);
ret = nvme_configure_metadata(ns, id); nvme_configure_metadata(ns, id);
if (ret)
goto out_unfreeze;
nvme_set_chunk_sectors(ns, id); nvme_set_chunk_sectors(ns, id);
nvme_update_disk_info(ns->disk, ns, id); nvme_update_disk_info(ns->disk, ns, id);
...@@ -1934,7 +1931,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1934,7 +1931,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (blk_queue_is_zoned(ns->queue)) { if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns); ret = nvme_revalidate_zones(ns);
if (ret && !nvme_first_scan(ns->disk)) if (ret && !nvme_first_scan(ns->disk))
goto out; return ret;
} }
if (nvme_ns_head_multipath(ns->head)) { if (nvme_ns_head_multipath(ns->head)) {
...@@ -1949,16 +1946,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -1949,16 +1946,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
return 0; return 0;
out_unfreeze: out_unfreeze:
blk_mq_unfreeze_queue(ns->disk->queue);
out:
/* /*
* If probing fails due an unsupported feature, hide the block device, * If probing fails due an unsupported feature, hide the block device,
* but still allow other access. * but still allow other access.
*/ */
if (ret == -ENODEV) { if (ret == -ENODEV) {
ns->disk->flags |= GENHD_FL_HIDDEN; ns->disk->flags |= GENHD_FL_HIDDEN;
set_bit(NVME_NS_READY, &ns->flags);
ret = 0; ret = 0;
} }
blk_mq_unfreeze_queue(ns->disk->queue);
return ret; return ret;
} }
......
...@@ -44,6 +44,8 @@ struct nvme_tcp_request { ...@@ -44,6 +44,8 @@ struct nvme_tcp_request {
u32 data_len; u32 data_len;
u32 pdu_len; u32 pdu_len;
u32 pdu_sent; u32 pdu_sent;
u32 h2cdata_left;
u32 h2cdata_offset;
u16 ttag; u16 ttag;
__le16 status; __le16 status;
struct list_head entry; struct list_head entry;
...@@ -95,6 +97,7 @@ struct nvme_tcp_queue { ...@@ -95,6 +97,7 @@ struct nvme_tcp_queue {
struct nvme_tcp_request *request; struct nvme_tcp_request *request;
int queue_size; int queue_size;
u32 maxh2cdata;
size_t cmnd_capsule_len; size_t cmnd_capsule_len;
struct nvme_tcp_ctrl *ctrl; struct nvme_tcp_ctrl *ctrl;
unsigned long flags; unsigned long flags;
...@@ -572,22 +575,25 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, ...@@ -572,22 +575,25 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
return ret; return ret;
} }
static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_r2t_pdu *pdu)
{ {
struct nvme_tcp_data_pdu *data = req->pdu; struct nvme_tcp_data_pdu *data = req->pdu;
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
struct request *rq = blk_mq_rq_from_pdu(req); struct request *rq = blk_mq_rq_from_pdu(req);
u32 h2cdata_sent = req->pdu_len;
u8 hdgst = nvme_tcp_hdgst_len(queue); u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue); u8 ddgst = nvme_tcp_ddgst_len(queue);
req->state = NVME_TCP_SEND_H2C_PDU; req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0; req->offset = 0;
req->pdu_len = le32_to_cpu(pdu->r2t_length); req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
req->pdu_sent = 0; req->pdu_sent = 0;
req->h2cdata_left -= req->pdu_len;
req->h2cdata_offset += h2cdata_sent;
memset(data, 0, sizeof(*data)); memset(data, 0, sizeof(*data));
data->hdr.type = nvme_tcp_h2c_data; data->hdr.type = nvme_tcp_h2c_data;
if (!req->h2cdata_left)
data->hdr.flags = NVME_TCP_F_DATA_LAST; data->hdr.flags = NVME_TCP_F_DATA_LAST;
if (queue->hdr_digest) if (queue->hdr_digest)
data->hdr.flags |= NVME_TCP_F_HDGST; data->hdr.flags |= NVME_TCP_F_HDGST;
...@@ -597,9 +603,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, ...@@ -597,9 +603,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.pdo = data->hdr.hlen + hdgst; data->hdr.pdo = data->hdr.hlen + hdgst;
data->hdr.plen = data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag; data->ttag = req->ttag;
data->command_id = nvme_cid(rq); data->command_id = nvme_cid(rq);
data->data_offset = pdu->r2t_offset; data->data_offset = cpu_to_le32(req->h2cdata_offset);
data->data_length = cpu_to_le32(req->pdu_len); data->data_length = cpu_to_le32(req->pdu_len);
} }
...@@ -609,6 +615,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, ...@@ -609,6 +615,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct nvme_tcp_request *req; struct nvme_tcp_request *req;
struct request *rq; struct request *rq;
u32 r2t_length = le32_to_cpu(pdu->r2t_length); u32 r2t_length = le32_to_cpu(pdu->r2t_length);
u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) { if (!rq) {
...@@ -633,14 +640,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, ...@@ -633,14 +640,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return -EPROTO; return -EPROTO;
} }
if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) { if (unlikely(r2t_offset < req->data_sent)) {
dev_err(queue->ctrl->ctrl.device, dev_err(queue->ctrl->ctrl.device,
"req %d unexpected r2t offset %u (expected %zu)\n", "req %d unexpected r2t offset %u (expected %zu)\n",
rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent); rq->tag, r2t_offset, req->data_sent);
return -EPROTO; return -EPROTO;
} }
nvme_tcp_setup_h2c_data_pdu(req, pdu); req->pdu_len = 0;
req->h2cdata_left = r2t_length;
req->h2cdata_offset = r2t_offset;
req->ttag = pdu->ttag;
nvme_tcp_setup_h2c_data_pdu(req);
nvme_tcp_queue_request(req, false, true); nvme_tcp_queue_request(req, false, true);
return 0; return 0;
...@@ -928,6 +940,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -928,6 +940,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len; int req_data_len = req->data_len;
u32 h2cdata_left = req->h2cdata_left;
while (true) { while (true) {
struct page *page = nvme_tcp_req_cur_page(req); struct page *page = nvme_tcp_req_cur_page(req);
...@@ -972,6 +985,9 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -972,6 +985,9 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
req->state = NVME_TCP_SEND_DDGST; req->state = NVME_TCP_SEND_DDGST;
req->offset = 0; req->offset = 0;
} else { } else {
if (h2cdata_left)
nvme_tcp_setup_h2c_data_pdu(req);
else
nvme_tcp_done_send_req(queue); nvme_tcp_done_send_req(queue);
} }
return 1; return 1;
...@@ -1030,9 +1046,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) ...@@ -1030,9 +1046,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
if (queue->hdr_digest && !req->offset) if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
if (!req->h2cdata_left)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu), ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len, offset_in_page(pdu) + req->offset, len,
MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
else
ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
MSG_DONTWAIT | MSG_MORE);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
...@@ -1052,6 +1073,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) ...@@ -1052,6 +1073,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset; size_t offset = req->offset;
u32 h2cdata_left = req->h2cdata_left;
int ret; int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
...@@ -1069,6 +1091,9 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) ...@@ -1069,6 +1091,9 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
return ret; return ret;
if (offset + ret == NVME_TCP_DIGEST_LENGTH) { if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
if (h2cdata_left)
nvme_tcp_setup_h2c_data_pdu(req);
else
nvme_tcp_done_send_req(queue); nvme_tcp_done_send_req(queue);
return 1; return 1;
} }
...@@ -1261,6 +1286,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) ...@@ -1261,6 +1286,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
struct msghdr msg = {}; struct msghdr msg = {};
struct kvec iov; struct kvec iov;
bool ctrl_hdgst, ctrl_ddgst; bool ctrl_hdgst, ctrl_ddgst;
u32 maxh2cdata;
int ret; int ret;
icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
...@@ -1344,6 +1370,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) ...@@ -1344,6 +1370,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
goto free_icresp; goto free_icresp;
} }
maxh2cdata = le32_to_cpu(icresp->maxdata);
if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
pr_err("queue %d: invalid maxh2cdata returned %u\n",
nvme_tcp_queue_id(queue), maxh2cdata);
goto free_icresp;
}
queue->maxh2cdata = maxh2cdata;
ret = 0; ret = 0;
free_icresp: free_icresp:
kfree(icresp); kfree(icresp);
...@@ -2329,6 +2363,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ...@@ -2329,6 +2363,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
req->data_sent = 0; req->data_sent = 0;
req->pdu_len = 0; req->pdu_len = 0;
req->pdu_sent = 0; req->pdu_sent = 0;
req->h2cdata_left = 0;
req->data_len = blk_rq_nr_phys_segments(rq) ? req->data_len = blk_rq_nr_phys_segments(rq) ?
blk_rq_payload_bytes(rq) : 0; blk_rq_payload_bytes(rq) : 0;
req->curr_bio = rq->bio; req->curr_bio = rq->bio;
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_DISC_PORT 8009
#define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_ADMIN_CCSZ SZ_8K
#define NVME_TCP_DIGEST_LENGTH 4 #define NVME_TCP_DIGEST_LENGTH 4
#define NVME_TCP_MIN_MAXH2CDATA 4096
enum nvme_tcp_pfv { enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0, NVME_TCP_PFV_1_0 = 0x0,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment