Commit a379fbbc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request:
      - fix nvmet-tcp header digest verification (Amit Engel)
      - fix a memory leak in nvmet-tcp when releasing a queue (Maurizio
        Lombardi)
      - fix nvme-tcp H2CData PDU send accounting again (Sagi Grimberg)
      - fix digest pointer calculation in nvme-tcp and nvmet-tcp (Varun
        Prakash)
      - fix possible nvme-tcp req->offset corruption (Varun Prakash)

 - Queue drain ordering fix (Ming)

 - Partition check regression for zoned devices (Shin'ichiro)

 - Zone queue restart fix (Naohiro)

* tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block:
  block: Fix partition check for host-aware zoned block devices
  nvmet-tcp: fix header digest verification
  nvmet-tcp: fix data digest pointer calculation
  nvme-tcp: fix data digest pointer calculation
  nvme-tcp: fix possible req->offset corruption
  block: schedule queue restart after BLK_STS_ZONE_RESOURCE
  block: drain queue after disk is removed from sysfs
  nvme-tcp: fix H2CData PDU send accounting (again)
  nvmet-tcp: fix a memory leak when releasing a queue
parents 17d50f89 f4aaf1fa
...@@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
int errors, queued; int errors, queued;
blk_status_t ret = BLK_STS_OK; blk_status_t ret = BLK_STS_OK;
LIST_HEAD(zone_list); LIST_HEAD(zone_list);
bool needs_resource = false;
if (list_empty(list)) if (list_empty(list))
return false; return false;
...@@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
queued++; queued++;
break; break;
case BLK_STS_RESOURCE: case BLK_STS_RESOURCE:
needs_resource = true;
fallthrough;
case BLK_STS_DEV_RESOURCE: case BLK_STS_DEV_RESOURCE:
blk_mq_handle_dev_resource(rq, list); blk_mq_handle_dev_resource(rq, list);
goto out; goto out;
...@@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
* accept. * accept.
*/ */
blk_mq_handle_zone_resource(rq, &zone_list); blk_mq_handle_zone_resource(rq, &zone_list);
needs_resource = true;
break; break;
default: default:
errors++; errors++;
...@@ -1406,7 +1410,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1406,7 +1410,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
/* For non-shared tags, the RESTART check will suffice */ /* For non-shared tags, the RESTART check will suffice */
bool no_tag = prep == PREP_DISPATCH_NO_TAG && bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
if (nr_budgets) if (nr_budgets)
blk_mq_release_budgets(q, list); blk_mq_release_budgets(q, list);
...@@ -1447,14 +1450,16 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, ...@@ -1447,14 +1450,16 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
* bit is set, run queue after a delay to avoid IO stalls * bit is set, run queue after a delay to avoid IO stalls
* that could otherwise occur if the queue is idle. We'll do * that could otherwise occur if the queue is idle. We'll do
* similar if we couldn't get budget and SCHED_RESTART is set. * similar if we couldn't get budget or couldn't lock a zone
* and SCHED_RESTART is set.
*/ */
needs_restart = blk_mq_sched_needs_restart(hctx); needs_restart = blk_mq_sched_needs_restart(hctx);
if (prep == PREP_DISPATCH_NO_BUDGET)
needs_resource = true;
if (!needs_restart || if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true); blk_mq_run_hw_queue(hctx, true);
else if (needs_restart && (ret == BLK_STS_RESOURCE || else if (needs_restart && needs_resource)
no_budget_avail))
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
blk_mq_update_dispatch_busy(hctx, true); blk_mq_update_dispatch_busy(hctx, true);
......
...@@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, ...@@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
} }
EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
static bool disk_has_partitions(struct gendisk *disk)
{
unsigned long idx;
struct block_device *part;
bool ret = false;
rcu_read_lock();
xa_for_each(&disk->part_tbl, idx, part) {
if (bdev_is_partition(part)) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}
/** /**
* blk_queue_set_zoned - configure a disk queue zoned model. * blk_queue_set_zoned - configure a disk queue zoned model.
* @disk: the gendisk of the queue to configure * @disk: the gendisk of the queue to configure
...@@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) ...@@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
* we do nothing special as far as the block layer is concerned. * we do nothing special as far as the block layer is concerned.
*/ */
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
!xa_empty(&disk->part_tbl)) disk_has_partitions(disk))
model = BLK_ZONED_NONE; model = BLK_ZONED_NONE;
break; break;
case BLK_ZONED_NONE: case BLK_ZONED_NONE:
......
...@@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk) ...@@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk)
* Prevent new I/O from crossing bio_queue_enter(). * Prevent new I/O from crossing bio_queue_enter().
*/ */
blk_queue_start_drain(q); blk_queue_start_drain(q);
blk_mq_freeze_queue_wait(q);
rq_qos_exit(q);
blk_sync_queue(q);
blk_flush_integrity();
/*
* Allow using passthrough request again after the queue is torn down.
*/
blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
__blk_mq_unfreeze_queue(q, true);
if (!(disk->flags & GENHD_FL_HIDDEN)) { if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
...@@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk) ...@@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk)); device_del(disk_to_dev(disk));
blk_mq_freeze_queue_wait(q);
rq_qos_exit(q);
blk_sync_queue(q);
blk_flush_integrity();
/*
* Allow using passthrough request again after the queue is torn down.
*/
blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
__blk_mq_unfreeze_queue(q, true);
} }
EXPORT_SYMBOL(del_gendisk); EXPORT_SYMBOL(del_gendisk);
......
...@@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) ...@@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len;
while (true) { while (true) {
struct page *page = nvme_tcp_req_cur_page(req); struct page *page = nvme_tcp_req_cur_page(req);
size_t offset = nvme_tcp_req_cur_offset(req); size_t offset = nvme_tcp_req_cur_offset(req);
size_t len = nvme_tcp_req_cur_length(req); size_t len = nvme_tcp_req_cur_length(req);
bool last = nvme_tcp_pdu_last_send(req, len); bool last = nvme_tcp_pdu_last_send(req, len);
int req_data_sent = req->data_sent;
int ret, flags = MSG_DONTWAIT; int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
...@@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
* in the request where we don't want to modify it as we may * in the request where we don't want to modify it as we may
* compete with the RX path completing the request. * compete with the RX path completing the request.
*/ */
if (req->data_sent + ret < req->data_len) if (req_data_sent + ret < req_data_len)
nvme_tcp_advance_req(req, ret); nvme_tcp_advance_req(req, ret);
/* fully successful last send in current PDU */ /* fully successful last send in current PDU */
...@@ -1048,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) ...@@ -1048,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset;
int ret; int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = &req->ddgst + req->offset, .iov_base = (u8 *)&req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
}; };
...@@ -1064,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) ...@@ -1064,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
nvme_tcp_done_send_req(queue); nvme_tcp_done_send_req(queue);
return 1; return 1;
} }
......
...@@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) ...@@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
struct nvmet_tcp_queue *queue = cmd->queue; struct nvmet_tcp_queue *queue = cmd->queue;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = &cmd->exp_ddgst + cmd->offset, .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
}; };
int ret; int ret;
...@@ -1096,7 +1096,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) ...@@ -1096,7 +1096,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
} }
if (queue->hdr_digest && if (queue->hdr_digest &&
nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
nvmet_tcp_fatal_error(queue); /* fatal */ nvmet_tcp_fatal_error(queue); /* fatal */
return -EPROTO; return -EPROTO;
} }
...@@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) ...@@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_release_queue_work(struct work_struct *w) static void nvmet_tcp_release_queue_work(struct work_struct *w)
{ {
struct page *page;
struct nvmet_tcp_queue *queue = struct nvmet_tcp_queue *queue =
container_of(w, struct nvmet_tcp_queue, release_work); container_of(w, struct nvmet_tcp_queue, release_work);
...@@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) ...@@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_tcp_free_crypto(queue); nvmet_tcp_free_crypto(queue);
ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
page = virt_to_head_page(queue->pf_cache.va);
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
kfree(queue); kfree(queue);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment