Commit ee0c8a9b authored by Linus Torvalds

Merge tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Invalid namespace identification error handling (Marizio Ewan,
       Keith)
     - Fabrics keep-alive tuning (Mark)

 - Fix for a bad error check regression in bcache (Markus)

 - Fix for a performance regression with O_DIRECT (Ming)

 - Fix for a flush related deadlock (Ming)

 - Make the read-only warning once per partition (Yu)

* tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux:
  nvme-core: check for too small lba shift
  blk-mq: don't count completed flush data request as inflight in case of quiesce
  block: Document the role of the two attribute groups
  block: warn once for each partition in bio_check_ro()
  block: move .bd_inode into 1st cacheline of block_device
  nvme: check for valid nvme_identify_ns() before using it
  nvme-core: fix a memory leak in nvme_ns_info_from_identify()
  nvme: fine-tune sending of first keep-alive
  bcache: revert replacing IS_ERR_OR_NULL with IS_ERR
parents abd792f3 8ad3ac92
@@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio)
 	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
 			return;
-		pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",
-				    bio->bi_bdev);
-		/* Older lvm-tools actually trigger this */
+
+		if (bio->bi_bdev->bd_ro_warned)
+			return;
+
+		bio->bi_bdev->bd_ro_warned = true;
+		/*
+		 * Use ioctl to set underlying disk of raid/dm to read-only
+		 * will trigger this.
+		 */
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
 	}
 }
 
...
@@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
+static bool blk_is_flush_data_rq(struct request *rq)
+{
+	return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
+}
+
 static bool blk_mq_rq_inflight(struct request *rq, void *priv)
 {
 	/*
 	 * If we find a request that isn't idle we know the queue is busy
 	 * as it's checked in the iter.
 	 * Return false to stop the iteration.
+	 *
+	 * In case of queue quiesce, if one flush data request is completed,
+	 * don't count it as inflight given the flush sequence is suspended,
+	 * and the original flush data request is invisible to driver, just
+	 * like other pending requests because of quiesce
 	 */
-	if (blk_mq_request_started(rq)) {
+	if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
+				blk_is_flush_data_rq(rq) &&
+				blk_mq_request_completed(rq))) {
 		bool *busy = priv;
 
 		*busy = true;
...
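For readability, the condition introduced in blk_mq_rq_inflight() above can be restated as a standalone predicate. This is an illustrative sketch only (the helper name rq_counts_as_inflight is not part of the commit); it relies on the same helpers the hunk uses, including the new blk_is_flush_data_rq() defined above, so it assumes blk-mq.c context.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Illustrative restatement of the inflight test above, not committed code. */
static bool rq_counts_as_inflight(struct request *rq)
{
	if (!blk_mq_request_started(rq))
		return false;		/* never dispatched: not inflight */

	/*
	 * A flush *data* request that already completed while the queue is
	 * quiesced is invisible to the driver, so don't count it either.
	 */
	if (blk_queue_quiesced(rq->q) && blk_is_flush_data_rq(rq) &&
	    blk_mq_request_completed(rq))
		return false;

	return true;
}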
@@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 #endif
 
+/* Common attributes for bio-based and request-based queues. */
 static struct attribute *queue_attrs[] = {
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
@@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = {
 	NULL,
 };
 
+/* Request-based queue attributes that are not relevant for bio-based queues. */
 static struct attribute *blk_mq_queue_attrs[] = {
 	&queue_requests_entry.attr,
 	&elv_iosched_entry.attr,
...
@@ -1522,7 +1522,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	bch_keylist_free(&keylist);
 
 	for (i = 0; i < nodes; i++)
-		if (!IS_ERR(new_nodes[i])) {
+		if (!IS_ERR_OR_NULL(new_nodes[i])) {
 			btree_node_free(new_nodes[i]);
 			rw_unlock(true, new_nodes[i]);
 		}
...
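The revert above matters because entries of new_nodes[] may be NULL on this error path, and IS_ERR(NULL) evaluates to false, so the stricter IS_ERR() check lets a NULL pointer reach btree_node_free(). A minimal sketch of the distinction using the kernel's <linux/err.h> helpers (the wrapper below is illustrative, not part of the commit):

#include <linux/err.h>

/*
 * Sketch: IS_ERR() rejects only ERR_PTR() values, while IS_ERR_OR_NULL()
 * also rejects NULL, which is what the coalesce cleanup loop needs.
 */
static bool node_is_usable(void *node)
{
	return !IS_ERR_OR_NULL(node);	/* false for NULL and for ERR_PTR(-E...) */
}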
@@ -1192,8 +1192,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
 
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-	queue_delayed_work(nvme_wq, &ctrl->ka_work,
-			   nvme_keep_alive_work_period(ctrl));
+	unsigned long now = jiffies;
+	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+	unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;
+
+	if (time_after(now, ka_next_check_tm))
+		delay = 0;
+	else
+		delay = ka_next_check_tm - now;
+
+	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1479,7 +1487,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 	if (id->ncap == 0) {
 		/* namespace not allocated or attached */
 		info->is_removed = true;
-		return -ENODEV;
+		ret = -ENODEV;
+		goto error;
 	}
 
 	info->anagrpid = id->anagrpid;
@@ -1497,8 +1506,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
 			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
 	}
+
+error:
 	kfree(id);
-	return 0;
+	return ret;
 }
 
 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
@@ -1890,9 +1901,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
 	/*
 	 * The block layer can't support LBA sizes larger than the page size
-	 * yet, so catch this early and don't allow block I/O.
+	 * or smaller than a sector size yet, so catch this early and don't
+	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT) {
+	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
@@ -2029,6 +2041,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (ret)
 		return ret;
 
+	if (id->ncap == 0) {
+		/* namespace not allocated or attached */
+		info->is_removed = true;
+		ret = -ENODEV;
+		goto error;
+	}
+
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->lba_shift = id->lbaf[lbaf].ds;
@@ -2090,6 +2109,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		set_bit(NVME_NS_READY, &ns->flags);
 		ret = 0;
 	}
+
+error:
 	kfree(id);
 	return ret;
 }
@@ -4471,6 +4492,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+	ctrl->ka_last_check_time = jiffies;
 
 	BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
 			PAGE_SIZE);
...
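To make the nvme keep-alive hunk above easier to follow: the delay is now computed relative to ctrl->ka_last_check_time rather than always re-arming a full period from "now", so a keep-alive that is already overdue fires immediately. A sketch of that arithmetic (nvme_ka_delay is a hypothetical helper name, not part of the commit):

#include <linux/jiffies.h>

/* Sketch: schedule the next keep-alive relative to the last check time. */
static unsigned long nvme_ka_delay(unsigned long last_check_time,
				   unsigned long period)
{
	unsigned long next_check = last_check_time + period;

	if (time_after(jiffies, next_check))
		return 0;			/* already overdue: fire now */
	return next_check - jiffies;		/* otherwise wait the remainder */
}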
@@ -49,9 +49,10 @@ struct block_device {
 	bool			bd_write_holder;
 	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
+	struct inode		*bd_inode;	/* will die */
+
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
-	struct inode *		bd_inode;	/* will die */
 	void *			bd_claiming;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;
@@ -69,6 +70,7 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+	bool			bd_ro_warned;
 	/*
 	 * keep this out-of-line as it's both big and not needed in the fast
 	 * path
...
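The blk_types.h reordering above is the O_DIRECT performance fix: bd_inode moves next to bd_dev so the fields touched on the hot submission path share the first cacheline of struct block_device. A compile-time sketch of how one might check that placement (illustrative only; assumes a typical 64-bit build where these leading fields fit within one L1_CACHE_BYTES line):

#include <linux/blk_types.h>
#include <linux/build_bug.h>
#include <linux/cache.h>
#include <linux/stddef.h>

/*
 * Illustrative check, not part of the commit: with the field moved,
 * bd_inode should land in the first cacheline of struct block_device.
 */
static_assert(offsetof(struct block_device, bd_inode) < L1_CACHE_BYTES,
	      "bd_inode expected in the first cacheline of block_device");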