Commit ee0c8a9b authored by Linus Torvalds

Merge tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Invalid namespace identification error handling (Maurizio, Ewan,
       Keith)
     - Fabrics keep-alive tuning (Mark)

 - Fix for a bad error check regression in bcache (Markus)

 - Fix for a performance regression with O_DIRECT (Ming)

 - Fix for a flush related deadlock (Ming)

 - Make the read-only warning per-partition (Yu)

* tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux:
  nvme-core: check for too small lba shift
  blk-mq: don't count completed flush data request as inflight in case of quiesce
  block: Document the role of the two attribute groups
  block: warn once for each partition in bio_check_ro()
  block: move .bd_inode into 1st cacheline of block_device
  nvme: check for valid nvme_identify_ns() before using it
  nvme-core: fix a memory leak in nvme_ns_info_from_identify()
  nvme: fine-tune sending of first keep-alive
  bcache: revert replacing IS_ERR_OR_NULL with IS_ERR
parents abd792f3 8ad3ac92

block/blk-core.c
@@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio)
 	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
 			return;
-		pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",
-				    bio->bi_bdev);
-		/* Older lvm-tools actually trigger this */
+
+		if (bio->bi_bdev->bd_ro_warned)
+			return;
+
+		bio->bi_bdev->bd_ro_warned = true;
+		/*
+		 * Use ioctl to set underlying disk of raid/dm to read-only
+		 * will trigger this.
+		 */
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
 	}
 }
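
A hypothetical userspace reproducer of the scenario the new bd_ro_warned flag covers (the device path and mount point are assumptions; BLKROSET is the real ioctl): with a filesystem mounted from the target partition, flip the partition's policy to read-only and dirty a file, which should make subsequent writeback bios hit bio_check_ro(). With this patch the warning is logged once per partition instead of rate-limited.

    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
        int ro = 1;
        /* Assumed scratch partition with a filesystem mounted at /mnt. */
        int bdev = open("/dev/sdX1", O_RDONLY);
        int file = open("/mnt/reproducer", O_WRONLY | O_CREAT, 0600);

        if (bdev < 0 || file < 0) {
            perror("open");
            return 1;
        }
        if (ioctl(bdev, BLKROSET, &ro) < 0)  /* set read-only policy */
            perror("BLKROSET");
        if (write(file, "x", 1) == 1)
            fsync(file);  /* writeback bio should hit bio_check_ro() */
        close(file);
        close(bdev);
        return 0;
    }
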

block/blk-mq.c
@@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
+static bool blk_is_flush_data_rq(struct request *rq)
+{
+	return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
+}
+
 static bool blk_mq_rq_inflight(struct request *rq, void *priv)
 {
 	/*
 	 * If we find a request that isn't idle we know the queue is busy
 	 * as it's checked in the iter.
 	 * Return false to stop the iteration.
+	 *
+	 * In case of queue quiesce, if one flush data request is completed,
+	 * don't count it as inflight given the flush sequence is suspended,
+	 * and the original flush data request is invisible to driver, just
+	 * like other pending requests because of quiesce
 	 */
-	if (blk_mq_request_started(rq)) {
+	if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
+				blk_is_flush_data_rq(rq) &&
+				blk_mq_request_completed(rq))) {
 		bool *busy = priv;
 
 		*busy = true;
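
The new condition reads most easily as a predicate. A minimal standalone model in plain C (not kernel code; the struct and function names are invented stand-ins for the kernel helpers):

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-ins for the request state the kernel queries. */
    struct rq_state {
        bool started;         /* blk_mq_request_started() */
        bool completed;       /* blk_mq_request_completed() */
        bool queue_quiesced;  /* blk_queue_quiesced() */
        bool flush_data;      /* blk_is_flush_data_rq() */
    };

    static bool rq_inflight(const struct rq_state *rq)
    {
        /* Started counts as inflight, unless it is a completed flush
         * data request on a quiesced queue. */
        return rq->started &&
               !(rq->queue_quiesced && rq->flush_data && rq->completed);
    }

    int main(void)
    {
        /* The deadlock case the patch fixes. */
        struct rq_state rq = {
            .started = true, .completed = true,
            .queue_quiesced = true, .flush_data = true,
        };

        printf("inflight: %d\n", rq_inflight(&rq));  /* prints 0 */
        return 0;
    }
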

block/blk-sysfs.c
@@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 #endif
 
+/* Common attributes for bio-based and request-based queues. */
 static struct attribute *queue_attrs[] = {
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
@@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = {
 	NULL,
 };
 
+/* Request-based queue attributes that are not relevant for bio-based queues. */
 static struct attribute *blk_mq_queue_attrs[] = {
 	&queue_requests_entry.attr,
 	&elv_iosched_entry.attr,
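
Both arrays are NULL-terminated, the usual sysfs convention that lets consumers walk each group without a separate length. A toy C illustration of the sentinel pattern (invented names, not the block layer's types):

    #include <stdio.h>

    struct attr { const char *name; };

    static struct attr ra = { "read_ahead_kb" };
    static struct attr max_hw = { "max_hw_sectors_kb" };
    /* Trailing NULL marks the end of the group, as in queue_attrs[]. */
    static struct attr *queue_attrs_example[] = { &ra, &max_hw, NULL };

    int main(void)
    {
        for (struct attr **a = queue_attrs_example; *a; a++)
            printf("%s\n", (*a)->name);
        return 0;
    }
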

drivers/md/bcache/btree.c
@@ -1522,7 +1522,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	bch_keylist_free(&keylist);
 
 	for (i = 0; i < nodes; i++)
-		if (!IS_ERR(new_nodes[i])) {
+		if (!IS_ERR_OR_NULL(new_nodes[i])) {
 			btree_node_free(new_nodes[i]);
 			rw_unlock(true, new_nodes[i]);
 		}
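
The revert matters because slots of new_nodes[] can legitimately hold NULL here, and IS_ERR() treats NULL as a valid pointer. A standalone model of the kernel's error-pointer helpers (definitions copied in for illustration) makes the difference visible:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095
    #define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

    static void *ERR_PTR(long error) { return (void *)error; }
    static int IS_ERR(const void *ptr)
    {
        return IS_ERR_VALUE((unsigned long)ptr);
    }
    static int IS_ERR_OR_NULL(const void *ptr)
    {
        return !ptr || IS_ERR(ptr);
    }

    int main(void)
    {
        int node = 0;
        /* An array like new_nodes[]: an error pointer, an empty (NULL)
         * slot, and a valid node. */
        void *slots[] = { ERR_PTR(-ENOMEM), NULL, &node };

        for (int i = 0; i < 3; i++)
            printf("slot %d: IS_ERR=%d IS_ERR_OR_NULL=%d\n", i,
                   IS_ERR(slots[i]), IS_ERR_OR_NULL(slots[i]));
        /* Slot 1 has IS_ERR=0 but IS_ERR_OR_NULL=1: with plain IS_ERR()
         * a NULL slot would be passed to btree_node_free()/rw_unlock(). */
        return 0;
    }
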

drivers/nvme/host/core.c
@@ -1192,8 +1192,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
 
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-	queue_delayed_work(nvme_wq, &ctrl->ka_work,
-			   nvme_keep_alive_work_period(ctrl));
+	unsigned long now = jiffies;
+	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+	unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;
+
+	if (time_after(now, ka_next_check_tm))
+		delay = 0;
+	else
+		delay = ka_next_check_tm - now;
+
+	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
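
The delay computation stays correct across jiffies wraparound because of time_after(). A standalone sketch using the kernel's definition of that macro (clamp_ka_delay() is an invented name mirroring the logic above):

    #include <stdio.h>

    /* The kernel's wraparound-safe comparison (include/linux/jiffies.h). */
    #define time_after(a, b) ((long)((b) - (a)) < 0)

    static unsigned long clamp_ka_delay(unsigned long now,
                                        unsigned long last_check,
                                        unsigned long delay)
    {
        unsigned long next_check = last_check + delay;

        /* Fire immediately if the next check time has already passed. */
        return time_after(now, next_check) ? 0 : next_check - now;
    }

    int main(void)
    {
        /* Overdue: the keep-alive should be sent right away. */
        printf("%lu\n", clamp_ka_delay(2000, 1000, 500));  /* 0 */
        /* 250 ticks of the 500-tick period remain. */
        printf("%lu\n", clamp_ka_delay(1250, 1000, 500));  /* 250 */
        /* Still correct when the counter wraps past ULONG_MAX. */
        printf("%lu\n",
               clamp_ka_delay(5, (unsigned long)-100, 500));  /* 395 */
        return 0;
    }
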
@@ -1479,7 +1487,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 	if (id->ncap == 0) {
 		/* namespace not allocated or attached */
 		info->is_removed = true;
-		return -ENODEV;
+		ret = -ENODEV;
+		goto error;
 	}
 
 	info->anagrpid = id->anagrpid;
@@ -1497,8 +1506,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
 			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
 	}
+
+error:
 	kfree(id);
-	return 0;
+	return ret;
 }
 
 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
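
The leak fix replaces an early return with the usual single-exit cleanup pattern, so the Identify buffer is freed on every path. A minimal standalone sketch of the pattern (invented names, userspace C):

    #include <errno.h>
    #include <stdlib.h>

    struct id_data { int ncap; };  /* stand-in for struct nvme_id_ns */

    static int example_identify(int unallocated)
    {
        struct id_data *id = malloc(sizeof(*id));
        int ret = 0;

        if (!id)
            return -ENOMEM;  /* nothing allocated yet, plain return is fine */

        id->ncap = unallocated ? 0 : 1;
        if (id->ncap == 0) {
            /* A bare "return -ENODEV" here is the leak the patch fixes:
             * id would never be freed. */
            ret = -ENODEV;
            goto error;
        }
        /* ... use id ... */
    error:
        free(id);  /* single exit: freed on every path */
        return ret;
    }

    int main(void)
    {
        return example_identify(1) == -ENODEV ? 0 : 1;
    }
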
@@ -1890,9 +1901,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
 	/*
 	 * The block layer can't support LBA sizes larger than the page size
-	 * yet, so catch this early and don't allow block I/O.
+	 * or smaller than a sector size yet, so catch this early and don't
+	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT) {
+	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
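
With SECTOR_SHIFT = 9 and assuming PAGE_SHIFT = 12 (4 KiB pages), the check now accepts only LBA sizes from 512 bytes up to the page size; a device reporting ds = 0 in its LBA format would otherwise imply a "1-byte LBA" and nonsense capacity math. A standalone sketch of the bound:

    #include <stdbool.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9   /* 512-byte sectors */
    #define PAGE_SHIFT   12  /* assuming 4 KiB pages */

    static bool lba_shift_valid(unsigned int lba_shift)
    {
        return lba_shift >= SECTOR_SHIFT && lba_shift <= PAGE_SHIFT;
    }

    int main(void)
    {
        printf("ds=0  -> %d\n", lba_shift_valid(0));   /* 0: now rejected */
        printf("ds=9  -> %d\n", lba_shift_valid(9));   /* 1: 512 B LBA */
        printf("ds=12 -> %d\n", lba_shift_valid(12));  /* 1: 4 KiB LBA */
        printf("ds=13 -> %d\n", lba_shift_valid(13));  /* 0: > page size */
        return 0;
    }
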
@@ -2029,6 +2041,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (ret)
 		return ret;
 
+	if (id->ncap == 0) {
+		/* namespace not allocated or attached */
+		info->is_removed = true;
+		ret = -ENODEV;
+		goto error;
+	}
+
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->lba_shift = id->lbaf[lbaf].ds;
@@ -2090,6 +2109,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 		set_bit(NVME_NS_READY, &ns->flags);
 		ret = 0;
 	}
+
+error:
 	kfree(id);
 	return ret;
 }
@@ -4471,6 +4492,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+	ctrl->ka_last_check_time = jiffies;
 
 	BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
 			PAGE_SIZE);

include/linux/blk_types.h
@@ -49,9 +49,10 @@ struct block_device {
 	bool			bd_write_holder;
 	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
+	struct inode		*bd_inode;	/* will die */
+
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
-	struct inode *		bd_inode;	/* will die */
 	void *			bd_claiming;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;
@@ -69,6 +70,7 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+	bool			bd_ro_warned;
 	/*
 	 * keep this out-of-line as it's both big and not needed in the fast
 	 * path
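
The point of the move is to pull bd_inode into the first 64-byte cacheline alongside the fields the O_DIRECT fast path touches; the real layout can be inspected with pahole. A toy illustration of the offsetof()-based reasoning (the struct is invented, not the real block_device, and assumes an LP64 target):

    #include <stddef.h>
    #include <stdio.h>

    /* Toy struct only; field types and order are illustrative. */
    struct toy_bdev {
        unsigned long bd_start_sect;
        unsigned long bd_nr_sectors;
        void         *bd_disk;
        void         *bd_queue;
        void         *bd_stats;
        unsigned long bd_stamp;
        _Bool         bd_read_only;
        unsigned char bd_partno;
        _Bool         bd_write_holder;
        _Bool         bd_has_submit_bio;
        unsigned int  bd_dev;
        void         *bd_inode;  /* the moved field */
    };

    int main(void)
    {
        size_t off = offsetof(struct toy_bdev, bd_inode);

        /* On LP64 this lands at offset 56, i.e. inside cacheline 0. */
        printf("bd_inode at %zu (cacheline %zu)\n", off, off / 64);
        return 0;
    }
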