Commit 2d70de4e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request via Christoph:
      - keep ctrl->namespaces ordered (Christoph Hellwig)
      - fix incorrect h2cdata pdu offset accounting in nvme-tcp (Sagi
        Grimberg)
      - handled updated hw_queues in nvme-fc more carefully (Daniel
        Wagner, James Smart)

 - md lock order fix (Christoph)

 - fallocate locking fix (Ming)

 - blktrace UAF fix (Zhihao)

 - rq-qos bio tracking fix (Ming)

* tag 'block-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
  block: hold ->invalidate_lock in blkdev_fallocate
  blktrace: Fix uaf in blk_trace access after removing by sysfs
  block: don't call rq_qos_ops->done_bio if the bio isn't tracked
  md: fix a lock order reversal in md_alloc
  nvme: keep ctrl->namespaces ordered
  nvme-tcp: fix incorrect h2cdata pdu offset accounting
  nvme-fc: remove freeze/unfreeze around update_nr_hw_queues
  nvme-fc: avoid race between time out and tear down
  nvme-fc: update hardware queues before using them
parents 57398443 f278eb3d
...@@ -1466,7 +1466,7 @@ void bio_endio(struct bio *bio) ...@@ -1466,7 +1466,7 @@ void bio_endio(struct bio *bio)
if (!bio_integrity_endio(bio)) if (!bio_integrity_endio(bio))
return; return;
if (bio->bi_bdev) if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/task_io_accounting_ops.h> #include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/fs.h>
#include "blk.h" #include "blk.h"
static struct inode *bdev_file_inode(struct file *file) static struct inode *bdev_file_inode(struct file *file)
...@@ -553,7 +554,8 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -553,7 +554,8 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
static long blkdev_fallocate(struct file *file, int mode, loff_t start, static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len) loff_t len)
{ {
struct block_device *bdev = I_BDEV(bdev_file_inode(file)); struct inode *inode = bdev_file_inode(file);
struct block_device *bdev = I_BDEV(inode);
loff_t end = start + len - 1; loff_t end = start + len - 1;
loff_t isize; loff_t isize;
int error; int error;
...@@ -580,10 +582,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, ...@@ -580,10 +582,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
if ((start | len) & (bdev_logical_block_size(bdev) - 1)) if ((start | len) & (bdev_logical_block_size(bdev) - 1))
return -EINVAL; return -EINVAL;
filemap_invalidate_lock(inode->i_mapping);
/* Invalidate the page cache, including dirty pages. */ /* Invalidate the page cache, including dirty pages. */
error = truncate_bdev_range(bdev, file->f_mode, start, end); error = truncate_bdev_range(bdev, file->f_mode, start, end);
if (error) if (error)
return error; goto fail;
switch (mode) { switch (mode) {
case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE:
...@@ -600,17 +604,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, ...@@ -600,17 +604,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
GFP_KERNEL, 0); GFP_KERNEL, 0);
break; break;
default: default:
return -EOPNOTSUPP; error = -EOPNOTSUPP;
} }
if (error)
return error;
/* fail:
* Invalidate the page cache again; if someone wandered in and dirtied filemap_invalidate_unlock(inode->i_mapping);
* a page, we just discard it - userspace has no way of knowing whether return error;
* the write happened before or after discard completing...
*/
return truncate_bdev_range(bdev, file->f_mode, start, end);
} }
const struct file_operations def_blk_fops = { const struct file_operations def_blk_fops = {
......
...@@ -5700,10 +5700,6 @@ static int md_alloc(dev_t dev, char *name) ...@@ -5700,10 +5700,6 @@ static int md_alloc(dev_t dev, char *name)
disk->flags |= GENHD_FL_EXT_DEVT; disk->flags |= GENHD_FL_EXT_DEVT;
disk->events |= DISK_EVENT_MEDIA_CHANGE; disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk; mddev->gendisk = disk;
/* As soon as we call add_disk(), another thread could get
* through to md_open, so make sure it doesn't get too far
*/
mutex_lock(&mddev->open_mutex);
add_disk(disk); add_disk(disk);
error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
...@@ -5718,7 +5714,6 @@ static int md_alloc(dev_t dev, char *name) ...@@ -5718,7 +5714,6 @@ static int md_alloc(dev_t dev, char *name)
if (mddev->kobj.sd && if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group)) sysfs_create_group(&mddev->kobj, &md_bitmap_group))
pr_debug("pointless warning\n"); pr_debug("pointless warning\n");
mutex_unlock(&mddev->open_mutex);
abort: abort:
mutex_unlock(&disks_mutex); mutex_unlock(&disks_mutex);
if (!error && mddev->kobj.sd) { if (!error && mddev->kobj.sd) {
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/list_sort.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/pr.h> #include <linux/pr.h>
...@@ -3716,15 +3715,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ...@@ -3716,15 +3715,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
return ret; return ret;
} }
static int ns_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
return nsa->head->ns_id - nsb->head->ns_id;
}
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{ {
struct nvme_ns *ns, *ret = NULL; struct nvme_ns *ns, *ret = NULL;
...@@ -3745,6 +3735,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) ...@@ -3745,6 +3735,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
} }
EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
/*
* Add the namespace to the controller list while keeping the list ordered.
*/
static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
{
struct nvme_ns *tmp;
list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
if (tmp->head->ns_id < ns->head->ns_id) {
list_add(&ns->list, &tmp->list);
return;
}
}
list_add(&ns->list, &ns->ctrl->namespaces);
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids) struct nvme_ns_ids *ids)
{ {
...@@ -3795,9 +3801,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, ...@@ -3795,9 +3801,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_unlink_ns; goto out_unlink_ns;
down_write(&ctrl->namespaces_rwsem); down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces); nvme_ns_add_to_ctrl_list(ns);
up_write(&ctrl->namespaces_rwsem); up_write(&ctrl->namespaces_rwsem);
nvme_get_ctrl(ctrl); nvme_get_ctrl(ctrl);
if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
...@@ -4080,10 +4085,6 @@ static void nvme_scan_work(struct work_struct *work) ...@@ -4080,10 +4085,6 @@ static void nvme_scan_work(struct work_struct *work)
if (nvme_scan_ns_list(ctrl) != 0) if (nvme_scan_ns_list(ctrl) != 0)
nvme_scan_ns_sequential(ctrl); nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock); mutex_unlock(&ctrl->scan_lock);
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
} }
/* /*
......
...@@ -2487,6 +2487,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) ...@@ -2487,6 +2487,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
*/ */
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl); nvme_stop_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set, blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl); nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->tag_set); blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
...@@ -2510,6 +2511,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) ...@@ -2510,6 +2511,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
* clean up the admin queue. Same thing as above. * clean up the admin queue. Same thing as above.
*/ */
blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_sync_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl); nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
...@@ -2951,6 +2953,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2951,6 +2953,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.queue_count == 1) if (ctrl->ctrl.queue_count == 1)
return 0; return 0;
if (prior_ioq_cnt != nr_io_queues) {
dev_info(ctrl->ctrl.device,
"reconnect: revising io queue count from %d to %d\n",
prior_ioq_cnt, nr_io_queues);
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
}
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret) if (ret)
goto out_free_io_queues; goto out_free_io_queues;
...@@ -2959,15 +2968,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) ...@@ -2959,15 +2968,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret) if (ret)
goto out_delete_hw_queues; goto out_delete_hw_queues;
if (prior_ioq_cnt != nr_io_queues) {
dev_info(ctrl->ctrl.device,
"reconnect: revising io queue count from %d to %d\n",
prior_ioq_cnt, nr_io_queues);
nvme_wait_freeze(&ctrl->ctrl);
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
nvme_unfreeze(&ctrl->ctrl);
}
return 0; return 0;
out_delete_hw_queues: out_delete_hw_queues:
......
...@@ -620,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, ...@@ -620,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag; data->ttag = pdu->ttag;
data->command_id = nvme_cid(rq); data->command_id = nvme_cid(rq);
data->data_offset = cpu_to_le32(req->data_sent); data->data_offset = pdu->r2t_offset;
data->data_length = cpu_to_le32(req->pdu_len); data->data_length = cpu_to_le32(req->pdu_len);
return 0; return 0;
} }
...@@ -953,7 +953,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -953,7 +953,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
nvme_tcp_ddgst_update(queue->snd_hash, page, nvme_tcp_ddgst_update(queue->snd_hash, page,
offset, ret); offset, ret);
/* fully successful last write*/ /*
* update the request iterator except for the last payload send
* in the request where we don't want to modify it as we may
* compete with the RX path completing the request.
*/
if (req->data_sent + ret < req->data_len)
nvme_tcp_advance_req(req, ret);
/* fully successful last send in current PDU */
if (last && ret == len) { if (last && ret == len) {
if (queue->data_digest) { if (queue->data_digest) {
nvme_tcp_ddgst_final(queue->snd_hash, nvme_tcp_ddgst_final(queue->snd_hash,
...@@ -965,7 +973,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -965,7 +973,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
} }
return 1; return 1;
} }
nvme_tcp_advance_req(req, ret);
} }
return -EAGAIN; return -EAGAIN;
} }
......
...@@ -1605,6 +1605,14 @@ static int blk_trace_remove_queue(struct request_queue *q) ...@@ -1605,6 +1605,14 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL) if (bt == NULL)
return -EINVAL; return -EINVAL;
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
}
put_probe_ref(); put_probe_ref();
synchronize_rcu(); synchronize_rcu();
blk_trace_free(bt); blk_trace_free(bt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment