Commit a3b111b0 authored by Linus Torvalds

Merge tag 'for-6.4/block-2023-05-06' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

 - MD pull request via Song:
      - Improve raid5 sequential IO performance on spinning disks, which
        fixes a regression since v6.0 (Jan Kara)
      - Fix bitmap offset types, which fixes an issue introduced in this
        merge window (Jonathan Derrick)

 - Cleanup of hweight type used for cgroup writeback (Maxim)

 - Fix a regression with the "has_submit_bio" changes across partitions
   (Ming)

 - Cleanup of QUEUE_FLAG_ADD_RANDOM clearing.

   We used to set this flag on non blk-mq queues, and hence some
   drivers cleared it unconditionally. Since all of these have since
   been converted to true blk-mq drivers, drop the useless clear as the
   bit is not set (Chaitanya)

 - Fix the flags being set in a bio for a flush for drbd (Christoph)

 - Cleanup and deduplication of the code handling setting block device
   capacity (Damien)

 - Fix for ublk handling IO timeouts (Ming)

 - Fix for a regression in blk-cgroup teardown (Tao)

 - NBD documentation and code fixes (Eric)

 - Convert blk-integrity to using device_attributes rather than a second
   kobject to manage lifetimes (Thomas)

* tag 'for-6.4/block-2023-05-06' of git://git.kernel.dk/linux:
  ublk: add timeout handler
  drbd: correctly submit flush bio on barrier
  mailmap: add mailmap entries for Jens Axboe
  block: Skip destroyed blkg when restart in blkg_destroy_all()
  writeback: fix call of incorrect macro
  md: Fix bitmap offset type in sb writer
  md/raid5: Improve performance for sequential IO
  docs nbd: userspace NBD now favors github over sourceforge
  block nbd: use req.cookie instead of req.handle
  uapi nbd: add cookie alias to handle
  uapi nbd: improve doc links to userspace spec
  blk-integrity: register sysfs attributes on struct device
  blk-integrity: convert to struct device_attribute
  blk-integrity: use sysfs_emit
  block/drivers: remove dead clear of random flag
  block: sync part's ->bd_has_submit_bio with disk's
  block: Cleanup set_capacity()/bdev_set_nr_sectors()
parents 7644c823 c0b79b0f
@@ -213,7 +213,10 @@ Jeff Garzik <jgarzik@pretzel.yyz.us>
 Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
 Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
 Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
-Jens Axboe <axboe@suse.de>
+Jens Axboe <axboe@kernel.dk> <axboe@suse.de>
+Jens Axboe <axboe@kernel.dk> <jens.axboe@oracle.com>
+Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
+Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
 Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
......
@@ -14,7 +14,7 @@ to borrow disk space from another computer.
 Unlike NFS, it is possible to put any filesystem on it, etc.
 
 For more information, or to download the nbd-client and nbd-server
-tools, go to http://nbd.sf.net/.
+tools, go to https://github.com/NetworkBlockDevice/nbd.
 
 The nbd kernel module need only be installed on the client
 system, as the nbd-server is completely in userspace. In fact,
......
@@ -418,8 +418,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
 	bdev->bd_queue = disk->queue;
-	bdev->bd_stats = alloc_percpu(struct disk_stats);
-	bdev->bd_has_submit_bio = false;
+	if (partno)
+		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
+	else
+		bdev->bd_has_submit_bio = false;
+	bdev->bd_stats = alloc_percpu(struct disk_stats);
 	if (!bdev->bd_stats) {
 		iput(inode);
 		return NULL;
@@ -428,6 +431,14 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	return bdev;
 }
 
+void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
+{
+	spin_lock(&bdev->bd_size_lock);
+	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+	bdev->bd_nr_sectors = sectors;
+	spin_unlock(&bdev->bd_size_lock);
+}
+
 void bdev_add(struct block_device *bdev, dev_t dev)
 {
 	bdev->bd_dev = dev;
......
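Note: bdev_set_nr_sectors() above converts a sector count to a byte size with
(loff_t)sectors << SECTOR_SHIFT; widening before the shift is what keeps large
capacities from overflowing. A minimal userspace sketch of the arithmetic
(SECTOR_SHIFT is 9 for 512-byte sectors):

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

int main(void)
{
	uint64_t sectors = 6442450944ULL;	/* a 3 TiB disk */

	/* Widen before shifting; a 32-bit shift would overflow. */
	int64_t bytes = (int64_t)sectors << SECTOR_SHIFT;

	printf("%llu sectors = %lld bytes\n",
	       (unsigned long long)sectors, (long long)bytes);
	return 0;
}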
@@ -567,6 +567,9 @@ static void blkg_destroy_all(struct gendisk *disk)
 	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
 
+		if (hlist_unhashed(&blkg->blkcg_node))
+			continue;
+
 		spin_lock(&blkcg->lock);
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
......
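Note: blkg_destroy_all() tears blkgs down in batches, dropping the lock and
restarting the list walk between batches, so a restarted pass can encounter
entries that an earlier pass already destroyed; the new hlist_unhashed() check
skips those. A standalone sketch of the same guard, with a hypothetical node
type in place of the kernel structures:

#include <stdbool.h>
#include <stdio.h>

struct node {
	bool unhashed;	/* set once the node is already torn down */
	int id;
};

/* Destroy every node, tolerating restarts that revisit dead entries. */
static void destroy_all(struct node *nodes, int n)
{
	for (int i = 0; i < n; i++) {
		if (nodes[i].unhashed)
			continue;	/* already destroyed on an earlier pass */
		nodes[i].unhashed = true;
		printf("destroyed node %d\n", nodes[i].id);
	}
}

int main(void)
{
	struct node nodes[] = { {false, 1}, {true, 2}, {false, 3} };

	destroy_all(nodes, 3);	/* node 2 is skipped, not destroyed twice */
	return 0;
}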
@@ -212,61 +212,44 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
 	return true;
 }
 
-struct integrity_sysfs_entry {
-	struct attribute attr;
-	ssize_t (*show)(struct blk_integrity *, char *);
-	ssize_t (*store)(struct blk_integrity *, const char *, size_t);
-};
-
-static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
-				   char *page)
-{
-	struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj);
-	struct blk_integrity *bi = &disk->queue->integrity;
-	struct integrity_sysfs_entry *entry =
-		container_of(attr, struct integrity_sysfs_entry, attr);
-
-	return entry->show(bi, page);
-}
-
-static ssize_t integrity_attr_store(struct kobject *kobj,
-				    struct attribute *attr, const char *page,
-				    size_t count)
-{
-	struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj);
-	struct blk_integrity *bi = &disk->queue->integrity;
-	struct integrity_sysfs_entry *entry =
-		container_of(attr, struct integrity_sysfs_entry, attr);
-	ssize_t ret = 0;
-
-	if (entry->store)
-		ret = entry->store(bi, page, count);
-
-	return ret;
-}
-
-static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
-{
-	if (bi->profile && bi->profile->name)
-		return sprintf(page, "%s\n", bi->profile->name);
-	else
-		return sprintf(page, "none\n");
-}
-
-static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
-{
-	return sprintf(page, "%u\n", bi->tag_size);
-}
-
-static ssize_t integrity_interval_show(struct blk_integrity *bi, char *page)
-{
-	return sprintf(page, "%u\n",
-		       bi->interval_exp ? 1 << bi->interval_exp : 0);
-}
-
-static ssize_t integrity_verify_store(struct blk_integrity *bi,
-				      const char *page, size_t count)
-{
+static inline struct blk_integrity *dev_to_bi(struct device *dev)
+{
+	return &dev_to_disk(dev)->queue->integrity;
+}
+
+static ssize_t format_show(struct device *dev, struct device_attribute *attr,
+			   char *page)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	if (bi->profile && bi->profile->name)
+		return sysfs_emit(page, "%s\n", bi->profile->name);
+	return sysfs_emit(page, "none\n");
+}
+
+static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr,
+			     char *page)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	return sysfs_emit(page, "%u\n", bi->tag_size);
+}
+
+static ssize_t protection_interval_bytes_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *page)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	return sysfs_emit(page, "%u\n",
+			  bi->interval_exp ? 1 << bi->interval_exp : 0);
+}
+
+static ssize_t read_verify_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *page, size_t count)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
 	char *p = (char *) page;
 	unsigned long val = simple_strtoul(p, &p, 10);
@@ -278,14 +261,20 @@ static ssize_t integrity_verify_store(struct blk_integrity *bi,
 	return count;
 }
 
-static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page)
+static ssize_t read_verify_show(struct device *dev,
+				struct device_attribute *attr, char *page)
 {
-	return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_VERIFY));
 }
 
-static ssize_t integrity_generate_store(struct blk_integrity *bi,
+static ssize_t write_generate_store(struct device *dev,
+				    struct device_attribute *attr,
 				    const char *page, size_t count)
 {
+	struct blk_integrity *bi = dev_to_bi(dev);
 	char *p = (char *) page;
 	unsigned long val = simple_strtoul(p, &p, 10);
@@ -297,68 +286,44 @@ static ssize_t integrity_generate_store(struct blk_integrity *bi,
 	return count;
 }
 
-static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
-{
-	return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
-}
-
-static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
-{
-	return sprintf(page, "%u\n",
-		       (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
-}
-
-static struct integrity_sysfs_entry integrity_format_entry = {
-	.attr = { .name = "format", .mode = 0444 },
-	.show = integrity_format_show,
-};
-
-static struct integrity_sysfs_entry integrity_tag_size_entry = {
-	.attr = { .name = "tag_size", .mode = 0444 },
-	.show = integrity_tag_size_show,
-};
-
-static struct integrity_sysfs_entry integrity_interval_entry = {
-	.attr = { .name = "protection_interval_bytes", .mode = 0444 },
-	.show = integrity_interval_show,
-};
-
-static struct integrity_sysfs_entry integrity_verify_entry = {
-	.attr = { .name = "read_verify", .mode = 0644 },
-	.show = integrity_verify_show,
-	.store = integrity_verify_store,
-};
-
-static struct integrity_sysfs_entry integrity_generate_entry = {
-	.attr = { .name = "write_generate", .mode = 0644 },
-	.show = integrity_generate_show,
-	.store = integrity_generate_store,
-};
-
-static struct integrity_sysfs_entry integrity_device_entry = {
-	.attr = { .name = "device_is_integrity_capable", .mode = 0444 },
-	.show = integrity_device_show,
-};
-
-static struct attribute *integrity_attrs[] = {
-	&integrity_format_entry.attr,
-	&integrity_tag_size_entry.attr,
-	&integrity_interval_entry.attr,
-	&integrity_verify_entry.attr,
-	&integrity_generate_entry.attr,
-	&integrity_device_entry.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(integrity);
-
-static const struct sysfs_ops integrity_ops = {
-	.show = &integrity_attr_show,
-	.store = &integrity_attr_store,
-};
-
-static const struct kobj_type integrity_ktype = {
-	.default_groups = integrity_groups,
-	.sysfs_ops = &integrity_ops,
-};
+static ssize_t write_generate_show(struct device *dev,
+				   struct device_attribute *attr, char *page)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_GENERATE));
+}
+
+static ssize_t device_is_integrity_capable_show(struct device *dev,
+						struct device_attribute *attr,
+						char *page)
+{
+	struct blk_integrity *bi = dev_to_bi(dev);
+
+	return sysfs_emit(page, "%u\n",
+			  !!(bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE));
+}
+
+static DEVICE_ATTR_RO(format);
+static DEVICE_ATTR_RO(tag_size);
+static DEVICE_ATTR_RO(protection_interval_bytes);
+static DEVICE_ATTR_RW(read_verify);
+static DEVICE_ATTR_RW(write_generate);
+static DEVICE_ATTR_RO(device_is_integrity_capable);
+
+static struct attribute *integrity_attrs[] = {
+	&dev_attr_format.attr,
+	&dev_attr_tag_size.attr,
+	&dev_attr_protection_interval_bytes.attr,
+	&dev_attr_read_verify.attr,
+	&dev_attr_write_generate.attr,
+	&dev_attr_device_is_integrity_capable.attr,
+	NULL
+};
+
+const struct attribute_group blk_integrity_attr_group = {
+	.name = "integrity",
+	.attrs = integrity_attrs,
+};
 
 static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
@@ -437,21 +402,3 @@ void blk_integrity_unregister(struct gendisk *disk)
 	memset(bi, 0, sizeof(*bi));
 }
 EXPORT_SYMBOL(blk_integrity_unregister);
-
-int blk_integrity_add(struct gendisk *disk)
-{
-	int ret;
-
-	ret = kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
-				   &disk_to_dev(disk)->kobj, "%s", "integrity");
-	if (!ret)
-		kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
-	return ret;
-}
-
-void blk_integrity_del(struct gendisk *disk)
-{
-	kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE);
-	kobject_del(&disk->integrity_kobj);
-	kobject_put(&disk->integrity_kobj);
-}
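Note: the conversion leans on the driver-core naming convention, where
DEVICE_ATTR_RO(format) binds the sysfs file "format" to format_show(), and
DEVICE_ATTR_RW(read_verify) binds read_verify_show()/read_verify_store(); that
is why the show/store helpers were renamed. A rough userspace mock of the
pattern (simplified stand-in types, not the kernel's definitions, and no file
modes):

#include <stdio.h>
#include <sys/types.h>

struct device { const char *name; };
struct device_attribute {
	const char *name;
	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
			char *page);
};

/* Mirrors the kernel's naming rule: DEVICE_ATTR_RO(x) binds x_show. */
#define DEVICE_ATTR_RO(_name) \
	struct device_attribute dev_attr_##_name = { #_name, _name##_show }

static ssize_t format_show(struct device *dev, struct device_attribute *attr,
			   char *page)
{
	return sprintf(page, "none\n");
}
static DEVICE_ATTR_RO(format);

int main(void)
{
	char page[64];
	struct device dev = { "sda" };

	dev_attr_format.show(&dev, &dev_attr_format, page);
	printf("%s/integrity/%s -> %s", dev.name, dev_attr_format.name, page);
	return 0;
}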
@@ -214,8 +214,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 			       bip_next->bip_vec[0].bv_offset);
 }
 
-int blk_integrity_add(struct gendisk *disk);
-void blk_integrity_del(struct gendisk *);
+extern const struct attribute_group blk_integrity_attr_group;
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static inline bool blk_integrity_merge_rq(struct request_queue *rq,
 		struct request *r1, struct request *r2)
@@ -248,13 +247,6 @@ static inline bool bio_integrity_endio(struct bio *bio)
 static inline void bio_integrity_free(struct bio *bio)
 {
 }
-static inline int blk_integrity_add(struct gendisk *disk)
-{
-	return 0;
-}
-static inline void blk_integrity_del(struct gendisk *disk)
-{
-}
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 unsigned long blk_rq_timeout(unsigned long timeout);
@@ -419,6 +411,8 @@ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
 		sector_t length);
 void blk_drop_partitions(struct gendisk *disk);
 
+void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
+
 struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 		struct lock_class_key *lkclass);
......
@@ -57,12 +57,7 @@ static DEFINE_IDA(ext_devt_ida);
 
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
-	struct block_device *bdev = disk->part0;
-
-	spin_lock(&bdev->bd_size_lock);
-	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-	bdev->bd_nr_sectors = sectors;
-	spin_unlock(&bdev->bd_size_lock);
+	bdev_set_nr_sectors(disk->part0, sectors);
 }
 EXPORT_SYMBOL(set_capacity);
@@ -487,15 +482,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	 */
 	pm_runtime_set_memalloc_noio(ddev, true);
 
-	ret = blk_integrity_add(disk);
-	if (ret)
-		goto out_del_block_link;
-
 	disk->part0->bd_holder_dir =
 		kobject_create_and_add("holders", &ddev->kobj);
 	if (!disk->part0->bd_holder_dir) {
 		ret = -ENOMEM;
-		goto out_del_integrity;
+		goto out_del_block_link;
 	}
 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 	if (!disk->slave_dir) {
@@ -558,8 +549,6 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	disk->slave_dir = NULL;
 out_put_holder_dir:
 	kobject_put(disk->part0->bd_holder_dir);
-out_del_integrity:
-	blk_integrity_del(disk);
 out_del_block_link:
 	sysfs_remove_link(block_depr, dev_name(ddev));
 out_device_del:
@@ -621,7 +610,6 @@ void del_gendisk(struct gendisk *disk)
 	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
 		return;
 
-	blk_integrity_del(disk);
 	disk_del_events(disk);
 	mutex_lock(&disk->open_mutex);
@@ -1155,6 +1143,9 @@ static const struct attribute_group *disk_attr_groups[] = {
 	&disk_attr_group,
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	&blk_trace_attr_group,
+#endif
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	&blk_integrity_attr_group,
 #endif
 	NULL
 };
......
@@ -85,14 +85,6 @@ static int (*check_part[])(struct parsed_partitions *) = {
 	NULL
 };
 
-static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
-{
-	spin_lock(&bdev->bd_size_lock);
-	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-	bdev->bd_nr_sectors = sectors;
-	spin_unlock(&bdev->bd_size_lock);
-}
-
 static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
 {
 	struct parsed_partitions *state;
......
@@ -404,7 +404,6 @@ static int brd_alloc(int i)
 	/* Tell the block layer that this is not a rotational device */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
 	err = add_disk(disk);
 	if (err)
......
@@ -1283,7 +1283,7 @@ static void one_flush_endio(struct bio *bio)
 static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
 {
 	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
-				    REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
+				    REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO);
 	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
 
 	if (!octx) {
......
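Note: a bio carries its opcode and its flags in the one bi_opf word, so a
flush bio has to be a zero-length REQ_OP_WRITE with REQ_PREFLUSH set;
REQ_OP_FLUSH is a request-level opcode, and OR-ing it in instead of
REQ_OP_WRITE leaves the wrong opcode in the low bits. A toy illustration of
the encoding, with made-up bit positions and assuming only that the low bits
hold the opcode:

#include <stdint.h>
#include <stdio.h>

#define REQ_OP_MASK   0xffu		/* assumed: low 8 bits = opcode */
#define REQ_OP_WRITE  1u
#define REQ_OP_FLUSH  2u		/* request-level op, not for bios */
#define REQ_PREFLUSH  (1u << 16)	/* hypothetical flag bit */

int main(void)
{
	uint32_t wrong = REQ_OP_FLUSH | REQ_PREFLUSH;
	uint32_t right = REQ_OP_WRITE | REQ_PREFLUSH;

	/* The old code submitted op 2 (FLUSH); the fix submits op 1 (WRITE). */
	printf("wrong op=%u right op=%u\n",
	       wrong & REQ_OP_MASK, right & REQ_OP_MASK);
	return 0;
}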
@@ -609,7 +609,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		request.len = htonl(size);
 	}
 	handle = nbd_cmd_handle(cmd);
-	memcpy(request.handle, &handle, sizeof(handle));
+	request.cookie = cpu_to_be64(handle);
 
 	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
@@ -621,7 +621,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	trace_nbd_header_sent(req, handle);
 	if (result < 0) {
 		if (was_interrupted(result)) {
-			/* If we havne't sent anything we can just return BUSY,
+			/* If we haven't sent anything we can just return BUSY,
 			 * however if we have sent something we need to make
 			 * sure we only allow this req to be sent until we are
 			 * completely done.
@@ -735,7 +735,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
 	u32 tag;
 	int ret = 0;
 
-	memcpy(&handle, reply->handle, sizeof(handle));
+	handle = be64_to_cpu(reply->cookie);
 	tag = nbd_handle_to_tag(handle);
 	hwq = blk_mq_unique_tag_to_hwq(tag);
 	if (hwq < nbd->tag_set.nr_hw_queues)
@@ -1805,7 +1805,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 	 * Tell the block layer that we are not a rotational device
 	 */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
 	disk->queue->limits.discard_granularity = 0;
 	blk_queue_max_discard_sectors(disk->queue, 0);
 	blk_queue_max_segment_size(disk->queue, UINT_MAX);
......
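Note: the cookie is opaque to the server and only has to survive the round
trip, so replacing the raw memcpy of the native-endian handle with an explicit
cpu_to_be64()/be64_to_cpu() pair stays self-consistent: whatever the client
encodes, decoding the echoed value restores it. A userspace sketch using the
glibc equivalents of the kernel helpers:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t handle = 0x1122334455667788ULL;

	/* Request path: encode the opaque handle as big-endian. */
	uint64_t cookie = htobe64(handle);	/* cpu_to_be64() */

	/* The server echoes the cookie back untouched. */
	uint64_t echoed = cookie;

	/* Reply path: decode; the round trip restores the handle. */
	printf("match: %d\n", be64toh(echoed) == handle);
	return 0;
}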
@@ -2144,7 +2144,6 @@ static int null_add_dev(struct nullb_device *dev)
 	nullb->q->queuedata = nullb;
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
 	mutex_lock(&lock);
 	rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
......
@@ -129,6 +129,7 @@ struct ublk_queue {
 	unsigned long io_addr;	/* mapped vm address */
 	unsigned int max_io_sz;
 	bool force_abort;
+	bool timeout;
 	unsigned short nr_io_ready;	/* how many ios setup */
 	struct ublk_device *dev;
 	struct ublk_io ios[];
@@ -898,6 +899,22 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 	}
 }
 
+static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+{
+	struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+
+	if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
+		if (!ubq->timeout) {
+			send_sig(SIGKILL, ubq->ubq_daemon, 0);
+			ubq->timeout = true;
+		}
+
+		return BLK_EH_DONE;
+	}
+
+	return BLK_EH_RESET_TIMER;
+}
+
 static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -957,6 +974,7 @@ static const struct blk_mq_ops ublk_mq_ops = {
 	.queue_rq       = ublk_queue_rq,
 	.init_hctx	= ublk_init_hctx,
 	.init_request   = ublk_init_rq,
+	.timeout	= ublk_timeout,
 };
 
 static int ublk_ch_open(struct inode *inode, struct file *filp)
@@ -1737,6 +1755,18 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
 		return -EPERM;
 
+	/*
+	 * unprivileged device can't be trusted, but RECOVERY and
+	 * RECOVERY_REISSUE still may hang error handling, so can't
+	 * support recovery features for unprivileged ublk now
+	 *
+	 * TODO: provide forward progress for RECOVERY handler, so that
+	 * unprivileged device can benefit from it
+	 */
+	if (info.flags & UBLK_F_UNPRIVILEGED_DEV)
+		info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE |
+				UBLK_F_USER_RECOVERY);
+
 	/* the created device is always owned by current user */
 	ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
@@ -2007,6 +2037,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 	put_task_struct(ubq->ubq_daemon);
 	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
 	ubq->ubq_daemon = NULL;
+	ubq->timeout = false;
 
 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
......
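Note: the timeout handler only intervenes for UBLK_F_UNPRIVILEGED_DEV devices,
where the userspace daemon cannot be trusted to complete IO: it kills the
daemon once and completes the timeout (BLK_EH_DONE), while trusted devices
just get their timer re-armed (BLK_EH_RESET_TIMER). A standalone mock of that
decision logic (hypothetical types, not the driver's):

#include <stdbool.h>
#include <stdio.h>

enum eh_timer_return { EH_DONE, EH_RESET_TIMER };
#define F_UNPRIVILEGED_DEV (1u << 0)	/* stand-in flag bit */

struct mock_queue {
	unsigned int flags;
	bool timeout;		/* daemon already signalled once */
	bool daemon_killed;	/* stands in for send_sig(SIGKILL, ...) */
};

/* Mirrors the shape of ublk_timeout(): kill the untrusted daemon once
 * and complete the timeout; trusted devices get their timer reset. */
static enum eh_timer_return mock_timeout(struct mock_queue *q)
{
	if (q->flags & F_UNPRIVILEGED_DEV) {
		if (!q->timeout) {
			q->daemon_killed = true;
			q->timeout = true;
		}
		return EH_DONE;
	}
	return EH_RESET_TIMER;
}

int main(void)
{
	struct mock_queue q = { .flags = F_UNPRIVILEGED_DEV };

	enum eh_timer_return first = mock_timeout(&q);	/* kills daemon */
	enum eh_timer_return again = mock_timeout(&q);	/* no second signal */

	printf("killed=%d first=%d again=%d\n", q.daemon_killed, first, again);
	return 0;
}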
@@ -2215,7 +2215,6 @@ static int zram_add(void)
 	/* zram devices sort of resembles non-rotational disks */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
 
 	/*
 	 * To ensure that we always get PAGE_SIZE aligned
......
@@ -971,7 +971,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 	}
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
 	blk_queue_write_cache(q, true, true);
......
@@ -219,7 +219,7 @@ static unsigned int optimal_io_size(struct block_device *bdev,
 }
 
 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
-				   sector_t start, sector_t boundary)
+				   loff_t start, loff_t boundary)
 {
 	if (io_size != opt_size &&
 	    start + opt_size / SECTOR_SIZE <= boundary)
@@ -237,8 +237,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
 	struct block_device *bdev;
 	struct mddev *mddev = bitmap->mddev;
 	struct bitmap_storage *store = &bitmap->storage;
-	sector_t offset = mddev->bitmap_info.offset;
-	sector_t ps, sboff, doff;
+	loff_t sboff, offset = mddev->bitmap_info.offset;
+	sector_t ps, doff;
 	unsigned int size = PAGE_SIZE;
 	unsigned int opt_size = PAGE_SIZE;
......
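Note: the md bitmap offset can be negative (the bitmap may sit before the
superblock), which the unsigned sector_t cannot represent; stored unsigned, a
negative offset becomes a huge positive number and the boundary comparisons in
bitmap_io_size() go wrong, hence the switch to the signed loff_t. A small
demonstration of the signedness trap:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t off = -16;	/* bitmap 16 sectors before the superblock */
	uint64_t as_sector_t = (uint64_t)off;	/* unsigned, like sector_t */
	int64_t as_loff_t = off;		/* signed, like loff_t */

	/* Stored unsigned, the negative offset turns huge and any
	 * "offset <= boundary" range check gives the wrong answer. */
	printf("unsigned: %llu\n", (unsigned long long)as_sector_t);
	printf("signed  : %lld\n", (long long)as_loff_t);
	printf("unsigned <= 0 ? %d\n", as_sector_t <= 0);
	printf("signed   <= 0 ? %d\n", as_loff_t <= 0);
	return 0;
}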
@@ -6079,6 +6079,38 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 	return ret;
 }
 
+/*
+ * If the bio covers multiple data disks, find sector within the bio that has
+ * the lowest chunk offset in the first chunk.
+ */
+static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
+					      struct bio *bi)
+{
+	int sectors_per_chunk = conf->chunk_sectors;
+	int raid_disks = conf->raid_disks;
+	int dd_idx;
+	struct stripe_head sh;
+	unsigned int chunk_offset;
+	sector_t r_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
+	sector_t sector;
+
+	/* We pass in fake stripe_head to get back parity disk numbers */
+	sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh);
+	chunk_offset = sector_div(sector, sectors_per_chunk);
+	if (sectors_per_chunk - chunk_offset >= bio_sectors(bi))
+		return r_sector;
+	/*
+	 * Bio crosses to the next data disk. Check whether it's in the same
+	 * chunk.
+	 */
+	dd_idx++;
+	while (dd_idx == sh.pd_idx || dd_idx == sh.qd_idx)
+		dd_idx++;
+	if (dd_idx >= raid_disks)
+		return r_sector;
+	return r_sector + sectors_per_chunk - chunk_offset;
+}
+
 static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -6150,6 +6182,17 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	}
 	md_account_bio(mddev, &bi);
 
+	/*
+	 * Lets start with the stripe with the lowest chunk offset in the first
+	 * chunk. That has the best chances of creating IOs adjacent to
+	 * previous IOs in case of sequential IO and thus creates the most
+	 * sequential IO pattern. We don't bother with the optimization when
+	 * reshaping as the performance benefit is not worth the complexity.
+	 */
+	if (likely(conf->reshape_progress == MaxSector))
+		logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
+	s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf);
+
 	add_wait_queue(&conf->wait_for_overlap, &wait);
 	while (1) {
 		res = make_stripe_request(mddev, conf, &ctx, logical_sector,
@@ -6178,7 +6221,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 			continue;
 		}
 
-		s = find_first_bit(ctx.sectors_to_do, stripe_cnt);
+		s = find_next_bit_wrap(ctx.sectors_to_do, stripe_cnt, s);
 		if (s == stripe_cnt)
 			break;
......
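Note: the arithmetic in raid5_bio_lowest_chunk_sector() is easier to see with
numbers. A toy model with plain modulo standing in for the kernel's
sector_div(), under a hypothetical geometry of 128-sector chunks: a bio whose
first stripe sector sits 120 sectors into a chunk and spans 16 sectors crosses
into the next data disk, so the submission walk starts 8 sectors later, at the
sector with chunk offset 0:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sectors_per_chunk = 128;	/* hypothetical geometry */
	uint64_t r_sector = 1656;	/* first stripe sector of the bio */
	uint64_t bio_sectors = 16;

	/* sector_div() in the kernel; plain modulo here. */
	uint64_t chunk_offset = r_sector % sectors_per_chunk;	/* 120 */

	if (sectors_per_chunk - chunk_offset >= bio_sectors) {
		printf("bio stays in one chunk, start at %llu\n",
		       (unsigned long long)r_sector);
	} else {
		/* Start at the first sector of the next chunk instead. */
		uint64_t start = r_sector + sectors_per_chunk - chunk_offset;

		printf("bio crosses a chunk, start at %llu\n",
		       (unsigned long long)start);	/* 1664 */
	}
	return 0;
}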
@@ -829,7 +829,7 @@ void wbc_detach_inode(struct writeback_control *wbc)
 		 * is okay. The main goal is avoiding keeping an inode on
 		 * the wrong wb for an extended period of time.
 		 */
-		if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
+		if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
 			inode_switch_wbs(inode, max_id);
 	}
......
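Note: history is a 16-bit bitmap recording which recent writeback rounds were
"foreign", so hweight16() is the width-matched population count; since the
upper bits of the promoted value are zero, hweight32() returned the same
result, making this a type cleanup rather than a behavior change. A quick
check with a compiler builtin standing in for the kernel's hweight macros:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t history = 0x5A5A;	/* rolling bitmap of "foreign" slots */

	/* __builtin_popcount as a stand-in for hweight16()/hweight32(). */
	int w16 = __builtin_popcount(history);		 /* 16-bit value */
	int w32 = __builtin_popcount((uint32_t)history); /* widened copy */

	printf("hweight16=%d hweight32=%d\n", w16, w32); /* both 8 */
	return 0;
}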
@@ -157,9 +157,6 @@ struct gendisk {
 	struct timer_rand_state *random;
 	atomic_t sync_io;		/* RAID */
 	struct disk_events *ev;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-	struct kobject integrity_kobj;
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
 #ifdef CONFIG_BLK_DEV_ZONED
 	/*
......
@@ -11,6 +11,8 @@
  *            Cleanup PARANOIA usage & code.
  * 2004/02/19 Paul Clements
  *            Removed PARANOIA, plus various cleanup and comments
+ * 2023 Copyright Red Hat
+ *            Link to userspace extensions, favor cookie over handle.
  */
 
 #ifndef _UAPILINUX_NBD_H
@@ -30,12 +32,18 @@
 #define NBD_SET_TIMEOUT _IO( 0xab, 9 )
 #define NBD_SET_FLAGS   _IO( 0xab, 10)
 
+/*
+ * See also https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md
+ * for additional userspace extensions not yet utilized in the kernel module.
+ */
+
 enum {
 	NBD_CMD_READ = 0,
 	NBD_CMD_WRITE = 1,
 	NBD_CMD_DISC = 2,
 	NBD_CMD_FLUSH = 3,
 	NBD_CMD_TRIM = 4
+	/* userspace defines additional extension commands */
 };
 
 /* values for flags field, these are server interaction specific. */
@@ -64,15 +72,19 @@ enum {
 #define NBD_REQUEST_MAGIC 0x25609513
 #define NBD_REPLY_MAGIC 0x67446698
 /* Do *not* use magics: 0x12560953 0x96744668. */
+/* magic 0x668e33ef for structured reply not supported by kernel yet */
 
 /*
  * This is the packet used for communication between client and
  * server. All data are in network byte order.
 */
 struct nbd_request {
-	__be32 magic;
-	__be32 type;	/* == READ || == WRITE */
-	char handle[8];
+	__be32 magic;	/* NBD_REQUEST_MAGIC	*/
+	__be32 type;	/* See NBD_CMD_*	*/
+	union {
+		__be64 cookie;	/* Opaque identifier for request	*/
+		char handle[8];	/* older spelling of cookie		*/
+	};
 	__be64 from;
 	__be32 len;
 } __attribute__((packed));
@@ -82,8 +94,11 @@ struct nbd_request {
  * it has completed an I/O request (or an error occurs).
 */
 struct nbd_reply {
-	__be32 magic;
+	__be32 magic;		/* NBD_REPLY_MAGIC	*/
 	__be32 error;		/* 0 = ok, else error	*/
-	char handle[8];		/* handle you got from request	*/
+	union {
+		__be64 cookie;	/* Opaque identifier from request	*/
+		char handle[8];	/* older spelling of cookie		*/
+	};
 };
 #endif /* _UAPILINUX_NBD_H */