Commit 9feb1af9 authored by Linus Torvalds

Merge tag 'for-linus-20191205' of git://git.kernel.dk/linux-block

Pull more block and io_uring updates from Jens Axboe:
 "I wasn't expecting this to be so big, and if I was, I would have used
  separate branches for this. Going forward I'll be doing separate
  branches for the current tree, just like for the next kernel version
  tree. In any case, this contains:

   - Series from Christoph that fixes an inherent race condition with
     zoned devices and revalidation.

   - null_blk zone size fix (Damien)

   - Fix for a regression in this merge window that caused busy spins by
     sending empty disk uevents (Eric)

   - Fix for a regression in this merge window for bfq stats (Hou)

   - Fix for io_uring creds allocation failure handling (me)

   - io_uring -ERESTARTSYS send/recvmsg fix (me)

   - Series that fixes the need for applications to retain state across
     async request punts for io_uring. This one is a bit larger than I
     would have hoped, but I think it's important we get this fixed for
     5.5.

   - connect(2) improvement for io_uring, handling EINPROGRESS instead
     of requiring applications to poll for it (me)

   - Have io_uring use a hash for poll requests instead of an rbtree.
     This turned out to work much better in practice, so I think we
     should make the switch now. For some workloads, even with a fair
     amount of cancellations, the insertion sort is just too expensive.
     (me)

   - Various little io_uring fixes (me, Jackie, Pavel, LimingWu)

   - Fix for brd unaligned IO, and a warning for the future (Ming)

   - Fix for a bio integrity data leak (Justin)

   - bvec_iter_advance() improvement (Pavel)

   - Xen blkback page unmap fix (SeongJae)

  The major items in here are all well tested, and on the liburing side
  we continue to add regression and feature test cases. We're up to 50
  topic cases now, each containing anywhere from 1 to more than 10
  cases"

* tag 'for-linus-20191205' of git://git.kernel.dk/linux-block: (33 commits)
  block: fix memleak of bio integrity data
  io_uring: fix a typo in a comment
  bfq-iosched: Ensure bio->bi_blkg is valid before using it
  io_uring: hook all linked requests via link_list
  io_uring: fix error handling in io_queue_link_head
  io_uring: use hash table for poll command lookups
  io-wq: clear node->next on list deletion
  io_uring: ensure deferred timeouts copy necessary data
  io_uring: allow IO_SQE_* flags on IORING_OP_TIMEOUT
  null_blk: remove unused variable warning on !CONFIG_BLK_DEV_ZONED
  brd: warn on un-aligned buffer
  brd: remove max_hw_sectors queue limit
  xen/blkback: Avoid unmapping unmapped grant pages
  io_uring: handle connect -EINPROGRESS like -EAGAIN
  block: set the zone size in blk_revalidate_disk_zones atomically
  block: don't handle bio based drivers in blk_revalidate_disk_zones
  block: allocate the zone bitmaps lazily
  block: replace seq_zones_bitmap with conv_zones_bitmap
  block: simplify blkdev_nr_zones
  block: remove the empty line at the end of blk-zoned.c
  ...
parents 0aecba61 85394299
...@@ -351,6 +351,9 @@ void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) ...@@ -351,6 +351,9 @@ void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{ {
struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
if (!bfqg)
return;
blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
} }
......
...@@ -87,7 +87,7 @@ EXPORT_SYMBOL(bio_integrity_alloc); ...@@ -87,7 +87,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
* Description: Used to free the integrity portion of a bio. Usually * Description: Used to free the integrity portion of a bio. Usually
* called from bio_free(). * called from bio_free().
*/ */
static void bio_integrity_free(struct bio *bio) void bio_integrity_free(struct bio *bio)
{ {
struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_set *bs = bio->bi_pool; struct bio_set *bs = bio->bi_pool;
......
...@@ -233,6 +233,9 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, ...@@ -233,6 +233,9 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
void bio_uninit(struct bio *bio) void bio_uninit(struct bio *bio)
{ {
bio_disassociate_blkg(bio); bio_disassociate_blkg(bio);
if (bio_integrity(bio))
bio_integrity_free(bio);
} }
EXPORT_SYMBOL(bio_uninit); EXPORT_SYMBOL(bio_uninit);
......
...@@ -70,30 +70,20 @@ void __blk_req_zone_write_unlock(struct request *rq) ...@@ -70,30 +70,20 @@ void __blk_req_zone_write_unlock(struct request *rq)
} }
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
sector_t nr_sectors)
{
sector_t zone_sectors = blk_queue_zone_sectors(q);
return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}
/** /**
* blkdev_nr_zones - Get number of zones * blkdev_nr_zones - Get number of zones
* @bdev: Target block device * @disk: Target gendisk
* *
* Description: * Return the total number of zones of a zoned block device. For a block
* Return the total number of zones of a zoned block device. * device without zone capabilities, the number of zones is always 0.
* For a regular block device, the number of zones is always 0.
*/ */
unsigned int blkdev_nr_zones(struct block_device *bdev) unsigned int blkdev_nr_zones(struct gendisk *disk)
{ {
struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);
if (!blk_queue_is_zoned(q)) if (!blk_queue_is_zoned(disk->queue))
return 0; return 0;
return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk));
} }
EXPORT_SYMBOL_GPL(blkdev_nr_zones); EXPORT_SYMBOL_GPL(blkdev_nr_zones);
...@@ -342,16 +332,18 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, ...@@ -342,16 +332,18 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
void blk_queue_free_zone_bitmaps(struct request_queue *q) void blk_queue_free_zone_bitmaps(struct request_queue *q)
{ {
kfree(q->seq_zones_bitmap); kfree(q->conv_zones_bitmap);
q->seq_zones_bitmap = NULL; q->conv_zones_bitmap = NULL;
kfree(q->seq_zones_wlock); kfree(q->seq_zones_wlock);
q->seq_zones_wlock = NULL; q->seq_zones_wlock = NULL;
} }
struct blk_revalidate_zone_args { struct blk_revalidate_zone_args {
struct gendisk *disk; struct gendisk *disk;
unsigned long *seq_zones_bitmap; unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock; unsigned long *seq_zones_wlock;
unsigned int nr_zones;
sector_t zone_sectors;
sector_t sector; sector_t sector;
}; };
...@@ -364,25 +356,33 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, ...@@ -364,25 +356,33 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
struct blk_revalidate_zone_args *args = data; struct blk_revalidate_zone_args *args = data;
struct gendisk *disk = args->disk; struct gendisk *disk = args->disk;
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
sector_t zone_sectors = blk_queue_zone_sectors(q);
sector_t capacity = get_capacity(disk); sector_t capacity = get_capacity(disk);
/* /*
* All zones must have the same size, with the exception on an eventual * All zones must have the same size, with the exception on an eventual
* smaller last zone. * smaller last zone.
*/ */
if (zone->start + zone_sectors < capacity && if (zone->start == 0) {
zone->len != zone_sectors) { if (zone->len == 0 || !is_power_of_2(zone->len)) {
pr_warn("%s: Invalid zoned device with non constant zone size\n", pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
disk->disk_name); disk->disk_name, zone->len);
return false; return -ENODEV;
} }
if (zone->start + zone->len >= capacity && args->zone_sectors = zone->len;
zone->len > zone_sectors) { args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
pr_warn("%s: Invalid zoned device with larger last zone size\n", } else if (zone->start + args->zone_sectors < capacity) {
disk->disk_name); if (zone->len != args->zone_sectors) {
return -ENODEV; pr_warn("%s: Invalid zoned device with non constant zone size\n",
disk->disk_name);
return -ENODEV;
}
} else {
if (zone->len > args->zone_sectors) {
pr_warn("%s: Invalid zoned device with larger last zone size\n",
disk->disk_name);
return -ENODEV;
}
} }
/* Check for holes in the zone report */ /* Check for holes in the zone report */
...@@ -395,8 +395,22 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, ...@@ -395,8 +395,22 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
/* Check zone type */ /* Check zone type */
switch (zone->type) { switch (zone->type) {
case BLK_ZONE_TYPE_CONVENTIONAL: case BLK_ZONE_TYPE_CONVENTIONAL:
if (!args->conv_zones_bitmap) {
args->conv_zones_bitmap =
blk_alloc_zone_bitmap(q->node, args->nr_zones);
if (!args->conv_zones_bitmap)
return -ENOMEM;
}
set_bit(idx, args->conv_zones_bitmap);
break;
case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF: case BLK_ZONE_TYPE_SEQWRITE_PREF:
if (!args->seq_zones_wlock) {
args->seq_zones_wlock =
blk_alloc_zone_bitmap(q->node, args->nr_zones);
if (!args->seq_zones_wlock)
return -ENOMEM;
}
break; break;
default: default:
pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
...@@ -404,78 +418,54 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, ...@@ -404,78 +418,54 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
return -ENODEV; return -ENODEV;
} }
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
set_bit(idx, args->seq_zones_bitmap);
args->sector += zone->len; args->sector += zone->len;
return 0; return 0;
} }
static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones,
struct blk_revalidate_zone_args *args)
{
/*
* Ensure that all memory allocations in this context are done as
* if GFP_NOIO was specified.
*/
unsigned int noio_flag = memalloc_noio_save();
struct request_queue *q = disk->queue;
int ret;
args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
if (!args->seq_zones_wlock)
return -ENOMEM;
args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
if (!args->seq_zones_bitmap)
return -ENOMEM;
ret = disk->fops->report_zones(disk, 0, nr_zones,
blk_revalidate_zone_cb, args);
memalloc_noio_restore(noio_flag);
return ret;
}
/** /**
* blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
* @disk: Target disk * @disk: Target disk
* *
* Helper function for low-level device drivers to (re) allocate and initialize * Helper function for low-level device drivers to (re) allocate and initialize
* a disk request queue zone bitmaps. This functions should normally be called * a disk request queue zone bitmaps. This functions should normally be called
* within the disk ->revalidate method. For BIO based queues, no zone bitmap * within the disk ->revalidate method for blk-mq based drivers. For BIO based
* is allocated. * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
* is correct.
*/ */
int blk_revalidate_disk_zones(struct gendisk *disk) int blk_revalidate_disk_zones(struct gendisk *disk)
{ {
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); struct blk_revalidate_zone_args args = {
struct blk_revalidate_zone_args args = { .disk = disk }; .disk = disk,
int ret = 0; };
unsigned int noio_flag;
int ret;
if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
return -EIO; return -EIO;
if (WARN_ON_ONCE(!queue_is_mq(q)))
return -EIO;
/* /*
* BIO based queues do not use a scheduler so only q->nr_zones * Ensure that all memory allocations in this context are done as if
* needs to be updated so that the sysfs exposed value is correct. * GFP_NOIO was specified.
*/ */
if (!queue_is_mq(q)) { noio_flag = memalloc_noio_save();
q->nr_zones = nr_zones; ret = disk->fops->report_zones(disk, 0, UINT_MAX,
return 0; blk_revalidate_zone_cb, &args);
} memalloc_noio_restore(noio_flag);
if (nr_zones)
ret = blk_update_zone_info(disk, nr_zones, &args);
/* /*
* Install the new bitmaps, making sure the queue is stopped and * Install the new bitmaps and update nr_zones only once the queue is
* all I/Os are completed (i.e. a scheduler is not referencing the * stopped and all I/Os are completed (i.e. a scheduler is not
* bitmaps). * referencing the bitmaps).
*/ */
blk_mq_freeze_queue(q); blk_mq_freeze_queue(q);
if (ret >= 0) { if (ret >= 0) {
q->nr_zones = nr_zones; blk_queue_chunk_sectors(q, args.zone_sectors);
q->nr_zones = args.nr_zones;
swap(q->seq_zones_wlock, args.seq_zones_wlock); swap(q->seq_zones_wlock, args.seq_zones_wlock);
swap(q->seq_zones_bitmap, args.seq_zones_bitmap); swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
ret = 0; ret = 0;
} else { } else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name); pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
...@@ -484,8 +474,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) ...@@ -484,8 +474,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
blk_mq_unfreeze_queue(q); blk_mq_unfreeze_queue(q);
kfree(args.seq_zones_wlock); kfree(args.seq_zones_wlock);
kfree(args.seq_zones_bitmap); kfree(args.conv_zones_bitmap);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
...@@ -121,6 +121,7 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, ...@@ -121,6 +121,7 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
void blk_flush_integrity(void); void blk_flush_integrity(void);
bool __bio_integrity_endio(struct bio *); bool __bio_integrity_endio(struct bio *);
void bio_integrity_free(struct bio *bio);
static inline bool bio_integrity_endio(struct bio *bio) static inline bool bio_integrity_endio(struct bio *bio)
{ {
if (bio_integrity(bio)) if (bio_integrity(bio))
...@@ -166,6 +167,9 @@ static inline bool bio_integrity_endio(struct bio *bio) ...@@ -166,6 +167,9 @@ static inline bool bio_integrity_endio(struct bio *bio)
{ {
return true; return true;
} }
static inline void bio_integrity_free(struct bio *bio)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLK_DEV_INTEGRITY */
unsigned long blk_rq_timeout(unsigned long timeout); unsigned long blk_rq_timeout(unsigned long timeout);
......
...@@ -512,7 +512,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, ...@@ -512,7 +512,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKGETZONESZ: case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev)); return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES: case BLKGETNRZONES:
return put_uint(arg, blkdev_nr_zones(bdev)); return put_uint(arg, blkdev_nr_zones(bdev->bd_disk));
case HDIO_GETGEO: case HDIO_GETGEO:
return blkdev_getgeo(bdev, argp); return blkdev_getgeo(bdev, argp);
case BLKRAGET: case BLKRAGET:
......
...@@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) ...@@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
unsigned int len = bvec.bv_len; unsigned int len = bvec.bv_len;
int err; int err;
/* Don't support un-aligned buffer */
WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
bio_op(bio), sector); bio_op(bio), sector);
if (err) if (err)
...@@ -382,7 +386,6 @@ static struct brd_device *brd_alloc(int i) ...@@ -382,7 +386,6 @@ static struct brd_device *brd_alloc(int i)
goto out_free_dev; goto out_free_dev;
blk_queue_make_request(brd->brd_queue, brd_make_request); blk_queue_make_request(brd->brd_queue, brd_make_request);
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
/* This is so fdisk will align partitions on 4k, because of /* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN * direct_access API needing 4k alignment, returning a PFN
......
...@@ -1559,14 +1559,13 @@ static int init_driver_queues(struct nullb *nullb) ...@@ -1559,14 +1559,13 @@ static int init_driver_queues(struct nullb *nullb)
static int null_gendisk_register(struct nullb *nullb) static int null_gendisk_register(struct nullb *nullb)
{ {
sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
struct gendisk *disk; struct gendisk *disk;
sector_t size;
disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
if (!disk) if (!disk)
return -ENOMEM; return -ENOMEM;
size = (sector_t)nullb->dev->size * 1024 * 1024ULL; set_capacity(disk, size);
set_capacity(disk, size >> 9);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major; disk->major = null_major;
...@@ -1576,12 +1575,19 @@ static int null_gendisk_register(struct nullb *nullb) ...@@ -1576,12 +1575,19 @@ static int null_gendisk_register(struct nullb *nullb)
disk->queue = nullb->q; disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
#ifdef CONFIG_BLK_DEV_ZONED
if (nullb->dev->zoned) { if (nullb->dev->zoned) {
int ret = blk_revalidate_disk_zones(disk); if (queue_is_mq(nullb->q)) {
int ret = blk_revalidate_disk_zones(disk);
if (ret != 0) if (ret)
return ret; return ret;
} else {
blk_queue_chunk_sectors(nullb->q,
nullb->dev->zone_size_sects);
nullb->q->nr_zones = blkdev_nr_zones(disk);
}
} }
#endif
add_disk(disk); add_disk(disk);
return 0; return 0;
...@@ -1607,7 +1613,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) ...@@ -1607,7 +1613,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
return blk_mq_alloc_tag_set(set); return blk_mq_alloc_tag_set(set);
} }
static void null_validate_conf(struct nullb_device *dev) static int null_validate_conf(struct nullb_device *dev)
{ {
dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = round_down(dev->blocksize, 512);
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
...@@ -1634,6 +1640,14 @@ static void null_validate_conf(struct nullb_device *dev) ...@@ -1634,6 +1640,14 @@ static void null_validate_conf(struct nullb_device *dev)
/* can not stop a queue */ /* can not stop a queue */
if (dev->queue_mode == NULL_Q_BIO) if (dev->queue_mode == NULL_Q_BIO)
dev->mbps = 0; dev->mbps = 0;
if (dev->zoned &&
(!dev->zone_size || !is_power_of_2(dev->zone_size))) {
pr_err("zone_size must be power-of-two\n");
return -EINVAL;
}
return 0;
} }
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
...@@ -1666,7 +1680,9 @@ static int null_add_dev(struct nullb_device *dev) ...@@ -1666,7 +1680,9 @@ static int null_add_dev(struct nullb_device *dev)
struct nullb *nullb; struct nullb *nullb;
int rv; int rv;
null_validate_conf(dev); rv = null_validate_conf(dev);
if (rv)
return rv;
nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
if (!nullb) { if (!nullb) {
...@@ -1731,7 +1747,6 @@ static int null_add_dev(struct nullb_device *dev) ...@@ -1731,7 +1747,6 @@ static int null_add_dev(struct nullb_device *dev)
if (rv) if (rv)
goto out_cleanup_blk_queue; goto out_cleanup_blk_queue;
blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
nullb->q->limits.zoned = BLK_ZONED_HM; nullb->q->limits.zoned = BLK_ZONED_HM;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
blk_queue_required_elevator_features(nullb->q, blk_queue_required_elevator_features(nullb->q,
...@@ -1792,11 +1807,6 @@ static int __init null_init(void) ...@@ -1792,11 +1807,6 @@ static int __init null_init(void)
g_bs = PAGE_SIZE; g_bs = PAGE_SIZE;
} }
if (!is_power_of_2(g_zone_size)) {
pr_err("zone_size must be power-of-two\n");
return -EINVAL;
}
if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
pr_err("invalid home_node value\n"); pr_err("invalid home_node value\n");
g_home_node = NUMA_NO_NODE; g_home_node = NUMA_NO_NODE;
......
...@@ -936,6 +936,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, ...@@ -936,6 +936,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
out_of_memory: out_of_memory:
pr_alert("%s: out of memory\n", __func__); pr_alert("%s: out of memory\n", __func__);
put_free_pages(ring, pages_to_gnt, segs_to_map); put_free_pages(ring, pages_to_gnt, segs_to_map);
for (i = last_map; i < num; i++)
pages[i]->handle = BLKBACK_INVALID_HANDLE;
return -ENOMEM; return -ENOMEM;
} }
......
...@@ -1954,12 +1954,14 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1954,12 +1954,14 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
/* /*
* For a zoned target, the number of zones should be updated for the * For a zoned target, the number of zones should be updated for the
* correct value to be exposed in sysfs queue/nr_zones. For a BIO based * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
* target, this is all that is needed. For a request based target, the * target, this is all that is needed.
* queue zone bitmaps must also be updated.
* Use blk_revalidate_disk_zones() to handle this.
*/ */
if (blk_queue_is_zoned(q)) #ifdef CONFIG_BLK_DEV_ZONED
blk_revalidate_disk_zones(t->md->disk); if (blk_queue_is_zoned(q)) {
WARN_ON_ONCE(queue_is_mq(q));
q->nr_zones = blkdev_nr_zones(t->md->disk);
}
#endif
/* Allow reads to exceed readahead limits */ /* Allow reads to exceed readahead limits */
q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
......
...@@ -727,7 +727,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) ...@@ -727,7 +727,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks); dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
dev->nr_zones = blkdev_nr_zones(dev->bdev); dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
dmz->dev = dev; dmz->dev = dev;
......
...@@ -412,8 +412,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) ...@@ -412,8 +412,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
goto err; goto err;
/* The drive satisfies the kernel restrictions: set it up */ /* The drive satisfies the kernel restrictions: set it up */
blk_queue_chunk_sectors(sdkp->disk->queue,
logical_to_sectors(sdkp->device, zone_blocks));
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
blk_queue_required_elevator_features(sdkp->disk->queue, blk_queue_required_elevator_features(sdkp->disk->queue,
ELEVATOR_F_ZBD_SEQ_WRITE); ELEVATOR_F_ZBD_SEQ_WRITE);
......
...@@ -1531,7 +1531,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) ...@@ -1531,7 +1531,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
ret = blk_add_partitions(disk, bdev); ret = blk_add_partitions(disk, bdev);
if (ret == -EAGAIN) if (ret == -EAGAIN)
goto rescan; goto rescan;
} else { } else if (invalidate) {
/* /*
* Tell userspace that the media / partition table may have * Tell userspace that the media / partition table may have
* changed. * changed.
......
...@@ -111,7 +111,7 @@ struct io_wq { ...@@ -111,7 +111,7 @@ struct io_wq {
struct task_struct *manager; struct task_struct *manager;
struct user_struct *user; struct user_struct *user;
struct cred *creds; const struct cred *creds;
struct mm_struct *mm; struct mm_struct *mm;
refcount_t refs; refcount_t refs;
struct completion done; struct completion done;
......
...@@ -52,6 +52,7 @@ static inline void wq_node_del(struct io_wq_work_list *list, ...@@ -52,6 +52,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
list->last = prev; list->last = prev;
if (prev) if (prev)
prev->next = node->next; prev->next = node->next;
node->next = NULL;
} }
#define wq_list_for_each(pos, prv, head) \ #define wq_list_for_each(pos, prv, head) \
...@@ -87,7 +88,7 @@ typedef void (put_work_fn)(struct io_wq_work *); ...@@ -87,7 +88,7 @@ typedef void (put_work_fn)(struct io_wq_work *);
struct io_wq_data { struct io_wq_data {
struct mm_struct *mm; struct mm_struct *mm;
struct user_struct *user; struct user_struct *user;
struct cred *creds; const struct cred *creds;
get_work_fn *get_work; get_work_fn *get_work;
put_work_fn *put_work; put_work_fn *put_work;
...@@ -118,10 +119,6 @@ static inline void io_wq_worker_sleeping(struct task_struct *tsk) ...@@ -118,10 +119,6 @@ static inline void io_wq_worker_sleeping(struct task_struct *tsk)
static inline void io_wq_worker_running(struct task_struct *tsk) static inline void io_wq_worker_running(struct task_struct *tsk)
{ {
} }
#endif #endif /* CONFIG_IO_WQ */
static inline bool io_wq_current_is_worker(void) #endif /* INTERNAL_IO_WQ_H */
{
return in_task() && (current->flags & PF_IO_WORKER);
}
#endif
This diff is collapsed.
...@@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, ...@@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
#define BLK_ALL_ZONES ((unsigned int)-1) #define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector, int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int nr_zones, report_zones_cb cb, void *data);
unsigned int blkdev_nr_zones(struct gendisk *disk);
extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sectors, sector_t nr_sectors, sector_t sectors, sector_t nr_sectors,
gfp_t gfp_mask); gfp_t gfp_mask);
...@@ -371,12 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -371,12 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
#else /* CONFIG_BLK_DEV_ZONED */ #else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blkdev_nr_zones(struct block_device *bdev) static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
{
return 0;
}
static inline int blk_revalidate_disk_zones(struct gendisk *disk)
{ {
return 0; return 0;
} }
...@@ -504,9 +498,9 @@ struct request_queue { ...@@ -504,9 +498,9 @@ struct request_queue {
/* /*
* Zoned block device information for request dispatch control. * Zoned block device information for request dispatch control.
* nr_zones is the total number of zones of the device. This is always * nr_zones is the total number of zones of the device. This is always
* 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
* bits which indicates if a zone is conventional (bit clear) or * bits which indicates if a zone is conventional (bit set) or
* sequential (bit set). seq_zones_wlock is a bitmap of nr_zones * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
* bits which indicates if a zone is write locked, that is, if a write * bits which indicates if a zone is write locked, that is, if a write
* request targeting the zone was dispatched. All three fields are * request targeting the zone was dispatched. All three fields are
* initialized by the low level device driver (e.g. scsi/sd.c). * initialized by the low level device driver (e.g. scsi/sd.c).
...@@ -519,7 +513,7 @@ struct request_queue { ...@@ -519,7 +513,7 @@ struct request_queue {
* blk_mq_unfreeze_queue(). * blk_mq_unfreeze_queue().
*/ */
unsigned int nr_zones; unsigned int nr_zones;
unsigned long *seq_zones_bitmap; unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock; unsigned long *seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */ #endif /* CONFIG_BLK_DEV_ZONED */
...@@ -724,9 +718,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, ...@@ -724,9 +718,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
static inline bool blk_queue_zone_is_seq(struct request_queue *q, static inline bool blk_queue_zone_is_seq(struct request_queue *q,
sector_t sector) sector_t sector)
{ {
if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) if (!blk_queue_is_zoned(q))
return false; return false;
return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); if (!q->conv_zones_bitmap)
return true;
return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
} }
#else /* CONFIG_BLK_DEV_ZONED */ #else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_nr_zones(struct request_queue *q) static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
......
...@@ -87,26 +87,24 @@ struct bvec_iter_all { ...@@ -87,26 +87,24 @@ struct bvec_iter_all {
static inline bool bvec_iter_advance(const struct bio_vec *bv, static inline bool bvec_iter_advance(const struct bio_vec *bv,
struct bvec_iter *iter, unsigned bytes) struct bvec_iter *iter, unsigned bytes)
{ {
unsigned int idx = iter->bi_idx;
if (WARN_ONCE(bytes > iter->bi_size, if (WARN_ONCE(bytes > iter->bi_size,
"Attempted to advance past end of bvec iter\n")) { "Attempted to advance past end of bvec iter\n")) {
iter->bi_size = 0; iter->bi_size = 0;
return false; return false;
} }
while (bytes) { iter->bi_size -= bytes;
const struct bio_vec *cur = bv + iter->bi_idx; bytes += iter->bi_bvec_done;
unsigned len = min3(bytes, iter->bi_size,
cur->bv_len - iter->bi_bvec_done);
bytes -= len;
iter->bi_size -= len;
iter->bi_bvec_done += len;
if (iter->bi_bvec_done == cur->bv_len) { while (bytes && bytes >= bv[idx].bv_len) {
iter->bi_bvec_done = 0; bytes -= bv[idx].bv_len;
iter->bi_idx++; idx++;
}
} }
iter->bi_idx = idx;
iter->bi_bvec_done = bytes;
return true; return true;
} }
......
...@@ -378,12 +378,19 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, ...@@ -378,12 +378,19 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags, unsigned int vlen, unsigned int flags,
bool forbid_cmsg_compat); bool forbid_cmsg_compat);
extern long __sys_sendmsg_sock(struct socket *sock, extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *msg,
unsigned int flags); unsigned int flags);
extern long __sys_recvmsg_sock(struct socket *sock, extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *msg, struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr,
unsigned int flags); unsigned int flags);
extern int sendmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags,
struct iovec **iov);
extern int recvmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags,
struct sockaddr __user **uaddr,
struct iovec **iov);
/* helpers which do the actual work for syscalls */ /* helpers which do the actual work for syscalls */
extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
...@@ -399,9 +406,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ...@@ -399,9 +406,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags); int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol); extern int __sys_socket(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
struct sockaddr __user *uservaddr, int addrlen, int addrlen, int file_flags);
int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen); int addrlen);
extern int __sys_listen(int fd, int backlog); extern int __sys_listen(int fd, int backlog);
......
...@@ -157,6 +157,7 @@ struct io_uring_params { ...@@ -157,6 +157,7 @@ struct io_uring_params {
*/ */
#define IORING_FEAT_SINGLE_MMAP (1U << 0) #define IORING_FEAT_SINGLE_MMAP (1U << 0)
#define IORING_FEAT_NODROP (1U << 1) #define IORING_FEAT_NODROP (1U << 1)
#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
/* /*
* io_uring_register(2) opcodes and arguments * io_uring_register(2) opcodes and arguments
......
...@@ -1826,26 +1826,22 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, ...@@ -1826,26 +1826,22 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
* include the -EINPROGRESS status for such sockets. * include the -EINPROGRESS status for such sockets.
*/ */
int __sys_connect_file(struct file *file, struct sockaddr __user *uservaddr, int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
int addrlen, int file_flags) int addrlen, int file_flags)
{ {
struct socket *sock; struct socket *sock;
struct sockaddr_storage address;
int err; int err;
sock = sock_from_file(file, &err); sock = sock_from_file(file, &err);
if (!sock) if (!sock)
goto out; goto out;
err = move_addr_to_kernel(uservaddr, addrlen, &address);
if (err < 0)
goto out;
err = err =
security_socket_connect(sock, (struct sockaddr *)&address, addrlen); security_socket_connect(sock, (struct sockaddr *)address, addrlen);
if (err) if (err)
goto out; goto out;
err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
sock->file->f_flags | file_flags); sock->file->f_flags | file_flags);
out: out:
return err; return err;
...@@ -1858,7 +1854,11 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) ...@@ -1858,7 +1854,11 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
f = fdget(fd); f = fdget(fd);
if (f.file) { if (f.file) {
ret = __sys_connect_file(f.file, uservaddr, addrlen, 0); struct sockaddr_storage address;
ret = move_addr_to_kernel(uservaddr, addrlen, &address);
if (!ret)
ret = __sys_connect_file(f.file, &address, addrlen, 0);
if (f.flags) if (f.flags)
fput(f.file); fput(f.file);
} }
...@@ -2346,9 +2346,9 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, ...@@ -2346,9 +2346,9 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
return err; return err;
} }
static int sendmsg_copy_msghdr(struct msghdr *msg, int sendmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags, struct user_msghdr __user *umsg, unsigned flags,
struct iovec **iov) struct iovec **iov)
{ {
int err; int err;
...@@ -2390,27 +2390,14 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2390,27 +2390,14 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
/* /*
* BSD sendmsg interface * BSD sendmsg interface
*/ */
long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *umsg, long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
unsigned int flags) unsigned int flags)
{ {
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
struct sockaddr_storage address;
struct msghdr msg = { .msg_name = &address };
ssize_t err;
err = sendmsg_copy_msghdr(&msg, umsg, flags, &iov);
if (err)
return err;
/* disallow ancillary data requests from this path */ /* disallow ancillary data requests from this path */
if (msg.msg_control || msg.msg_controllen) { if (msg->msg_control || msg->msg_controllen)
err = -EINVAL; return -EINVAL;
goto out;
}
err = ____sys_sendmsg(sock, &msg, flags, NULL, 0); return ____sys_sendmsg(sock, msg, flags, NULL, 0);
out:
kfree(iov);
return err;
} }
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
...@@ -2516,10 +2503,10 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, ...@@ -2516,10 +2503,10 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
return __sys_sendmmsg(fd, mmsg, vlen, flags, true); return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
} }
static int recvmsg_copy_msghdr(struct msghdr *msg, int recvmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags, struct user_msghdr __user *umsg, unsigned flags,
struct sockaddr __user **uaddr, struct sockaddr __user **uaddr,
struct iovec **iov) struct iovec **iov)
{ {
ssize_t err; ssize_t err;
...@@ -2609,28 +2596,15 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, ...@@ -2609,28 +2596,15 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
* BSD recvmsg interface * BSD recvmsg interface
*/ */
long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *umsg, long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
unsigned int flags) struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr, unsigned int flags)
{ {
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
struct sockaddr_storage address;
struct msghdr msg = { .msg_name = &address };
struct sockaddr __user *uaddr;
ssize_t err;
err = recvmsg_copy_msghdr(&msg, umsg, flags, &uaddr, &iov);
if (err)
return err;
/* disallow ancillary data requests from this path */ /* disallow ancillary data requests from this path */
if (msg.msg_control || msg.msg_controllen) { if (msg->msg_control || msg->msg_controllen)
err = -EINVAL; return -EINVAL;
goto out;
}
err = ____sys_recvmsg(sock, &msg, umsg, uaddr, flags, 0); return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
out:
kfree(iov);
return err;
} }
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment