Commit 978e51ba authored by Mike Snitzer

dm: optimize bio-based NVMe IO submission

Upper-level bio-based drivers that stack immediately on top of NVMe can
leverage direct_make_request().  In addition, DM's NVMe bio-based type
will initially only ever have one NVMe device that it submits IO to at a
time.  There is no splitting needed.  Enhance DM core so that
DM_TYPE_NVME_BIO_BASED's IO submission takes advantage of both of these
characteristics.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 22c11858
...@@ -532,7 +532,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -532,7 +532,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
return r; return r;
} }
static struct dm_io *alloc_io(struct mapped_device *md) static void start_io_acct(struct dm_io *io);
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
{ {
struct dm_io *io; struct dm_io *io;
struct dm_target_io *tio; struct dm_target_io *tio;
...@@ -548,6 +550,13 @@ static struct dm_io *alloc_io(struct mapped_device *md) ...@@ -548,6 +550,13 @@ static struct dm_io *alloc_io(struct mapped_device *md)
io = container_of(tio, struct dm_io, tio); io = container_of(tio, struct dm_io, tio);
io->magic = DM_IO_MAGIC; io->magic = DM_IO_MAGIC;
io->status = 0;
atomic_set(&io->io_count, 1);
io->orig_bio = bio;
io->md = md;
spin_lock_init(&io->endio_lock);
start_io_acct(io);
return io; return io;
} }
...@@ -924,7 +933,7 @@ static void clone_endio(struct bio *bio) ...@@ -924,7 +933,7 @@ static void clone_endio(struct bio *bio)
struct mapped_device *md = tio->io->md; struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io; dm_endio_fn endio = tio->ti->type->end_io;
if (unlikely(error == BLK_STS_TARGET)) { if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
if (bio_op(bio) == REQ_OP_WRITE_SAME && if (bio_op(bio) == REQ_OP_WRITE_SAME &&
!bio->bi_disk->queue->limits.max_write_same_sectors) !bio->bi_disk->queue->limits.max_write_same_sectors)
disable_write_same(md); disable_write_same(md);
...@@ -1191,13 +1200,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) ...@@ -1191,13 +1200,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
} }
EXPORT_SYMBOL_GPL(dm_remap_zone_report); EXPORT_SYMBOL_GPL(dm_remap_zone_report);
static void __map_bio(struct dm_target_io *tio) static blk_qc_t __map_bio(struct dm_target_io *tio)
{ {
int r; int r;
sector_t sector; sector_t sector;
struct bio *clone = &tio->clone; struct bio *clone = &tio->clone;
struct dm_io *io = tio->io; struct dm_io *io = tio->io;
struct mapped_device *md = io->md;
struct dm_target *ti = tio->ti; struct dm_target *ti = tio->ti;
blk_qc_t ret = BLK_QC_T_NONE;
clone->bi_end_io = clone_endio; clone->bi_end_io = clone_endio;
...@@ -1217,7 +1228,10 @@ static void __map_bio(struct dm_target_io *tio) ...@@ -1217,7 +1228,10 @@ static void __map_bio(struct dm_target_io *tio)
/* the bio has been remapped so dispatch it */ /* the bio has been remapped so dispatch it */
trace_block_bio_remap(clone->bi_disk->queue, clone, trace_block_bio_remap(clone->bi_disk->queue, clone,
bio_dev(io->orig_bio), sector); bio_dev(io->orig_bio), sector);
generic_make_request(clone); if (md->type == DM_TYPE_NVME_BIO_BASED)
ret = direct_make_request(clone);
else
ret = generic_make_request(clone);
break; break;
case DM_MAPIO_KILL: case DM_MAPIO_KILL:
free_tio(tio); free_tio(tio);
...@@ -1231,6 +1245,8 @@ static void __map_bio(struct dm_target_io *tio) ...@@ -1231,6 +1245,8 @@ static void __map_bio(struct dm_target_io *tio)
DMWARN("unimplemented target map return value: %d", r); DMWARN("unimplemented target map return value: %d", r);
BUG(); BUG();
} }
return ret;
} }
static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
...@@ -1315,7 +1331,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, ...@@ -1315,7 +1331,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
} }
} }
static void __clone_and_map_simple_bio(struct clone_info *ci, static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci,
struct dm_target_io *tio, unsigned *len) struct dm_target_io *tio, unsigned *len)
{ {
struct bio *clone = &tio->clone; struct bio *clone = &tio->clone;
...@@ -1326,7 +1342,7 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, ...@@ -1326,7 +1342,7 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
if (len) if (len)
bio_setup_sector(clone, ci->sector, *len); bio_setup_sector(clone, ci->sector, *len);
__map_bio(tio); return __map_bio(tio);
} }
static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
...@@ -1340,7 +1356,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, ...@@ -1340,7 +1356,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
while ((bio = bio_list_pop(&blist))) { while ((bio = bio_list_pop(&blist))) {
tio = container_of(bio, struct dm_target_io, clone); tio = container_of(bio, struct dm_target_io, clone);
__clone_and_map_simple_bio(ci, tio, len); (void) __clone_and_map_simple_bio(ci, tio, len);
} }
} }
...@@ -1370,7 +1386,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, ...@@ -1370,7 +1386,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
free_tio(tio); free_tio(tio);
return r; return r;
} }
__map_bio(tio); (void) __map_bio(tio);
return 0; return 0;
} }
...@@ -1482,30 +1498,30 @@ static int __split_and_process_non_flush(struct clone_info *ci) ...@@ -1482,30 +1498,30 @@ static int __split_and_process_non_flush(struct clone_info *ci)
return 0; return 0;
} }
/*
 * Prepare @ci for processing @bio against table @map: allocate the
 * dm_io for @md and @bio via alloc_io(), remember the table, and start
 * at the bio's current sector.  ci->bio and ci->sector_count are left
 * for the caller to fill in.
 */
static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
			    struct dm_table *map, struct bio *bio)
{
	struct dm_io *io = alloc_io(md, bio);

	ci->sector = bio->bi_iter.bi_sector;
	ci->io = io;
	ci->map = map;
}
/* /*
* Entry point to split a bio into clones and submit them to the targets. * Entry point to split a bio into clones and submit them to the targets.
*/ */
static void __split_and_process_bio(struct mapped_device *md, static blk_qc_t __split_and_process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio) struct dm_table *map, struct bio *bio)
{ {
struct clone_info ci; struct clone_info ci;
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0; int error = 0;
if (unlikely(!map)) { if (unlikely(!map)) {
bio_io_error(bio); bio_io_error(bio);
return; return ret;
} }
ci.map = map; init_clone_info(&ci, md, map, bio);
ci.io = alloc_io(md);
ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->orig_bio = bio;
ci.io->md = md;
spin_lock_init(&ci.io->endio_lock);
ci.sector = bio->bi_iter.bi_sector;
start_io_acct(ci.io);
if (bio->bi_opf & REQ_PREFLUSH) { if (bio->bi_opf & REQ_PREFLUSH) {
ci.bio = &ci.io->md->flush_bio; ci.bio = &ci.io->md->flush_bio;
...@@ -1538,7 +1554,7 @@ static void __split_and_process_bio(struct mapped_device *md, ...@@ -1538,7 +1554,7 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.io->orig_bio = b; ci.io->orig_bio = b;
bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9); bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
bio_chain(b, bio); bio_chain(b, bio);
generic_make_request(bio); ret = generic_make_request(bio);
break; break;
} }
} }
...@@ -1546,15 +1562,63 @@ static void __split_and_process_bio(struct mapped_device *md, ...@@ -1546,15 +1562,63 @@ static void __split_and_process_bio(struct mapped_device *md,
/* drop the extra reference count */ /* drop the extra reference count */
dec_pending(ci.io, errno_to_blk_status(error)); dec_pending(ci.io, errno_to_blk_status(error));
return ret;
} }
/* /*
* The request function that remaps the bio to one target and * Optimized variant of __split_and_process_bio that leverages the
* splits off any remainder. * fact that targets that use it do _not_ have a need to split bios.
*/ */
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) static blk_qc_t __process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio)
{
struct clone_info ci;
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
if (unlikely(!map)) {
bio_io_error(bio);
return ret;
}
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
ci.bio = &ci.io->md->flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
} else {
struct dm_target *ti = md->immutable_target;
struct dm_target_io *tio;
/*
* Defend against IO still getting in during teardown
* - as was seen for a time with nvme-fcloop
*/
if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) {
error = -EIO;
goto out;
}
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
ret = __clone_and_map_simple_bio(&ci, tio, NULL);
}
out:
/* drop the extra reference count */
dec_pending(ci.io, errno_to_blk_status(error));
return ret;
}
/*
 * Signature shared by the bio-processing routines that
 * __dm_make_request() dispatches to (__split_and_process_bio and
 * __process_bio): takes the mapped device, the table and the bio to
 * process, and returns a blk_qc_t.
 */
typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *);
static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
process_bio_fn process_bio)
{ {
struct mapped_device *md = q->queuedata; struct mapped_device *md = q->queuedata;
blk_qc_t ret = BLK_QC_T_NONE;
int srcu_idx; int srcu_idx;
struct dm_table *map; struct dm_table *map;
...@@ -1568,12 +1632,27 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) ...@@ -1568,12 +1632,27 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
queue_io(md, bio); queue_io(md, bio);
else else
bio_io_error(bio); bio_io_error(bio);
return BLK_QC_T_NONE; return ret;
} }
__split_and_process_bio(md, map, bio); ret = process_bio(md, map, bio);
dm_put_live_table(md, srcu_idx); dm_put_live_table(md, srcu_idx);
return BLK_QC_T_NONE; return ret;
}
/*
 * The request function that remaps the bio to one target and
 * splits off any remainder.
 *
 * Thin wrapper: hands @bio to __dm_make_request() with
 * __split_and_process_bio as the processing routine and returns the
 * resulting blk_qc_t unchanged.
 */
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
return __dm_make_request(q, bio, __split_and_process_bio);
}
/*
 * Request function for DM_TYPE_NVME_BIO_BASED devices.
 *
 * Thin wrapper: hands @bio to __dm_make_request() with __process_bio
 * (the variant for targets that do not need bios split) as the
 * processing routine and returns the resulting blk_qc_t unchanged.
 */
static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio)
{
return __dm_make_request(q, bio, __process_bio);
} }
static int dm_any_congested(void *congested_data, int bdi_bits) static int dm_any_congested(void *congested_data, int bdi_bits)
...@@ -1927,6 +2006,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, ...@@ -1927,6 +2006,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
{ {
struct dm_table *old_map; struct dm_table *old_map;
struct request_queue *q = md->queue; struct request_queue *q = md->queue;
bool request_based = dm_table_request_based(t);
sector_t size; sector_t size;
lockdep_assert_held(&md->suspend_lock); lockdep_assert_held(&md->suspend_lock);
...@@ -1950,12 +2030,15 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, ...@@ -1950,12 +2030,15 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
* This must be done before setting the queue restrictions, * This must be done before setting the queue restrictions,
* because request-based dm may be run just after the setting. * because request-based dm may be run just after the setting.
*/ */
if (dm_table_request_based(t)) { if (request_based)
dm_stop_queue(q); dm_stop_queue(q);
if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
/* /*
* Leverage the fact that request-based DM targets are * Leverage the fact that request-based DM targets and
* immutable singletons and establish md->immutable_target * NVMe bio based targets are immutable singletons
* - used to optimize both dm_request_fn and dm_mq_queue_rq * - used to optimize both dm_request_fn and dm_mq_queue_rq;
* and __process_bio.
*/ */
md->immutable_target = dm_table_get_immutable_target(t); md->immutable_target = dm_table_get_immutable_target(t);
} }
...@@ -2073,10 +2156,13 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) ...@@ -2073,10 +2156,13 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
break; break;
case DM_TYPE_BIO_BASED: case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED: case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md); dm_init_normal_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request); blk_queue_make_request(md->queue, dm_make_request);
break; break;
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request_nvme);
break;
case DM_TYPE_NONE: case DM_TYPE_NONE:
WARN_ON_ONCE(true); WARN_ON_ONCE(true);
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment