Commit eb8db831 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe

dm: always defer request allocation to the owner of the request_queue

DM already calls blk_mq_alloc_request on the request_queue of the
underlying device if it is a blk-mq device.  But now that we allow drivers
to allocate additional data and initialize it ahead of time we need to do
the same for all drivers.   Doing so and using the new cmd_size
infrastructure in the block layer greatly simplifies the dm-rq and mpath
code, and should also make arbitrary combinations of SQ and MQ devices
with SQ or MQ device mapper tables easily possible as a further step.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarHannes Reinecke <hare@suse.com>
Reviewed-by: default avatarMike Snitzer <snitzer@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent 4bf58435
......@@ -92,7 +92,6 @@ struct mapped_device {
* io objects are allocated from here.
*/
mempool_t *io_pool;
mempool_t *rq_pool;
struct bio_set *bs;
......
......@@ -92,12 +92,6 @@ struct multipath {
unsigned queue_mode;
/*
* We must use a mempool of dm_mpath_io structs so that we
* can resubmit bios on error.
*/
mempool_t *mpio_pool;
struct mutex work_mutex;
struct work_struct trigger_event;
......@@ -115,8 +109,6 @@ struct dm_mpath_io {
typedef int (*action_fn) (struct pgpath *pgpath);
static struct kmem_cache *_mpio_cache;
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
......@@ -209,7 +201,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
m->mpio_pool = NULL;
m->queue_mode = DM_TYPE_NONE;
m->ti = ti;
......@@ -229,16 +220,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
}
if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
unsigned min_ios = dm_get_reserved_rq_based_ios();
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
if (!m->mpio_pool)
return -ENOMEM;
}
else if (m->queue_mode == DM_TYPE_BIO_BASED) {
} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
/*
* bio-based doesn't support any direct scsi_dh management;
......@@ -263,7 +245,6 @@ static void free_multipath(struct multipath *m)
kfree(m->hw_handler_name);
kfree(m->hw_handler_params);
mempool_destroy(m->mpio_pool);
kfree(m);
}
......@@ -272,38 +253,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
return info->ptr;
}
static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
{
struct dm_mpath_io *mpio;
if (!m->mpio_pool) {
/* Use blk-mq pdu memory requested via per_io_data_size */
mpio = get_mpio(info);
memset(mpio, 0, sizeof(*mpio));
return mpio;
}
mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
if (!mpio)
return NULL;
memset(mpio, 0, sizeof(*mpio));
info->ptr = mpio;
return mpio;
}
static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
{
/* Only needed for non blk-mq (.request_fn) multipath */
if (m->mpio_pool) {
struct dm_mpath_io *mpio = info->ptr;
info->ptr = NULL;
mempool_free(mpio, m->mpio_pool);
}
}
static size_t multipath_per_bio_data_size(void)
{
return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
......@@ -530,16 +479,17 @@ static bool must_push_back_bio(struct multipath *m)
/*
* Map cloned requests (request-based multipath)
*/
static int __multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context,
struct request *rq, struct request **__clone)
static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
union map_info *map_context,
struct request **__clone)
{
struct multipath *m = ti->private;
int r = DM_MAPIO_REQUEUE;
size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
size_t nr_bytes = blk_rq_bytes(rq);
struct pgpath *pgpath;
struct block_device *bdev;
struct dm_mpath_io *mpio;
struct dm_mpath_io *mpio = get_mpio(map_context);
struct request *clone;
/* Do we need to select a new pgpath? */
pgpath = lockless_dereference(m->current_pgpath);
......@@ -556,42 +506,23 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
return r;
}
mpio = set_mpio(m, map_context);
if (!mpio)
/* ENOMEM, requeue */
return r;
memset(mpio, 0, sizeof(*mpio));
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
bdev = pgpath->path.dev->bdev;
if (clone) {
/*
* Old request-based interface: allocated clone is passed in.
* Used by: .request_fn stacked on .request_fn path(s).
*/
clone->q = bdev_get_queue(bdev);
clone->rq_disk = bdev->bd_disk;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
} else {
/*
* blk-mq request-based interface; used by both:
* .request_fn stacked on blk-mq path(s) and
* blk-mq stacked on blk-mq path(s).
*/
clone = blk_mq_alloc_request(bdev_get_queue(bdev),
rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
clear_request_fn_mpio(m, map_context);
return r;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
*__clone = clone;
clone = blk_get_request(bdev_get_queue(bdev),
rq->cmd_flags | REQ_NOMERGE,
GFP_ATOMIC);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
return r;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
*__clone = clone;
if (pgpath->pg->ps.type->start_io)
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
......@@ -600,22 +531,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
return DM_MAPIO_REMAPPED;
}
static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
return __multipath_map(ti, clone, map_context, NULL, NULL);
}
static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
union map_info *map_context,
struct request **clone)
{
return __multipath_map(ti, NULL, map_context, rq, clone);
}
static void multipath_release_clone(struct request *clone)
{
blk_mq_free_request(clone);
blk_put_request(clone);
}
/*
......@@ -1187,7 +1105,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_write_same_bios = 1;
if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
return 0;
......@@ -1610,7 +1528,6 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
clear_request_fn_mpio(m, map_context);
return r;
}
......@@ -2060,7 +1977,6 @@ static struct target_type multipath_target = {
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
.map_rq = multipath_map,
.clone_and_map_rq = multipath_clone_and_map,
.release_clone_rq = multipath_release_clone,
.rq_end_io = multipath_end_io,
......@@ -2080,11 +1996,6 @@ static int __init dm_multipath_init(void)
{
int r;
/* allocate a slab for the dm_mpath_ios */
_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
if (!_mpio_cache)
return -ENOMEM;
r = dm_register_target(&multipath_target);
if (r < 0) {
DMERR("request-based register failed %d", r);
......@@ -2120,8 +2031,6 @@ static int __init dm_multipath_init(void)
bad_alloc_kmultipathd:
dm_unregister_target(&multipath_target);
bad_register_target:
kmem_cache_destroy(_mpio_cache);
return r;
}
......@@ -2131,7 +2040,6 @@ static void __exit dm_multipath_exit(void)
destroy_workqueue(kmultipathd);
dm_unregister_target(&multipath_target);
kmem_cache_destroy(_mpio_cache);
}
module_init(dm_multipath_init);
......
This diff is collapsed.
......@@ -48,7 +48,7 @@ struct dm_rq_clone_bio_info {
bool dm_use_blk_mq_default(void);
bool dm_use_blk_mq(struct mapped_device *md);
int dm_old_init_request_queue(struct mapped_device *md);
int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
void dm_mq_cleanup_mapped_device(struct mapped_device *md);
......
......@@ -131,12 +131,6 @@ static int io_err_map(struct dm_target *tt, struct bio *bio)
return -EIO;
}
static int io_err_map_rq(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
return -EIO;
}
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
union map_info *map_context,
struct request **clone)
......@@ -161,7 +155,6 @@ static struct target_type error_target = {
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
.map_rq = io_err_map_rq,
.clone_and_map_rq = io_err_clone_and_map_rq,
.release_clone_rq = io_err_release_clone_rq,
.direct_access = io_err_direct_access,
......
......@@ -91,7 +91,6 @@ static int dm_numa_node = DM_NUMA_NODE;
*/
struct dm_md_mempools {
mempool_t *io_pool;
mempool_t *rq_pool;
struct bio_set *bs;
};
......@@ -1419,7 +1418,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
if (md->kworker_task)
kthread_stop(md->kworker_task);
mempool_destroy(md->io_pool);
mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
......@@ -1595,12 +1593,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
goto out;
}
BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
BUG_ON(!p || md->io_pool || md->bs);
md->io_pool = p->io_pool;
p->io_pool = NULL;
md->rq_pool = p->rq_pool;
p->rq_pool = NULL;
md->bs = p->bs;
p->bs = NULL;
......@@ -1777,7 +1773,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
switch (type) {
case DM_TYPE_REQUEST_BASED:
r = dm_old_init_request_queue(md);
r = dm_old_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based mapped device");
return r;
......@@ -2493,7 +2489,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
unsigned integrity, unsigned per_io_data_size)
{
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
struct kmem_cache *cachep = NULL;
unsigned int pool_size = 0;
unsigned int front_pad;
......@@ -2503,20 +2498,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
switch (type) {
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
cachep = _io_cache;
pool_size = dm_get_reserved_bio_based_ios();
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
if (!pools->io_pool)
goto out;
break;
case DM_TYPE_REQUEST_BASED:
cachep = _rq_tio_cache;
pool_size = dm_get_reserved_rq_based_ios();
pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
if (!pools->rq_pool)
goto out;
/* fall through to setup remaining rq-based pools */
case DM_TYPE_MQ_REQUEST_BASED:
if (!pool_size)
pool_size = dm_get_reserved_rq_based_ios();
pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_io_data_size is used for blk-mq pdu at queue allocation */
break;
......@@ -2524,12 +2515,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
BUG();
}
if (cachep) {
pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
if (!pools->io_pool)
goto out;
}
pools->bs = bioset_create_nobvec(pool_size, front_pad);
if (!pools->bs)
goto out;
......@@ -2551,7 +2536,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
return;
mempool_destroy(pools->io_pool);
mempool_destroy(pools->rq_pool);
if (pools->bs)
bioset_free(pools->bs);
......
......@@ -95,8 +95,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
/*
* To check whether the target type is request-based or not (bio-based).
*/
#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \
((t)->type->clone_and_map_rq != NULL))
#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL)
/*
* To check whether the target type is a hybrid (capable of being
......
......@@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
* = 2: The target wants to push back the io
*/
typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
union map_info *map_context);
typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
struct request *rq,
union map_info *map_context,
......@@ -163,7 +161,6 @@ struct target_type {
dm_ctr_fn ctr;
dm_dtr_fn dtr;
dm_map_fn map;
dm_map_request_fn map_rq;
dm_clone_and_map_request_fn clone_and_map_rq;
dm_release_clone_request_fn release_clone_rq;
dm_endio_fn end_io;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment