Commit 3ee16db3 authored by Mike Snitzer

dm: fix IO splitting

Commit 882ec4e6 ("dm table: stack 'chunk_sectors' limit to account
for target-specific splitting") caused a couple regressions:
1) Using lcm_not_zero() when stacking chunk_sectors was a bug because
   chunk_sectors must reflect the most limited of all devices in the
   IO stack.
2) DM targets that set max_io_len but that do _not_ provide an
   .iterate_devices method no longer had their IO split properly.

And commit 5091cdec ("dm: change max_io_len() to use
blk_max_size_offset()") also caused a regression where DM no longer
supported varied (per target) IO splitting. The implication being the
potential for severely reduced performance for IO stacks that use a DM
target like dm-cache to hide performance limitations of a slower
device (e.g. one that requires 4K IO splitting).

Coming full circle: Fix all these issues by discontinuing stacking
chunk_sectors up using ti->max_io_len in dm_calculate_queue_limits(),
add optional chunk_sectors override argument to blk_max_size_offset()
and update DM's max_io_len() to pass ti->max_io_len to its
blk_max_size_offset() call.

Passing in an optional chunk_sectors override to blk_max_size_offset()
allows for code reuse of block's centralized calculation for max IO
size based on provided offset and split boundary.

Fixes: 882ec4e6 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting")
Fixes: 5091cdec ("dm: change max_io_len() to use blk_max_size_offset()")
Cc: stable@vger.kernel.org
Reported-by: John Dorminy <jdorminy@redhat.com>
Reported-by: Bruce Johnston <bjohnsto@redhat.com>
Reported-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: John Dorminy <jdorminy@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
parent 857c4c0a
...@@ -144,7 +144,7 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q, ...@@ -144,7 +144,7 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
static inline unsigned get_max_io_size(struct request_queue *q, static inline unsigned get_max_io_size(struct request_queue *q,
struct bio *bio) struct bio *bio)
{ {
unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector); unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
unsigned max_sectors = sectors; unsigned max_sectors = sectors;
unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT; unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT; unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/lcm.h>
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/dax.h> #include <linux/dax.h>
...@@ -1449,10 +1448,6 @@ int dm_calculate_queue_limits(struct dm_table *table, ...@@ -1449,10 +1448,6 @@ int dm_calculate_queue_limits(struct dm_table *table,
zone_sectors = ti_limits.chunk_sectors; zone_sectors = ti_limits.chunk_sectors;
} }
/* Stack chunk_sectors if target-specific splitting is required */
if (ti->max_io_len)
ti_limits.chunk_sectors = lcm_not_zero(ti->max_io_len,
ti_limits.chunk_sectors);
/* Set I/O hints portion of queue limits */ /* Set I/O hints portion of queue limits */
if (ti->type->io_hints) if (ti->type->io_hints)
ti->type->io_hints(ti, &ti_limits); ti->type->io_hints(ti, &ti_limits);
......
...@@ -1039,15 +1039,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector) ...@@ -1039,15 +1039,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
sector_t max_len; sector_t max_len;
/* /*
* Does the target need to split even further? * Does the target need to split IO even further?
* - q->limits.chunk_sectors reflects ti->max_io_len so * - varied (per target) IO splitting is a tenet of DM; this
* blk_max_size_offset() provides required splitting. * explains why stacked chunk_sectors based splitting via
* - blk_max_size_offset() also respects q->limits.max_sectors * blk_max_size_offset() isn't possible here. So pass in
* ti->max_io_len to override stacked chunk_sectors.
*/ */
max_len = blk_max_size_offset(ti->table->md->queue, if (ti->max_io_len) {
target_offset); max_len = blk_max_size_offset(ti->table->md->queue,
if (len > max_len) target_offset, ti->max_io_len);
len = max_len; if (len > max_len)
len = max_len;
}
return len; return len;
} }
......
...@@ -1073,11 +1073,12 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, ...@@ -1073,11 +1073,12 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
* file system requests. * file system requests.
*/ */
static inline unsigned int blk_max_size_offset(struct request_queue *q, static inline unsigned int blk_max_size_offset(struct request_queue *q,
sector_t offset) sector_t offset,
unsigned int chunk_sectors)
{ {
unsigned int chunk_sectors = q->limits.chunk_sectors; if (!chunk_sectors && q->limits.chunk_sectors)
chunk_sectors = q->limits.chunk_sectors;
if (!chunk_sectors) else
return q->limits.max_sectors; return q->limits.max_sectors;
if (likely(is_power_of_2(chunk_sectors))) if (likely(is_power_of_2(chunk_sectors)))
...@@ -1101,7 +1102,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, ...@@ -1101,7 +1102,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
req_op(rq) == REQ_OP_SECURE_ERASE) req_op(rq) == REQ_OP_SECURE_ERASE)
return blk_queue_get_max_sectors(q, req_op(rq)); return blk_queue_get_max_sectors(q, req_op(rq));
return min(blk_max_size_offset(q, offset), return min(blk_max_size_offset(q, offset, 0),
blk_queue_get_max_sectors(q, req_op(rq))); blk_queue_get_max_sectors(q, req_op(rq)));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment