Commit f63f1735 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Song Liu

md/raid5: use the atomic queue limit update APIs

Build the queue limits outside the queue and apply them using
queue_limits_set.  To make the code more obvious also split the queue
limits handling into separate helpers.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed--by: default avatarSong Liu <song@kernel.org>
Tested-by: default avatarSong Liu <song@kernel.org>
Signed-off-by: default avatarSong Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240303140150.5435-8-hch@lst.de
parent 97894f7d
...@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded ...@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
return 0; return 0;
} }
static void raid5_set_io_opt(struct r5conf *conf) static int raid5_set_limits(struct mddev *mddev)
{ {
blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * struct r5conf *conf = mddev->private;
(conf->raid_disks - conf->max_degraded)); struct queue_limits lim;
int data_disks, stripe;
struct md_rdev *rdev;
/*
* The read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices.
*/
data_disks = conf->previous_raid_disks - conf->max_degraded;
/*
* We can only discard a whole stripe. It doesn't make sense to
* discard data disk but write parity disk
*/
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
blk_set_stacking_limits(&lim);
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
lim.raid_partial_stripes_expensive = 1;
lim.discard_granularity = stripe;
lim.max_write_zeroes_sectors = 0;
mddev_stack_rdev_limits(mddev, &lim);
rdev_for_each(rdev, mddev)
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
mddev->gendisk->disk_name);
/*
* Zeroing is required for discard, otherwise data could be lost.
*
* Consider a scenario: discard a stripe (the stripe could be
* inconsistent if discard_zeroes_data is 0); write one disk of the
* stripe (the stripe could be inconsistent again depending on which
* disks are used to calculate parity); the disk is broken; The stripe
* data of this disk is lost.
*
* We only allow DISCARD if the sysadmin has confirmed that only safe
* devices are in use by setting a module parameter. A better idea
* might be to turn DISCARD into WRITE_ZEROES requests, as that is
* required to be safe.
*/
if (!devices_handle_discard_safely ||
lim.max_discard_sectors < (stripe >> 9) ||
lim.discard_granularity < stripe)
lim.max_hw_discard_sectors = 0;
/*
* Requests require having a bitmap for each stripe.
* Limit the max sectors based on this.
*/
lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);
/* No restrictions on the number of segments in the request */
lim.max_segments = USHRT_MAX;
return queue_limits_set(mddev->queue, &lim);
} }
static int raid5_run(struct mddev *mddev) static int raid5_run(struct mddev *mddev)
...@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev)
int i; int i;
long long min_offset_diff = 0; long long min_offset_diff = 0;
int first = 1; int first = 1;
int ret = -EIO;
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
...@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev) ...@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev)
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
if (!mddev_is_dm(mddev)) { if (!mddev_is_dm(mddev)) {
int chunk_size; ret = raid5_set_limits(mddev);
/* read-ahead size must cover two whole stripes, which if (ret)
* is 2 * (datadisks) * chunksize where 'n' is the goto abort;
* number of raid devices
*/
int data_disks = conf->previous_raid_disks - conf->max_degraded;
int stripe = data_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
raid5_set_io_opt(conf);
mddev->queue->limits.raid_partial_stripes_expensive = 1;
/*
* We can only discard a whole stripe. It doesn't make sense to
* discard data disk but write parity disk
*/
stripe = stripe * PAGE_SIZE;
stripe = roundup_pow_of_two(stripe);
mddev->queue->limits.discard_granularity = stripe;
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->new_data_offset << 9);
}
/*
* zeroing is required, otherwise data
* could be lost. Consider a scenario: discard a stripe
* (the stripe could be inconsistent if
* discard_zeroes_data is 0); write one disk of the
* stripe (the stripe could be inconsistent again
* depending on which disks are used to calculate
* parity); the disk is broken; The stripe data of this
* disk is lost.
*
* We only allow DISCARD if the sysadmin has confirmed that
* only safe devices are in use by setting a module parameter.
* A better idea might be to turn DISCARD into WRITE_ZEROES
* requests, as that is required to be safe.
*/
if (!devices_handle_discard_safely ||
mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
mddev->queue->limits.discard_granularity < stripe)
blk_queue_max_discard_sectors(mddev->queue, 0);
/*
* Requests require having a bitmap for each stripe.
* Limit the max sectors based on this.
*/
blk_queue_max_hw_sectors(mddev->queue,
RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
/* No restrictions on the number of segments in the request */
blk_queue_max_segments(mddev->queue, USHRT_MAX);
} }
if (log_init(conf, journal_dev, raid5_has_ppl(conf))) if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
...@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev)
free_conf(conf); free_conf(conf);
mddev->private = NULL; mddev->private = NULL;
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
return -EIO; return ret;
} }
static void raid5_free(struct mddev *mddev, void *priv) static void raid5_free(struct mddev *mddev, void *priv)
...@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf) ...@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap); wake_up(&conf->wait_for_overlap);
if (!mddev_is_dm(conf->mddev)) mddev_update_io_opt(conf->mddev,
raid5_set_io_opt(conf); conf->raid_disks - conf->max_degraded);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment