Commit 0d129228 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds

[PATCH] md: define ->congested_fn for raid1, raid10, and multipath

raid1, raid10 and multipath don't report their 'congested' status through
bdi_*_congested, but should.

This patch adds the appropriate functions which just check the 'congested'
status of all active members (with appropriate locking).

raid1 read_balance should be modified to prefer devices where
bdi_read_congested returns false.  Then we could use the '&' branch rather
than the '|' branch.  However that should would need some benchmarking first
to make sure it is actually a good idea.
Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 26be34dc
......@@ -228,6 +228,28 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk,
rcu_read_unlock();
return ret;
}
static int multipath_congested(void *data, int bits)
{
mddev_t *mddev = data;
multipath_conf_t *conf = mddev_to_conf(mddev);
int i, ret = 0;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks ; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
request_queue_t *q = bdev_get_queue(rdev->bdev);
ret |= bdi_congested(&q->backing_dev_info, bits);
/* Just like multipath_map, we just check the
* first available device
*/
break;
}
}
rcu_read_unlock();
return ret;
}
/*
* Careful, this can execute in IRQ contexts as well!
......@@ -509,6 +531,8 @@ static int multipath_run (mddev_t *mddev)
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->issue_flush_fn = multipath_issue_flush;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
return 0;
......
......@@ -601,6 +601,32 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
return ret;
}
static int raid1_congested(void *data, int bits)
{
mddev_t *mddev = data;
conf_t *conf = mddev_to_conf(mddev);
int i, ret = 0;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
request_queue_t *q = bdev_get_queue(rdev->bdev);
/* Note the '|| 1' - when read_balance prefers
* non-congested targets, it can be removed
*/
if ((bits & (1<<BDI_write_congested)) || 1)
ret |= bdi_congested(&q->backing_dev_info, bits);
else
ret &= bdi_congested(&q->backing_dev_info, bits);
}
}
rcu_read_unlock();
return ret;
}
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
......@@ -1965,6 +1991,8 @@ static int run(mddev_t *mddev)
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->issue_flush_fn = raid1_issue_flush;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
return 0;
......
......@@ -648,6 +648,26 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
return ret;
}
static int raid10_congested(void *data, int bits)
{
mddev_t *mddev = data;
conf_t *conf = mddev_to_conf(mddev);
int i, ret = 0;
rcu_read_lock();
for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
request_queue_t *q = bdev_get_queue(rdev->bdev);
ret |= bdi_congested(&q->backing_dev_info, bits);
}
}
rcu_read_unlock();
return ret;
}
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
......@@ -2094,6 +2114,8 @@ static int run(mddev_t *mddev)
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->issue_flush_fn = raid10_issue_flush;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
/* Calculate max read-ahead size.
* We need to readahead at least twice a whole stripe....
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment