[PATCH] md: define ->congested_fn for raid1, raid10, and multipath

raid1, raid10 and multipath don't report their 'congested' status through bdi_*_congested, but should. This patch adds the appropriate functions, which just check the 'congested' status of all active members (with appropriate locking). raid1 read_balance should be modified to prefer devices where bdi_read_congested returns false. Then we could use the '&' branch rather than the '|' branch. However, that would need some benchmarking first to make sure it is actually a good idea. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>

[PATCH] md: define ->congested_fn for raid1, raid10, and multipath
raid1, raid10 and multipath don't report their 'congested' status through bdi_*_congested, but should. This patch adds the appropriate functions, which just check the 'congested' status of all active members (with appropriate locking). raid1 read_balance should be modified to prefer devices where bdi_read_congested returns false. Then we could use the '&' branch rather than the '|' branch. However, that would need some benchmarking first to make sure it is actually a good idea. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
0d129228 · NeilBrown · Linus Torvalds · 26be34dc · 0d129228 · 0d129228
Commit 0d129228 authored Oct 03, 2006 by NeilBrown Committed by Linus Torvalds Oct 03, 2006
Hide whitespace changes
Inline Side-by-side

Showing with 74 additions and 0 deletions

drivers/md/multipath.c drivers/md/multipath.c +24 -0

drivers/md/raid1.c drivers/md/raid1.c +28 -0

drivers/md/raid10.c drivers/md/raid10.c +22 -0

No files found.
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -228,6 +228,28 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk,
 	rcu_read_unlock();
 	return ret;
 }
+static int multipath_congested(void *data, int bits)	/* congested_fn hook; data is the mddev */
+{
+	mddev_t *mddev = data;
+	multipath_conf_t *conf = mddev_to_conf(mddev);
+	int i, ret = 0;	/* ret stays 0 when no usable path is found */
+
+	rcu_read_lock();	/* rdev pointers are read via rcu_dereference() below */
+	for (i = 0; i < mddev->raid_disks ; i++) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {	/* skip empty slots and Faulty paths */
+			request_queue_t *q = bdev_get_queue(rdev->bdev);
+
+			ret |= bdi_congested(&q->backing_dev_info, bits);
+			/* Just like multipath_map, we just check the
+			 * first available device
+			 */
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;	/* congestion result of the first usable path, else 0 */
+}

 /*
 * Careful, this can execute in IRQ contexts as well!
@@ -509,6 +531,8 @@ static int multipath_run (mddev_t *mddev)

 	mddev->queue->unplug_fn = multipath_unplug;
 	mddev->queue->issue_flush_fn = multipath_issue_flush;
+	mddev->queue->backing_dev_info.congested_fn = multipath_congested;
+	mddev->queue->backing_dev_info.congested_data = mddev;

 	return 0;


--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -601,6 +601,32 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }

+static int raid1_congested(void *data, int bits)	/* congested_fn hook; data is the mddev */
+{
+	mddev_t *mddev = data;
+	conf_t *conf = mddev_to_conf(mddev);
+	int i, ret = 0;
+
+	rcu_read_lock();	/* rdev pointers are read via rcu_dereference() below */
+	for (i = 0; i < mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {	/* present, non-Faulty members only */
+			request_queue_t *q = bdev_get_queue(rdev->bdev);
+
+			/* Note the '|| 1' - when read_balance prefers
+			 * non-congested targets, it can be removed
+			 */
+			if ((bits & (1<<BDI_write_congested)) || 1)	/* always true for now */
+				ret |= bdi_congested(&q->backing_dev_info, bits);
+			else	/* unreachable while the '|| 1' is in place */
+				ret &= bdi_congested(&q->backing_dev_info, bits);
+		}
+	}
+	rcu_read_unlock();
+	return ret;	/* OR of bdi_congested() over every checked member */
+}
+
+
 /* Barriers....
 * Sometimes we need to suspend IO while we do something else,
 * either some resync/recovery, or reconfigure the array.
@@ -1965,6 +1991,8 @@ static int run(mddev_t *mddev)

 	mddev->queue->unplug_fn = raid1_unplug;
 	mddev->queue->issue_flush_fn = raid1_issue_flush;
+	mddev->queue->backing_dev_info.congested_fn = raid1_congested;
+	mddev->queue->backing_dev_info.congested_data = mddev;

 	return 0;


--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -648,6 +648,26 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }

+static int raid10_congested(void *data, int bits)	/* congested_fn hook; data is the mddev */
+{
+	mddev_t *mddev = data;
+	conf_t *conf = mddev_to_conf(mddev);
+	int i, ret = 0;
+
+	rcu_read_lock();	/* rdev pointers are read via rcu_dereference() below */
+	for (i = 0; i < mddev->raid_disks && ret == 0; i++) {	/* stop once any member reports congestion */
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {	/* present, non-Faulty members only */
+			request_queue_t *q = bdev_get_queue(rdev->bdev);
+
+			ret |= bdi_congested(&q->backing_dev_info, bits);
+		}
+	}
+	rcu_read_unlock();
+	return ret;	/* non-zero if a checked member was congested, else 0 */
+}
+
+
 /* Barriers....
 * Sometimes we need to suspend IO while we do something else,
 * either some resync/recovery, or reconfigure the array.
@@ -2094,6 +2114,8 @@ static int run(mddev_t *mddev)

 	mddev->queue->unplug_fn = raid10_unplug;
 	mddev->queue->issue_flush_fn = raid10_issue_flush;
+	mddev->queue->backing_dev_info.congested_fn = raid10_congested;
+	mddev->queue->backing_dev_info.congested_data = mddev;

 	/* Calculate max read-ahead size.
 	 * We need to readahead at least twice a whole stripe....