Commit abbf098e authored by NeilBrown

md/raid10: preferentially read from replacement device if possible.

When reading (for array reads, not for recovery etc) we read from the
replacement device if it has recovered far enough.
This requires storing the chosen rdev in the 'r10_bio' so we can make
sure to drop the ref on the right device when the read finishes.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 96c3fd1f
...@@ -324,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -324,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private; struct r10bio *r10_bio = bio->bi_private;
int slot, dev; int slot, dev;
struct md_rdev *rdev;
struct r10conf *conf = r10_bio->mddev->private; struct r10conf *conf = r10_bio->mddev->private;
slot = r10_bio->read_slot; slot = r10_bio->read_slot;
dev = r10_bio->devs[slot].devnum; dev = r10_bio->devs[slot].devnum;
rdev = r10_bio->devs[slot].rdev;
/* /*
* this branch is our 'one mirror IO has finished' event handler: * this branch is our 'one mirror IO has finished' event handler:
*/ */
...@@ -346,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -346,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
*/ */
set_bit(R10BIO_Uptodate, &r10_bio->state); set_bit(R10BIO_Uptodate, &r10_bio->state);
raid_end_bio_io(r10_bio); raid_end_bio_io(r10_bio);
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} else { } else {
/* /*
* oops, read error - keep the refcount on the rdev * oops, read error - keep the refcount on the rdev
...@@ -355,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error) ...@@ -355,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
printk_ratelimited(KERN_ERR printk_ratelimited(KERN_ERR
"md/raid10:%s: %s: rescheduling sector %llu\n", "md/raid10:%s: %s: rescheduling sector %llu\n",
mdname(conf->mddev), mdname(conf->mddev),
bdevname(conf->mirrors[dev].rdev->bdev, b), bdevname(rdev->bdev, b),
(unsigned long long)r10_bio->sector); (unsigned long long)r10_bio->sector);
set_bit(R10BIO_ReadError, &r10_bio->state); set_bit(R10BIO_ReadError, &r10_bio->state);
reschedule_retry(r10_bio); reschedule_retry(r10_bio);
...@@ -599,7 +601,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -599,7 +601,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
int sectors = r10_bio->sectors; int sectors = r10_bio->sectors;
int best_good_sectors; int best_good_sectors;
sector_t new_distance, best_dist; sector_t new_distance, best_dist;
struct md_rdev *rdev; struct md_rdev *rdev, *best_rdev;
int do_balance; int do_balance;
int best_slot; int best_slot;
...@@ -608,6 +610,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -608,6 +610,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
retry: retry:
sectors = r10_bio->sectors; sectors = r10_bio->sectors;
best_slot = -1; best_slot = -1;
best_rdev = NULL;
best_dist = MaxSector; best_dist = MaxSector;
best_good_sectors = 0; best_good_sectors = 0;
do_balance = 1; do_balance = 1;
...@@ -629,10 +632,16 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -629,10 +632,16 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (r10_bio->devs[slot].bio == IO_BLOCKED) if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue; continue;
disk = r10_bio->devs[slot].devnum; disk = r10_bio->devs[slot].devnum;
rdev = rcu_dereference(conf->mirrors[disk].rdev); rdev = rcu_dereference(conf->mirrors[disk].replacement);
if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (rdev == NULL) if (rdev == NULL)
continue; continue;
if (!test_bit(In_sync, &rdev->flags)) if (test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
continue; continue;
dev_sector = r10_bio->devs[slot].addr; dev_sector = r10_bio->devs[slot].addr;
...@@ -657,6 +666,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -657,6 +666,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (good_sectors > best_good_sectors) { if (good_sectors > best_good_sectors) {
best_good_sectors = good_sectors; best_good_sectors = good_sectors;
best_slot = slot; best_slot = slot;
best_rdev = rdev;
} }
if (!do_balance) if (!do_balance)
/* Must read from here */ /* Must read from here */
...@@ -685,16 +695,15 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -685,16 +695,15 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (new_distance < best_dist) { if (new_distance < best_dist) {
best_dist = new_distance; best_dist = new_distance;
best_slot = slot; best_slot = slot;
best_rdev = rdev;
} }
} }
if (slot == conf->copies) if (slot >= conf->copies) {
slot = best_slot; slot = best_slot;
rdev = best_rdev;
}
if (slot >= 0) { if (slot >= 0) {
disk = r10_bio->devs[slot].devnum;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) { if (test_bit(Faulty, &rdev->flags)) {
/* Cannot risk returning a device that failed /* Cannot risk returning a device that failed
...@@ -990,6 +999,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) ...@@ -990,6 +999,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
max_sectors); max_sectors);
r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
read_bio->bi_sector = r10_bio->devs[slot].addr + read_bio->bi_sector = r10_bio->devs[slot].addr +
rdev->data_offset; rdev->data_offset;
...@@ -2088,10 +2098,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) ...@@ -2088,10 +2098,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
{ {
int slot = r10_bio->read_slot; int slot = r10_bio->read_slot;
int mirror = r10_bio->devs[slot].devnum;
struct bio *bio; struct bio *bio;
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
struct md_rdev *rdev; struct md_rdev *rdev = r10_bio->devs[slot].rdev;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
unsigned long do_sync; unsigned long do_sync;
int max_sectors; int max_sectors;
...@@ -2109,7 +2118,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2109,7 +2118,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
fix_read_error(conf, mddev, r10_bio); fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf); unfreeze_array(conf);
} }
rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); rdev_dec_pending(rdev, mddev);
bio = r10_bio->devs[slot].bio; bio = r10_bio->devs[slot].bio;
bdevname(bio->bi_bdev, b); bdevname(bio->bi_bdev, b);
...@@ -2144,6 +2153,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2144,6 +2153,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
r10_bio->sector - bio->bi_sector, r10_bio->sector - bio->bi_sector,
max_sectors); max_sectors);
r10_bio->devs[slot].bio = bio; r10_bio->devs[slot].bio = bio;
r10_bio->devs[slot].rdev = rdev;
bio->bi_sector = r10_bio->devs[slot].addr bio->bi_sector = r10_bio->devs[slot].addr
+ rdev->data_offset; + rdev->data_offset;
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->bdev;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment