Commit 4ca40c2c authored by NeilBrown's avatar NeilBrown

md/raid10: Allow replacement device to replace old drive.

When recovery finishes and spare_active is called, check for a
replacement that might have just become fully synced and mark it
as such, marking the original as failed.

Then when the original is removed, move the replacement into
its position.

This means that 'replacement' can spontaneously become NULL in some
situations.  Make sure we check for those.
It also means that 'rdev' and 'replacement' could appear to be
identical - check for that too.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 24afd80d
...@@ -397,14 +397,17 @@ static void raid10_end_write_request(struct bio *bio, int error) ...@@ -397,14 +397,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
int dec_rdev = 1; int dec_rdev = 1;
struct r10conf *conf = r10_bio->mddev->private; struct r10conf *conf = r10_bio->mddev->private;
int slot, repl; int slot, repl;
struct md_rdev *rdev; struct md_rdev *rdev = NULL;
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) if (repl)
rdev = conf->mirrors[dev].replacement; rdev = conf->mirrors[dev].replacement;
else if (!rdev) {
smp_rmb();
repl = 0;
rdev = conf->mirrors[dev].rdev; rdev = conf->mirrors[dev].rdev;
}
/* /*
* this branch is our 'one mirror IO has finished' event handler: * this branch is our 'one mirror IO has finished' event handler:
*/ */
...@@ -1089,6 +1092,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) ...@@ -1089,6 +1092,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
struct md_rdev *rrdev = rcu_dereference( struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[d].replacement); conf->mirrors[d].replacement);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev; blocked_rdev = rdev;
...@@ -1170,9 +1175,15 @@ static void make_request(struct mddev *mddev, struct bio * bio) ...@@ -1170,9 +1175,15 @@ static void make_request(struct mddev *mddev, struct bio * bio)
rdev_dec_pending(conf->mirrors[d].rdev, mddev); rdev_dec_pending(conf->mirrors[d].rdev, mddev);
} }
if (r10_bio->devs[j].repl_bio) { if (r10_bio->devs[j].repl_bio) {
struct md_rdev *rdev;
d = r10_bio->devs[j].devnum; d = r10_bio->devs[j].devnum;
rdev_dec_pending( rdev = conf->mirrors[d].replacement;
conf->mirrors[d].replacement, mddev); if (!rdev) {
/* Race with remove_disk */
smp_mb();
rdev = conf->mirrors[d].rdev;
}
rdev_dec_pending(rdev, mddev);
} }
} }
allow_barrier(conf); allow_barrier(conf);
...@@ -1230,6 +1241,10 @@ static void make_request(struct mddev *mddev, struct bio * bio) ...@@ -1230,6 +1241,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
max_sectors); max_sectors);
r10_bio->devs[i].repl_bio = mbio; r10_bio->devs[i].repl_bio = mbio;
/* We are actively writing to the original device
* so it cannot disappear, so the replacement cannot
* become NULL here
*/
mbio->bi_sector = (r10_bio->devs[i].addr+ mbio->bi_sector = (r10_bio->devs[i].addr+
conf->mirrors[d].replacement->data_offset); conf->mirrors[d].replacement->data_offset);
mbio->bi_bdev = conf->mirrors[d].replacement->bdev; mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
...@@ -1404,9 +1419,27 @@ static int raid10_spare_active(struct mddev *mddev) ...@@ -1404,9 +1419,27 @@ static int raid10_spare_active(struct mddev *mddev)
*/ */
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i; tmp = conf->mirrors + i;
if (tmp->rdev if (tmp->replacement
&& !test_bit(Faulty, &tmp->rdev->flags) && tmp->replacement->recovery_offset == MaxSector
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) { && !test_bit(Faulty, &tmp->replacement->flags)
&& !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
/* Replacement has just become active */
if (!tmp->rdev
|| !test_and_clear_bit(In_sync, &tmp->rdev->flags))
count++;
if (tmp->rdev) {
/* Replaced device not technically faulty,
* but we need to be sure it gets removed
* and never re-added.
*/
set_bit(Faulty, &tmp->rdev->flags);
sysfs_notify_dirent_safe(
tmp->rdev->sysfs_state);
}
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
} else if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++; count++;
sysfs_notify_dirent(tmp->rdev->sysfs_state); sysfs_notify_dirent(tmp->rdev->sysfs_state);
} }
...@@ -1506,6 +1539,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1506,6 +1539,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
*/ */
if (!test_bit(Faulty, &rdev->flags) && if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != p->recovery_disabled && mddev->recovery_disabled != p->recovery_disabled &&
(!p->replacement || p->replacement == rdev) &&
enough(conf, -1)) { enough(conf, -1)) {
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
...@@ -1517,7 +1551,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1517,7 +1551,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY; err = -EBUSY;
*rdevp = rdev; *rdevp = rdev;
goto abort; goto abort;
} } else if (p->replacement) {
/* We must have just cleared 'rdev' */
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
smp_mb(); /* Make sure other CPUs may see both as identical
* but will never see neither -- if they are careful.
*/
p->replacement = NULL;
clear_bit(WantReplacement, &rdev->flags);
} else
/* We might have just remove the Replacement as faulty
* Clear the flag just in case
*/
clear_bit(WantReplacement, &rdev->flags);
err = md_integrity_register(mddev); err = md_integrity_register(mddev);
abort: abort:
...@@ -1595,13 +1643,15 @@ static void end_sync_write(struct bio *bio, int error) ...@@ -1595,13 +1643,15 @@ static void end_sync_write(struct bio *bio, int error)
int bad_sectors; int bad_sectors;
int slot; int slot;
int repl; int repl;
struct md_rdev *rdev; struct md_rdev *rdev = NULL;
d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) if (repl)
rdev = conf->mirrors[d].replacement; rdev = conf->mirrors[d].replacement;
else if (!rdev) {
smp_mb();
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
}
if (!uptodate) { if (!uptodate) {
if (repl) if (repl)
...@@ -2368,7 +2418,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) ...@@ -2368,7 +2418,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
} }
bio = r10_bio->devs[m].repl_bio; bio = r10_bio->devs[m].repl_bio;
rdev = conf->mirrors[dev].replacement; rdev = conf->mirrors[dev].replacement;
if (bio == IO_MADE_GOOD) { if (rdev && bio == IO_MADE_GOOD) {
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r10_bio->devs[m].addr, r10_bio->devs[m].addr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment