Commit 18b9837e authored by NeilBrown

md/raid5: fix handling of bad blocks during recovery.

1/ We can only treat a known-bad-block like a read-error if we
   have the data that belongs in that block.  So fix that test.

2/ If we cannot recover a stripe due to insufficient data,
   don't tell "md_done_sync" that the sync failed unless we really
   did fail something.  If we successfully record bad blocks,
   that is success.
Reported-by: "majianpeng" <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent 5220ea1e
...@@ -2471,18 +2471,19 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -2471,18 +2471,19 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0; int abort = 0;
int i; int i;
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0; s->syncing = 0;
s->replacing = 0; s->replacing = 0;
/* There is nothing more to do for sync/check/repair. /* There is nothing more to do for sync/check/repair.
* Don't even need to abort as that is handled elsewhere
* if needed, and not always wanted e.g. if there is a known
* bad block here.
* For recover/replace we need to record a bad block on all * For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery * non-sync devices, or abort the recovery
*/ */
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
return; /* During recovery devices cannot be removed, so
/* During recovery devices cannot be removed, so locking and * locking and refcounting of rdevs is not needed
* refcounting of rdevs is not needed
*/ */
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = conf->disks[i].rdev; struct md_rdev *rdev = conf->disks[i].rdev;
...@@ -2500,10 +2501,11 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -2500,10 +2501,11 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
STRIPE_SECTORS, 0)) STRIPE_SECTORS, 0))
abort = 1; abort = 1;
} }
if (abort) { if (abort)
conf->recovery_disabled = conf->mddev->recovery_disabled; conf->recovery_disabled =
set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); conf->mddev->recovery_disabled;
} }
md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
} }
static int want_replace(struct stripe_head *sh, int disk_idx) static int want_replace(struct stripe_head *sh, int disk_idx)
...@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */; /* Not in-sync */;
else if (is_bad) { else if (is_bad) {
/* also not in-sync */ /* also not in-sync */
if (!test_bit(WriteErrorSeen, &rdev->flags)) { if (!test_bit(WriteErrorSeen, &rdev->flags) &&
test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error /* treat as in-sync, but with a read error
* which we can now try to correct * which we can now try to correct
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment