Commit c144c63f authored by Christoph Hellwig, committed by David Sterba

btrfs: repair all known bad mirrors

When there is more than a single level of redundancy there can also be
multiple bad mirrors, and the current read repair code only repairs the
last bad one.

Restructure btrfs_repair_one_sector so that it records the originally
failed mirror and the number of copies, and then repair all known bad
copies until we reach the originally failed copy in clean_io_failure.
Note that this also means the read repair reads will always start from
the next bad mirror and not mirror 0.

This fixes btrfs/265 in xfstests.
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent d28beb3e
...@@ -2434,6 +2434,20 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) ...@@ -2434,6 +2434,20 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
return ret; return ret;
} }
/*
 * Return the mirror number that follows @cur_mirror.
 *
 * Mirror numbers wrap around: when @cur_mirror equals failrec->num_copies
 * the result is 1, otherwise it is @cur_mirror + 1.
 */
static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
{
	const int following = cur_mirror + 1;

	if (cur_mirror != failrec->num_copies)
		return following;

	/* Stepped past the last copy, so wrap back to the first one. */
	return following - failrec->num_copies;
}
/*
 * Return the mirror number that precedes @cur_mirror.
 *
 * Mirror numbers wrap around: stepping back from mirror 1 yields
 * failrec->num_copies, otherwise the result is @cur_mirror - 1.
 */
static int prev_mirror(const struct io_failure_record *failrec, int cur_mirror)
{
	return (cur_mirror == 1) ? failrec->num_copies : cur_mirror - 1;
}
/* /*
* each time an IO finishes, we do a fast check in the IO failure tree * each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record * to see if we need to process or clean up an io_failure_record
...@@ -2446,7 +2460,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info, ...@@ -2446,7 +2460,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
u64 private; u64 private;
struct io_failure_record *failrec; struct io_failure_record *failrec;
struct extent_state *state; struct extent_state *state;
int num_copies; int mirror;
int ret; int ret;
private = 0; private = 0;
...@@ -2470,20 +2484,19 @@ int clean_io_failure(struct btrfs_fs_info *fs_info, ...@@ -2470,20 +2484,19 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
EXTENT_LOCKED); EXTENT_LOCKED);
spin_unlock(&io_tree->lock); spin_unlock(&io_tree->lock);
if (state && state->start <= failrec->start && if (!state || state->start > failrec->start ||
state->end >= failrec->start + failrec->len - 1) { state->end < failrec->start + failrec->len - 1)
num_copies = btrfs_num_copies(fs_info, failrec->logical, goto out;
failrec->len);
if (num_copies > 1) { mirror = failrec->this_mirror;
repair_io_failure(fs_info, ino, start, failrec->len, do {
failrec->logical, page, pg_offset, mirror = prev_mirror(failrec, mirror);
failrec->failed_mirror); repair_io_failure(fs_info, ino, start, failrec->len,
} failrec->logical, page, pg_offset, mirror);
} } while (mirror != failrec->failed_mirror);
out: out:
free_io_failure(failure_tree, io_tree, failrec); free_io_failure(failure_tree, io_tree, failrec);
return 0; return 0;
} }
...@@ -2522,7 +2535,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end) ...@@ -2522,7 +2535,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
} }
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode, static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
u64 start) u64 start,
int failed_mirror)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct io_failure_record *failrec; struct io_failure_record *failrec;
...@@ -2544,7 +2558,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode ...@@ -2544,7 +2558,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
* (e.g. with a list for failed_mirror) to make * (e.g. with a list for failed_mirror) to make
* clean_io_failure() clean all those errors at once. * clean_io_failure() clean all those errors at once.
*/ */
ASSERT(failrec->this_mirror == failed_mirror);
ASSERT(failrec->len == fs_info->sectorsize);
return failrec; return failrec;
} }
...@@ -2554,7 +2569,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode ...@@ -2554,7 +2569,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
failrec->start = start; failrec->start = start;
failrec->len = sectorsize; failrec->len = sectorsize;
failrec->this_mirror = 0; failrec->failed_mirror = failed_mirror;
failrec->this_mirror = failed_mirror;
failrec->compress_type = BTRFS_COMPRESS_NONE; failrec->compress_type = BTRFS_COMPRESS_NONE;
read_lock(&em_tree->lock); read_lock(&em_tree->lock);
...@@ -2589,6 +2605,20 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode ...@@ -2589,6 +2605,20 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
failrec->logical = logical; failrec->logical = logical;
free_extent_map(em); free_extent_map(em);
failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
if (failrec->num_copies == 1) {
/*
* We only have a single copy of the data, so don't bother with
* all the retry and error correction code that follows. No
* matter what the error is, it is very likely to persist.
*/
btrfs_debug(fs_info,
"cannot repair logical %llu num_copies %d",
failrec->logical, failrec->num_copies);
kfree(failrec);
return ERR_PTR(-EIO);
}
/* Set the bits in the private failure tree */ /* Set the bits in the private failure tree */
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1, ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
EXTENT_LOCKED | EXTENT_DIRTY); EXTENT_LOCKED | EXTENT_DIRTY);
...@@ -2605,54 +2635,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode ...@@ -2605,54 +2635,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
return failrec; return failrec;
} }
/*
 * Decide whether another mirror can still be tried for @failrec.
 *
 * Records @failed_mirror in the failure record and advances
 * failrec->this_mirror to the next candidate, skipping @failed_mirror itself.
 *
 * Returns false when the range has only a single copy or when the advanced
 * mirror number exceeds the number of copies; returns true otherwise.
 */
static bool btrfs_check_repairable(struct inode *inode,
				   struct io_failure_record *failrec,
				   int failed_mirror)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int num_copies = btrfs_num_copies(fs_info, failrec->logical,
					  failrec->len);
	int candidate;

	if (num_copies == 1) {
		/*
		 * With just one copy of the data there is nothing to retry
		 * from; whatever the error is, it is very likely to persist.
		 */
		btrfs_debug(fs_info,
			"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
			num_copies, failrec->this_mirror, failed_mirror);
		return false;
	}

	/* A failure record is expected to cover exactly one sector. */
	ASSERT(failrec->len == fs_info->sectorsize);

	/*
	 * Two goals here:
	 *   a) hand good data back to the caller
	 *   b) fix up the bad sectors on disk
	 *
	 * Only one sector is being repaired, so all that is needed is one
	 * good copy of it; the original comment notes that on success
	 * repair_io_failure takes care of the rest.
	 */
	ASSERT(failed_mirror);
	failrec->failed_mirror = failed_mirror;

	/* Move on to the next mirror, stepping over the one that failed. */
	candidate = failrec->this_mirror + 1;
	if (candidate == failed_mirror)
		candidate++;
	failrec->this_mirror = candidate;

	if (candidate <= num_copies)
		return true;

	btrfs_debug(fs_info,
		"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
		num_copies, failrec->this_mirror, failed_mirror);
	return false;
}
int btrfs_repair_one_sector(struct inode *inode, int btrfs_repair_one_sector(struct inode *inode,
struct bio *failed_bio, u32 bio_offset, struct bio *failed_bio, u32 bio_offset,
struct page *page, unsigned int pgoff, struct page *page, unsigned int pgoff,
...@@ -2673,12 +2655,24 @@ int btrfs_repair_one_sector(struct inode *inode, ...@@ -2673,12 +2655,24 @@ int btrfs_repair_one_sector(struct inode *inode,
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
failrec = btrfs_get_io_failure_record(inode, start); failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
if (IS_ERR(failrec)) if (IS_ERR(failrec))
return PTR_ERR(failrec); return PTR_ERR(failrec);
/*
if (!btrfs_check_repairable(inode, failrec, failed_mirror)) { * There are two premises:
* a) deliver good data to the caller
* b) correct the bad sectors on disk
*
* Since we're only doing repair for one sector, we only need to get
* a good copy of the failed sector and if we succeed, we have setup
* everything for repair_io_failure to do the rest for us.
*/
failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
if (failrec->this_mirror == failrec->failed_mirror) {
btrfs_debug(fs_info,
"failed to repair num_copies %d this_mirror %d failed_mirror %d",
failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
free_io_failure(failure_tree, tree, failrec); free_io_failure(failure_tree, tree, failrec);
return -EIO; return -EIO;
} }
......
...@@ -263,6 +263,7 @@ struct io_failure_record { ...@@ -263,6 +263,7 @@ struct io_failure_record {
enum btrfs_compression_type compress_type; enum btrfs_compression_type compress_type;
int this_mirror; int this_mirror;
int failed_mirror; int failed_mirror;
int num_copies;
}; };
int btrfs_repair_one_sector(struct inode *inode, int btrfs_repair_one_sector(struct inode *inode,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment