Commit bacf60e5 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by David Sterba

btrfs: move repair_io_failure to bio.c

repair_io_failure ties directly into all the glory low-level details of
mapping a bio with a logic address to the actual physical location.
Move it right below btrfs_submit_bio to keep all the related logic
together.

Also move btrfs_repair_eb_io_failure to its caller in disk-io.c now that
repair_io_failure is available in a header.
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 103c1972
...@@ -276,6 +276,96 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror ...@@ -276,6 +276,96 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
} }
} }
/*
* Submit a repair write.
*
* This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
* RAID setup. Here we only want to write the one bad copy, so we do the
* mapping ourselves and submit the bio directly.
*
* The I/O is issued sychronously to block the repair read completion from
* freeing the bio.
*/
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num)
{
struct btrfs_device *dev;
struct bio_vec bvec;
struct bio bio;
u64 map_length = 0;
u64 sector;
struct btrfs_io_context *bioc = NULL;
int ret = 0;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
BUG_ON(!mirror_num);
if (btrfs_repair_one_zone(fs_info, logical))
return 0;
map_length = length;
/*
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are doing the
* read repair operation.
*/
btrfs_bio_counter_inc_blocked(fs_info);
if (btrfs_is_parity_mirror(fs_info, logical, length)) {
/*
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
* to update all raid stripes, but here we just want to correct
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
* stripe's dev and sector.
*/
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&map_length, &bioc, 0);
if (ret)
goto out_counter_dec;
ASSERT(bioc->mirror_num == 1);
} else {
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
&map_length, &bioc, mirror_num);
if (ret)
goto out_counter_dec;
BUG_ON(mirror_num != bioc->mirror_num);
}
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
dev = bioc->stripes[bioc->mirror_num - 1].dev;
btrfs_put_bioc(bioc);
if (!dev || !dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
ret = -EIO;
goto out_counter_dec;
}
bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = sector;
__bio_add_page(&bio, page, length, pg_offset);
btrfsic_check_bio(&bio);
ret = submit_bio_wait(&bio);
if (ret) {
/* try to remap that extent elsewhere? */
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
goto out_bio_uninit;
}
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
ino, start, btrfs_dev_name(dev), sector);
ret = 0;
out_bio_uninit:
bio_uninit(&bio);
out_counter_dec:
btrfs_bio_counter_dec(fs_info);
return ret;
}
int __init btrfs_bioset_init(void) int __init btrfs_bioset_init(void)
{ {
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
......
...@@ -255,6 +255,30 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level, ...@@ -255,6 +255,30 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
return ret; return ret;
} }
static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
if (sb_rdonly(fs_info->sb))
return -EROFS;
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE,
start, p, start - page_offset(p), mirror_num);
if (ret)
break;
start += PAGE_SIZE;
}
return ret;
}
/* /*
* helper to read a given tree block, doing retries as required when * helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try. * the checksums don't match and we have alternate mirrors to try.
......
...@@ -531,119 +531,6 @@ static void free_io_failure(struct btrfs_inode *inode, ...@@ -531,119 +531,6 @@ static void free_io_failure(struct btrfs_inode *inode,
kfree(rec); kfree(rec);
} }
/*
* this bypasses the standard btrfs submit functions deliberately, as
* the standard behavior is to write all copies in a raid setup. here we only
* want to write the one bad copy. so we do the mapping for ourselves and issue
* submit_bio directly.
* to avoid any synchronization issues, wait for the data after writing, which
* actually prevents the read that triggered the error from finishing.
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num)
{
struct btrfs_device *dev;
struct bio_vec bvec;
struct bio bio;
u64 map_length = 0;
u64 sector;
struct btrfs_io_context *bioc = NULL;
int ret = 0;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
BUG_ON(!mirror_num);
if (btrfs_repair_one_zone(fs_info, logical))
return 0;
map_length = length;
/*
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are doing the
* read repair operation.
*/
btrfs_bio_counter_inc_blocked(fs_info);
if (btrfs_is_parity_mirror(fs_info, logical, length)) {
/*
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
* to update all raid stripes, but here we just want to correct
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
* stripe's dev and sector.
*/
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&map_length, &bioc, 0);
if (ret)
goto out_counter_dec;
ASSERT(bioc->mirror_num == 1);
} else {
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
&map_length, &bioc, mirror_num);
if (ret)
goto out_counter_dec;
BUG_ON(mirror_num != bioc->mirror_num);
}
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
dev = bioc->stripes[bioc->mirror_num - 1].dev;
btrfs_put_bioc(bioc);
if (!dev || !dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
ret = -EIO;
goto out_counter_dec;
}
bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = sector;
__bio_add_page(&bio, page, length, pg_offset);
btrfsic_check_bio(&bio);
ret = submit_bio_wait(&bio);
if (ret) {
/* try to remap that extent elsewhere? */
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
goto out_bio_uninit;
}
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
ino, start, btrfs_dev_name(dev), sector);
ret = 0;
out_bio_uninit:
bio_uninit(&bio);
out_counter_dec:
btrfs_bio_counter_dec(fs_info);
return ret;
}
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
if (sb_rdonly(fs_info->sb))
return -EROFS;
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
start - page_offset(p), mirror_num);
if (ret)
break;
start += PAGE_SIZE;
}
return ret;
}
static int next_mirror(const struct io_failure_record *failrec, int cur_mirror) static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
{ {
if (cur_mirror == failrec->num_copies) if (cur_mirror == failrec->num_copies)
...@@ -691,7 +578,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start, ...@@ -691,7 +578,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
mirror = failrec->this_mirror; mirror = failrec->this_mirror;
do { do {
mirror = prev_mirror(failrec, mirror); mirror = prev_mirror(failrec, mirror);
repair_io_failure(fs_info, ino, start, failrec->len, btrfs_repair_io_failure(fs_info, ino, start, failrec->len,
failrec->logical, page, pg_offset, mirror); failrec->logical, page, pg_offset, mirror);
} while (mirror != failrec->failed_mirror); } while (mirror != failrec->failed_mirror);
...@@ -822,7 +709,7 @@ int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_ ...@@ -822,7 +709,7 @@ int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_
* *
* Since we're only doing repair for one sector, we only need to get * Since we're only doing repair for one sector, we only need to get
* a good copy of the failed sector and if we succeed, we have setup * a good copy of the failed sector and if we succeed, we have setup
* everything for repair_io_failure to do the rest for us. * everything for btrfs_repair_io_failure to do the rest for us.
*/ */
failrec->this_mirror = next_mirror(failrec, failrec->this_mirror); failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
if (failrec->this_mirror == failrec->failed_mirror) { if (failrec->this_mirror == failrec->failed_mirror) {
......
...@@ -245,7 +245,6 @@ int extent_invalidate_folio(struct extent_io_tree *tree, ...@@ -245,7 +245,6 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array); int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end); void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
/* /*
* When IO fails, either with EIO or csum verification fails, we * When IO fails, either with EIO or csum verification fails, we
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment