Commit 1009254b authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba

btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub

Implement the only missing part for scrub: RAID56 P/Q stripe scrub.

The workflow is pretty straightforward for the new function,
scrub_raid56_parity_stripe():

- Go through the regular scrub path for each data stripe

- Wait for the verification and repair to finish

- Writeback the repaired sectors to data stripes

- Make sure all stripes are properly repaired
  If we have sectors unrepaired, we cannot continue, or we could further
  corrupt the P/Q stripe.

- Submit the rbio for P/Q stripe
  The dev-replace would be handled inside
  raid56_parity_submit_scrub_rbio() path.

- Wait for the above bio to finish

Although the old code is no longer used, we still keep the declaration,
as the cleanup can be several times larger than this patch itself.
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent e02ee89b
...@@ -98,6 +98,13 @@ enum scrub_stripe_flags { ...@@ -98,6 +98,13 @@ enum scrub_stripe_flags {
/* Set when the read-repair is finished. */ /* Set when the read-repair is finished. */
SCRUB_STRIPE_FLAG_REPAIR_DONE, SCRUB_STRIPE_FLAG_REPAIR_DONE,
/*
* Set for data stripes if it's triggered from P/Q stripe.
* During such scrub, we should not report errors in data stripes, nor
* update the accounting.
*/
SCRUB_STRIPE_FLAG_NO_REPORT,
}; };
#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE) #define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
...@@ -279,6 +286,7 @@ struct scrub_parity { ...@@ -279,6 +286,7 @@ struct scrub_parity {
struct scrub_ctx { struct scrub_ctx {
struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX]; struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
struct scrub_stripe *raid56_data_stripes;
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
int first_free; int first_free;
int curr; int curr;
...@@ -2490,6 +2498,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx, ...@@ -2490,6 +2498,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
int nr_repaired_sectors = 0; int nr_repaired_sectors = 0;
int sector_nr; int sector_nr;
if (test_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state))
return;
/* /*
* Init needed infos for error reporting. * Init needed infos for error reporting.
* *
...@@ -3799,11 +3810,8 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx, ...@@ -3799,11 +3810,8 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
return ret; return ret;
} }
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, int scrub_raid56_parity(struct scrub_ctx *sctx, struct map_lookup *map,
struct map_lookup *map, struct btrfs_device *sdev, u64 logic_start, u64 logic_end)
struct btrfs_device *sdev,
u64 logic_start,
u64 logic_end)
{ {
struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_path *path; struct btrfs_path *path;
...@@ -4171,6 +4179,11 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx) ...@@ -4171,6 +4179,11 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx)
sctx->cur_stripe = 0; sctx->cur_stripe = 0;
} }
static void raid56_scrub_wait_endio(struct bio *bio)
{
complete(bio->bi_private);
}
static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg, static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
struct btrfs_device *dev, int mirror_num, struct btrfs_device *dev, int mirror_num,
u64 logical, u32 length, u64 physical) u64 logical, u32 length, u64 physical)
...@@ -4195,6 +4208,165 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * ...@@ -4195,6 +4208,165 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
return 0; return 0;
} }
static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
struct btrfs_block_group *bg,
struct map_lookup *map,
u64 full_stripe_start)
{
DECLARE_COMPLETION_ONSTACK(io_done);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_raid_bio *rbio;
struct btrfs_io_context *bioc = NULL;
struct bio *bio;
struct scrub_stripe *stripe;
bool all_empty = true;
const int data_stripes = nr_data_stripes(map);
unsigned long extent_bitmap = 0;
u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT;
int ret;
ASSERT(sctx->raid56_data_stripes);
for (int i = 0; i < data_stripes; i++) {
int stripe_index;
int rot;
u64 physical;
stripe = &sctx->raid56_data_stripes[i];
rot = div_u64(full_stripe_start - bg->start,
data_stripes) >> BTRFS_STRIPE_LEN_SHIFT;
stripe_index = (i + rot) % map->num_stripes;
physical = map->stripes[stripe_index].physical +
(rot << BTRFS_STRIPE_LEN_SHIFT);
scrub_reset_stripe(stripe);
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
ret = scrub_find_fill_first_stripe(bg,
map->stripes[stripe_index].dev, physical, 1,
full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT),
BTRFS_STRIPE_LEN, stripe);
if (ret < 0)
goto out;
/*
* No extent in this data stripe, need to manually mark them
* initialized to make later read submission happy.
*/
if (ret > 0) {
stripe->logical = full_stripe_start +
(i << BTRFS_STRIPE_LEN_SHIFT);
stripe->dev = map->stripes[stripe_index].dev;
stripe->mirror_num = 1;
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
}
}
/* Check if all data stripes are empty. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) {
all_empty = false;
break;
}
}
if (all_empty) {
ret = 0;
goto out;
}
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
scrub_submit_initial_read(sctx, stripe);
}
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
wait_event(stripe->repair_wait,
test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
}
/* For now, no zoned support for RAID56. */
ASSERT(!btrfs_is_zoned(sctx->fs_info));
/* Writeback for the repaired sectors. */
for (int i = 0; i < data_stripes; i++) {
unsigned long repaired;
stripe = &sctx->raid56_data_stripes[i];
bitmap_andnot(&repaired, &stripe->init_error_bitmap,
&stripe->error_bitmap, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, repaired, false);
}
/* Wait for the above writebacks to finish. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
wait_scrub_stripe_io(stripe);
}
/*
* Now all data stripes are properly verified. Check if we have any
* unrepaired, if so abort immediately or we could further corrupt the
* P/Q stripes.
*
* During the loop, also populate extent_bitmap.
*/
for (int i = 0; i < data_stripes; i++) {
unsigned long error;
stripe = &sctx->raid56_data_stripes[i];
/*
* We should only check the errors where there is an extent.
* As we may hit an empty data stripe while it's missing.
*/
bitmap_and(&error, &stripe->error_bitmap,
&stripe->extent_sector_bitmap, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
btrfs_err(fs_info,
"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
&error);
ret = -EIO;
goto out;
}
bitmap_or(&extent_bitmap, &extent_bitmap,
&stripe->extent_sector_bitmap, stripe->nr_sectors);
}
/* Now we can check and regenerate the P/Q stripe. */
bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS);
bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
bio->bi_private = &io_done;
bio->bi_end_io = raid56_scrub_wait_endio;
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
&length, &bioc);
if (ret < 0) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
goto out;
}
rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap,
BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
btrfs_put_bioc(bioc);
if (!rbio) {
ret = -ENOMEM;
btrfs_bio_counter_dec(fs_info);
goto out;
}
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
ret = blk_status_to_errno(bio->bi_status);
bio_put(bio);
btrfs_bio_counter_dec(fs_info);
out:
return ret;
}
/* /*
* Scrub one range which can only has simple mirror based profile. * Scrub one range which can only has simple mirror based profile.
* (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
...@@ -4368,7 +4540,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -4368,7 +4540,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
/* Offset inside the chunk */ /* Offset inside the chunk */
u64 offset; u64 offset;
u64 stripe_logical; u64 stripe_logical;
u64 stripe_end;
int stop_loop = 0; int stop_loop = 0;
wait_event(sctx->list_wait, wait_event(sctx->list_wait,
...@@ -4383,6 +4554,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -4383,6 +4554,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
sctx->flush_all_writes = true; sctx->flush_all_writes = true;
} }
/* Prepare the extra data stripes used by RAID56. */
if (profile & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ASSERT(sctx->raid56_data_stripes == NULL);
sctx->raid56_data_stripes = kcalloc(nr_data_stripes(map),
sizeof(struct scrub_stripe),
GFP_KERNEL);
if (!sctx->raid56_data_stripes) {
ret = -ENOMEM;
goto out;
}
for (int i = 0; i < nr_data_stripes(map); i++) {
ret = init_scrub_stripe(fs_info,
&sctx->raid56_data_stripes[i]);
if (ret < 0)
goto out;
sctx->raid56_data_stripes[i].bg = bg;
sctx->raid56_data_stripes[i].sctx = sctx;
}
}
/* /*
* There used to be a big double loop to handle all profiles using the * There used to be a big double loop to handle all profiles using the
* same routine, which grows larger and more gross over time. * same routine, which grows larger and more gross over time.
...@@ -4436,10 +4627,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -4436,10 +4627,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (ret) { if (ret) {
/* it is parity strip */ /* it is parity strip */
stripe_logical += chunk_logical; stripe_logical += chunk_logical;
stripe_end = stripe_logical + increment; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg,
ret = scrub_raid56_parity(sctx, map, scrub_dev, map, stripe_logical);
stripe_logical,
stripe_end);
if (ret) if (ret)
goto out; goto out;
goto next; goto next;
...@@ -4477,6 +4666,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -4477,6 +4666,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
scrub_wr_submit(sctx); scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_lock); mutex_unlock(&sctx->wr_lock);
flush_scrub_stripes(sctx); flush_scrub_stripes(sctx);
if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
release_scrub_stripe(&sctx->raid56_data_stripes[i]);
kfree(sctx->raid56_data_stripes);
sctx->raid56_data_stripes = NULL;
}
if (sctx->is_dev_replace && ret >= 0) { if (sctx->is_dev_replace && ret >= 0) {
int ret2; int ret2;
......
...@@ -13,4 +13,9 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev); ...@@ -13,4 +13,9 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress); struct btrfs_scrub_progress *progress);
/* Temporary declaration, would be deleted later. */
int scrub_raid56_parity(struct scrub_ctx *sctx, struct map_lookup *map,
struct btrfs_device *sdev, u64 logic_start,
u64 logic_end);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment