Commit 020d5b73 authored by Filipe Manana's avatar Filipe Manana Committed by Chris Mason

Btrfs: fix race between scrub and block group deletion

Scrub can race with the cleaner kthread deleting block groups that are
unused (and with relocation too) leading to a failure with error -EINVAL
that gets returned to user space.

The following diagram illustrates how it happens:

              CPU 1                                 CPU 2

 cleaner kthread
   btrfs_delete_unused_bgs()

     gets block group X from
     fs_info->unused_bgs

     sets block group to RO

       btrfs_remove_chunk(bg X)

         deletes device extents

                                         scrub_enumerate_chunks()

                                           searches device tree using
                                           its commit root

                                           finds device extent for
                                           block group X

                                           gets block group X from the tree
                                           fs_info->block_group_cache_tree
                                           (via btrfs_lookup_block_group())

                                           sets bg X to RO (again)

          btrfs_remove_block_group(bg X)

            deletes block group from
            fs_info->block_group_cache_tree

            removes extent map from
            fs_info->mapping_tree

                                               scrub_chunk(offset X)

                                                 searches fs_info->mapping_tree
                                                 for extent map starting at
                                                 offset X

                                                    --> doesn't find any such
                                                        extent map
                                                    --> returns -EINVAL and scrub
                                                        errors out to userspace
                                                        with -EINVAL

Fix this by dealing with an extent map lookup failure as an indicator of
block group deletion.
Issue reproduced with fstest btrfs/071.
Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 31388ab2
......@@ -3432,7 +3432,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
u64 chunk_offset, u64 length,
u64 dev_offset, int is_dev_replace)
u64 dev_offset,
struct btrfs_block_group_cache *cache,
int is_dev_replace)
{
struct btrfs_mapping_tree *map_tree =
&sctx->dev_root->fs_info->mapping_tree;
......@@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
read_unlock(&map_tree->map_tree.lock);
if (!em)
return -EINVAL;
if (!em) {
/*
* Might have been an unused block group deleted by the cleaner
* kthread or relocation.
*/
spin_lock(&cache->lock);
if (!cache->removed)
ret = -EINVAL;
spin_unlock(&cache->lock);
return ret;
}
map = (struct map_lookup *)em->bdev;
if (em->start != chunk_offset)
......@@ -3592,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, is_dev_replace);
found_key.offset, cache, is_dev_replace);
/*
* flush, submit all pending read and write bios, afterwards
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment