Commit 13bb483d authored by Naohiro Aota, committed by David Sterba

btrfs: zoned: activate metadata block group on write time

In the current implementation, block groups are activated at reservation
time to ensure that all reserved bytes can be written to an active metadata
block group. However, this approach has proven to be less efficient, as it
activates block groups more frequently than necessary, putting pressure on
the active zone resource and leading to potential issues such as early
ENOSPC or hung_task.

Another drawback of the current method is that it hampers metadata
over-commit, and necessitates additional flush operations and block group
allocations, resulting in decreased overall performance.

To address these issues, this commit introduces write-time activation of
metadata and system block groups. This involves reserving at least one
active block group specifically for metadata and system block groups.

Since metadata write-out is always allocated sequentially, when we need to
write to a non-active block group, we can wait for the ongoing IOs to
complete, activate a new block group, and then proceed with writing to the
new block group.

Fixes: b0931513 ("btrfs: zoned: activate metadata block group on flush_space")
CC: stable@vger.kernel.org # 6.1+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent a7e1ac7b
...@@ -4287,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ...@@ -4287,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl; struct btrfs_caching_control *caching_ctl;
struct rb_node *n; struct rb_node *n;
if (btrfs_is_zoned(info)) {
if (info->active_meta_bg) {
btrfs_put_block_group(info->active_meta_bg);
info->active_meta_bg = NULL;
}
if (info->active_system_bg) {
btrfs_put_block_group(info->active_system_bg);
info->active_system_bg = NULL;
}
}
write_lock(&info->block_group_cache_lock); write_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) { while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next, caching_ctl = list_entry(info->caching_block_groups.next,
......
...@@ -770,6 +770,9 @@ struct btrfs_fs_info { ...@@ -770,6 +770,9 @@ struct btrfs_fs_info {
u64 data_reloc_bg; u64 data_reloc_bg;
struct mutex zoned_data_reloc_io_lock; struct mutex zoned_data_reloc_io_lock;
struct btrfs_block_group *active_meta_bg;
struct btrfs_block_group *active_system_bg;
u64 nr_global_roots; u64 nr_global_roots;
spinlock_t zone_active_bgs_lock; spinlock_t zone_active_bgs_lock;
......
...@@ -65,6 +65,9 @@ ...@@ -65,6 +65,9 @@
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT) #define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
static void wait_eb_writebacks(struct btrfs_block_group *block_group);
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
static inline bool sb_zone_is_full(const struct blk_zone *zone) static inline bool sb_zone_is_full(const struct blk_zone *zone)
{ {
return (zone->cond == BLK_ZONE_COND_FULL) || return (zone->cond == BLK_ZONE_COND_FULL) ||
...@@ -1747,6 +1750,62 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered) ...@@ -1747,6 +1750,62 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
} }
} }
/*
 * Make sure the block group targeted by an extent buffer write-out
 * (@ctx->zoned_bg) has an active zone before the write is issued.
 *
 * @ctx:       eb write context; ->zoned_bg is the target block group and
 *             ->wbc supplies the writeback mode consulted below.
 * @active_bg: address of the slot recording the currently active block
 *             group of this kind; the caller passes
 *             &fs_info->active_meta_bg or &fs_info->active_system_bg.
 *
 * In the non-tree-log branch fs_info->zoned_meta_io_lock must be held on
 * entry (lockdep-asserted). It may be dropped and re-taken while the
 * previously active block group is finished, so state protected by that
 * lock can change across this call.
 *
 * Return: true if the target block group is active, or is already the one
 * recorded in *@active_bg; false if it could not be activated or if
 * activating it would require waiting on IO that must not be waited for
 * in the current writeback mode.
 */
static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
struct btrfs_block_group **active_bg)
{
const struct writeback_control *wbc = ctx->wbc;
struct btrfs_block_group *block_group = ctx->zoned_bg;
struct btrfs_fs_info *fs_info = block_group->fs_info;
/* Fast path: the zone is already flagged active, nothing to do. */
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
return true;
if (fs_info->treelog_bg == block_group->start) {
/*
 * The target is the block group recorded in fs_info->treelog_bg.
 * It is activated directly and never stored in *active_bg. If
 * activation fails, finish one other block group and retry;
 * the retry happens only when btrfs_zone_finish_one_bg()
 * returns exactly 1.
 */
if (!btrfs_zone_activate(block_group)) {
int ret_fin = btrfs_zone_finish_one_bg(fs_info);
if (ret_fin != 1 || !btrfs_zone_activate(block_group))
return false;
}
} else if (*active_bg != block_group) {
/* Snapshot the currently recorded block group; the slot may change below. */
struct btrfs_block_group *tgt = *active_bg;
/* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
lockdep_assert_held(&fs_info->zoned_meta_io_lock);
if (tgt) {
/*
 * If there is an unsent IO left in the allocated area,
 * we cannot wait for them as it may cause a deadlock.
 *
 * Only a WB_SYNC_ALL writeback with for_sync set goes on to
 * the pivot in that situation; the modes tested below bail
 * out instead.
 */
if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
if (wbc->sync_mode == WB_SYNC_NONE ||
(wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
return false;
}
/* Pivot active metadata/system block group. */
/*
 * The lock is dropped around the wait and the zone finish.
 * NOTE(review): the return value of do_zone_finish() is
 * ignored here - confirm that is intentional.
 */
btrfs_zoned_meta_io_unlock(fs_info);
wait_eb_writebacks(tgt);
do_zone_finish(tgt, true);
btrfs_zoned_meta_io_lock(fs_info);
/*
 * Re-check the slot after re-taking the lock: drop the
 * reference held through *active_bg only if it still points
 * at the block group just finished.
 */
if (*active_bg == tgt) {
btrfs_put_block_group(tgt);
*active_bg = NULL;
}
}
if (!btrfs_zone_activate(block_group))
return false;
/*
 * Record the newly activated block group and take a reference for
 * the slot, unless the slot already names it (presumably set by
 * another writer while the lock was dropped - TODO confirm).
 */
if (*active_bg != block_group) {
ASSERT(*active_bg == NULL);
*active_bg = block_group;
btrfs_get_block_group(block_group);
}
}
/*
 * Also reached when *active_bg already equals block_group although its
 * ZONE_IS_ACTIVE bit was not set at the top; no activation is attempted
 * in that case.
 */
return true;
}
/* /*
* Check if @ctx->eb is aligned to the write pointer. * Check if @ctx->eb is aligned to the write pointer.
* *
...@@ -1781,9 +1840,27 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, ...@@ -1781,9 +1840,27 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
ctx->zoned_bg = block_group; ctx->zoned_bg = block_group;
} }
if (block_group->meta_write_pointer == eb->start) if (block_group->meta_write_pointer == eb->start) {
struct btrfs_block_group **tgt;
if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
return 0; return 0;
if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
tgt = &fs_info->active_system_bg;
else
tgt = &fs_info->active_meta_bg;
if (check_bg_is_active(ctx, tgt))
return 0;
}
/*
* Since we may release fs_info->zoned_meta_io_lock, someone can already
* start writing this eb. In that case, we can just bail out.
*/
if (block_group->meta_write_pointer > eb->start)
return -EBUSY;
/* If for_sync, this hole will be filled with transaction commit. */ /* If for_sync, this hole will be filled with transaction commit. */
if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
return -EAGAIN; return -EAGAIN;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment