Commit 898793d9 authored by Naohiro Aota's avatar Naohiro Aota Committed by David Sterba

btrfs: zoned: write out partially allocated region

cow_file_range() works in an all-or-nothing way: if it fails to allocate an
extent for a part of the given region, it gives up the whole region, including
the successfully allocated parts. Relying on cow_file_range(),
run_delalloc_zoned() writes out data for the region only when the whole
region is successfully allocated.

This all-or-nothing allocation and write-out become problematic when the
available space in all the block groups gets tight under the active zone
restriction. btrfs_reserve_extent() tries hard to utilize the space left in
the active block groups, finally gives up, and fails with
-ENOSPC. However, if we send IOs for the successfully allocated region, we
can finish a zone and can continue the rest of the allocation on a newly
allocated block group.

This patch implements partial write-out for run_delalloc_zoned(). With
this patch applied, cow_file_range() returns -EAGAIN to tell the caller to
do something to make further allocation progress, and reports the
successfully allocated region via done_offset. Furthermore, the zoned extent
allocator returns -EAGAIN to tell cow_file_range() to go back to the caller side.

Actually, we still need to wait for an IO to complete to continue the
allocation. The next patch implements that part.

CC: stable@vger.kernel.org # 5.16+
Fixes: afba2bc0 ("btrfs: zoned: implement active zone tracking")
Signed-off-by: default avatarNaohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent b6a98021
...@@ -3996,6 +3996,16 @@ static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info, ...@@ -3996,6 +3996,16 @@ static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size) if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
return -ENOSPC; return -ENOSPC;
/*
* Not even min_alloc_size is left in any block group. Since we cannot
* activate a new block group, allocating one may not help. Let's tell the
* caller to try again and hope it makes progress by writing out some
* parts of the region. That is only possible for data block groups,
* where a part of the region can be written.
*/
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
return -EAGAIN;
/* /*
* We cannot activate a new block group and no enough space left in any * We cannot activate a new block group and no enough space left in any
* block groups. So, allocating a new block group may not help. But, * block groups. So, allocating a new block group may not help. But,
......
...@@ -117,7 +117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback); ...@@ -117,7 +117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static noinline int cow_file_range(struct btrfs_inode *inode, static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page, struct page *locked_page,
u64 start, u64 end, int *page_started, u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock); unsigned long *nr_written, int unlock,
u64 *done_offset);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start, u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len, u64 block_len, u64 orig_block_len,
...@@ -921,7 +922,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode, ...@@ -921,7 +922,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
* can directly submit them without interruption. * can directly submit them without interruption.
*/ */
ret = cow_file_range(inode, locked_page, start, end, &page_started, ret = cow_file_range(inode, locked_page, start, end, &page_started,
&nr_written, 0); &nr_written, 0, NULL);
/* Inline extent inserted, page gets unlocked and everything is done */ /* Inline extent inserted, page gets unlocked and everything is done */
if (page_started) { if (page_started) {
ret = 0; ret = 0;
...@@ -1170,7 +1171,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, ...@@ -1170,7 +1171,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
static noinline int cow_file_range(struct btrfs_inode *inode, static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page, struct page *locked_page,
u64 start, u64 end, int *page_started, u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock) unsigned long *nr_written, int unlock,
u64 *done_offset)
{ {
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
...@@ -1363,6 +1365,21 @@ static noinline int cow_file_range(struct btrfs_inode *inode, ...@@ -1363,6 +1365,21 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock: out_unlock:
/*
* If done_offset is non-NULL and ret == -EAGAIN, we expect the
* caller to write out the successfully allocated region and retry.
*/
if (done_offset && ret == -EAGAIN) {
if (orig_start < start)
*done_offset = start - 1;
else
*done_offset = start;
return ret;
} else if (ret == -EAGAIN) {
/* Convert to -ENOSPC since the caller cannot retry. */
ret = -ENOSPC;
}
/* /*
* Now, we have three regions to clean up: * Now, we have three regions to clean up:
* *
...@@ -1608,19 +1625,37 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, ...@@ -1608,19 +1625,37 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
u64 end, int *page_started, u64 end, int *page_started,
unsigned long *nr_written) unsigned long *nr_written)
{ {
u64 done_offset = end;
int ret; int ret;
bool locked_page_done = false;
ret = cow_file_range(inode, locked_page, start, end, page_started, while (start <= end) {
nr_written, 0); ret = cow_file_range(inode, locked_page, start, end, page_started,
if (ret) nr_written, 0, &done_offset);
return ret; if (ret && ret != -EAGAIN)
return ret;
if (*page_started) if (*page_started) {
return 0; ASSERT(ret == 0);
return 0;
}
if (ret == 0)
done_offset = end;
if (done_offset == start)
return -ENOSPC;
if (!locked_page_done) {
__set_page_dirty_nobuffers(locked_page);
account_page_redirty(locked_page);
}
locked_page_done = true;
extent_write_locked_range(&inode->vfs_inode, start, done_offset);
start = done_offset + 1;
}
__set_page_dirty_nobuffers(locked_page);
account_page_redirty(locked_page);
extent_write_locked_range(&inode->vfs_inode, start, end);
*page_started = 1; *page_started = 1;
return 0; return 0;
...@@ -1712,7 +1747,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, ...@@ -1712,7 +1747,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
} }
return cow_file_range(inode, locked_page, start, end, page_started, return cow_file_range(inode, locked_page, start, end, page_started,
nr_written, 1); nr_written, 1, NULL);
} }
struct can_nocow_file_extent_args { struct can_nocow_file_extent_args {
...@@ -2185,7 +2220,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page ...@@ -2185,7 +2220,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
page_started, nr_written); page_started, nr_written);
else else
ret = cow_file_range(inode, locked_page, start, end, ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1); page_started, nr_written, 1, NULL);
} else { } else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags); set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
ret = cow_file_range_async(inode, wbc, locked_page, start, end, ret = cow_file_range_async(inode, wbc, locked_page, start, end,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment