Commit a785fd28 authored by Linus Torvalds

Merge tag 'for-6.5-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "More fixes, some of them going back to older releases and there are
  fixes for hangs in stress tests regarding space caching:

   - fixes and progress tracking for hangs in free space caching, found
     by test generic/475

   - writeback fixes: write pages in integrity mode and skip pages that
     were already written out in the meantime

   - properly clear end of extent range after an error

   - relocation fixes:
      - fix race between qgroup tree creation and relocation
      - detect and report invalid reloc roots"

* tag 'for-6.5-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: set cache_block_group_error if we find an error
  btrfs: reject invalid reloc tree root keys with stack dump
  btrfs: exit gracefully if reloc roots don't match
  btrfs: avoid race between qgroup tree creation and relocation
  btrfs: properly clear end of the unreserved range in cow_file_range
  btrfs: don't wait for writeback on clean pages in extent_write_cache_pages
  btrfs: don't stop integrity writeback too early
  btrfs: wait for actual caching progress during allocation
parents ae545c32 92fb94b6
@@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
 					   u64 num_bytes)
 {
 	struct btrfs_caching_control *caching_ctl;
+	int progress;
 
 	caching_ctl = btrfs_get_caching_control(cache);
 	if (!caching_ctl)
 		return;
 
+	/*
+	 * We've already failed to allocate from this block group, so even if
+	 * there's enough space in the block group it isn't contiguous enough to
+	 * allow for an allocation, so wait for at least the next wakeup tick,
+	 * or for the thing to be done.
+	 */
+	progress = atomic_read(&caching_ctl->progress);
+
 	wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
-		   (cache->free_space_ctl->free_space >= num_bytes));
+		   (progress != atomic_read(&caching_ctl->progress) &&
+		    (cache->free_space_ctl->free_space >= num_bytes)));
 
 	btrfs_put_caching_control(caching_ctl);
 }
@@ -802,8 +812,10 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
 			if (total_found > CACHING_CTL_WAKE_UP) {
 				total_found = 0;
-				if (wakeup)
+				if (wakeup) {
+					atomic_inc(&caching_ctl->progress);
 					wake_up(&caching_ctl->wait);
+				}
 			}
 		}
 		path->slots[0]++;
@@ -910,6 +922,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
 	init_waitqueue_head(&caching_ctl->wait);
 	caching_ctl->block_group = cache;
 	refcount_set(&caching_ctl->count, 2);
+	atomic_set(&caching_ctl->progress, 0);
 	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
 	spin_lock(&cache->lock);
...
@@ -90,6 +90,8 @@ struct btrfs_caching_control {
 	wait_queue_head_t wait;
 	struct btrfs_work work;
 	struct btrfs_block_group *block_group;
+	/* Track progress of caching during allocation. */
+	atomic_t progress;
 	refcount_t count;
 };
...
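The three block-group.c hunks and the new struct field above form one mechanism: the caching thread bumps an atomic progress counter every time it wakes waiters, and a waiter that has already failed an allocation snapshots the counter first and sleeps until it moves (or until caching finishes). Previously the waiter could return immediately because free_space already exceeded num_bytes, even though that space was too fragmented to use, and the allocator would spin retrying; that is the hang exposed by generic/475. Below is a minimal userspace analogue of the pattern, using pthreads and C11 atomics; all names are illustrative, not kernel API.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	/* Hypothetical stand-in for btrfs_caching_control plus block group state. */
	struct cache_ctl {
		pthread_mutex_t lock;
		pthread_cond_t wait;
		atomic_int progress;      /* bumped on every caching wakeup tick */
		bool done;                /* caching has finished */
		unsigned long free_space;
	};

	/* Waiter side, mirroring btrfs_wait_block_group_cache_progress():
	 * the previous allocation attempt already failed, so the current
	 * free_space value is not trustworthy; require at least one new tick. */
	static void wait_cache_progress(struct cache_ctl *ctl, unsigned long num_bytes)
	{
		int progress = atomic_load(&ctl->progress);

		pthread_mutex_lock(&ctl->lock);
		while (!ctl->done &&
		       !(progress != atomic_load(&ctl->progress) &&
			 ctl->free_space >= num_bytes))
			pthread_cond_wait(&ctl->wait, &ctl->lock);
		pthread_mutex_unlock(&ctl->lock);
	}

	/* Caching side, mirroring the wakeup in load_extent_tree_free():
	 * tick the counter before waking so waiters can observe progress. */
	static void cache_tick(struct cache_ctl *ctl, unsigned long found_bytes)
	{
		pthread_mutex_lock(&ctl->lock);
		ctl->free_space += found_bytes;
		atomic_fetch_add(&ctl->progress, 1);
		pthread_cond_broadcast(&ctl->wait);
		pthread_mutex_unlock(&ctl->lock);
	}

The increment happens before the broadcast, so a woken waiter is guaranteed to observe either new progress or completion, never a spurious wakeup that re-satisfies a stale condition.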
@@ -1103,7 +1103,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
 	btrfs_drew_lock_init(&root->snapshot_lock);
 
 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
-	    !btrfs_is_data_reloc_root(root)) {
+	    !btrfs_is_data_reloc_root(root) &&
+	    is_fstree(root->root_key.objectid)) {
 		set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
 		btrfs_check_and_init_root_item(&root->root_item);
 	}
@@ -1300,6 +1301,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
 	root = btrfs_get_global_root(fs_info, objectid);
 	if (root)
 		return root;
+
+	/*
+	 * If we're called for non-subvolume trees, and above function didn't
+	 * find one, do not try to read it from disk.
+	 *
+	 * This is namely for free-space-tree and quota tree, which can change
+	 * at runtime and should only be grabbed from fs_info.
+	 */
+	if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+		return ERR_PTR(-ENOENT);
 again:
 	root = btrfs_lookup_fs_root(fs_info, objectid);
 	if (root) {
...
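The comment in the hunk above states the invariant being enforced: quota and free-space trees can be created or rewritten at runtime, so a cache miss in btrfs_get_root_ref() must not fall back to reading a possibly stale root item from disk. A condensed sketch of the gate follows, with simplified constants from the btrfs on-disk format; the kernel's is_fstree() additionally masks qgroup-level bits, so treat this as the gist rather than the exact predicate.

	#include <stdbool.h>
	#include <stdint.h>

	/* A few objectids from the btrfs on-disk format. */
	#define FS_TREE_OBJECTID	   5ULL   /* the default subvolume */
	#define FIRST_FREE_OBJECTID	 256ULL   /* first user subvolume id */
	#define DATA_RELOC_TREE_OBJECTID ((uint64_t)-9)  /* btrfs's -9ULL */

	/* Simplified is_fstree(): the default fs tree or any user subvolume.
	 * The signed comparison excludes the "negative" special objectids
	 * (tree log, reloc trees, data reloc tree, and so on). */
	static bool is_subvolume_tree(uint64_t id)
	{
		return id == FS_TREE_OBJECTID ||
		       (int64_t)id >= (int64_t)FIRST_FREE_OBJECTID;
	}

	/* The gate added to btrfs_get_root_ref(): on a cache miss, only
	 * subvolume trees (and the data reloc tree) may be read from disk;
	 * runtime-mutable trees such as quota and free-space live only in
	 * the in-memory fs_info, so a miss there means -ENOENT. */
	static bool may_read_root_from_disk(uint64_t id)
	{
		return is_subvolume_tree(id) || id == DATA_RELOC_TREE_OBJECTID;
	}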
@@ -4310,8 +4310,11 @@ static noinline int find_free_extent(struct btrfs_root *root,
 			ret = 0;
 		}
 
-		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
+			if (!cache_block_group_error)
+				cache_block_group_error = -EIO;
 			goto loop;
+		}
 
 		if (!find_free_extent_check_size_class(ffe_ctl, block_group))
 			goto loop;
...
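Before this hunk, a caching failure in one block group was silently swallowed: the loop moved on, eventually ran out of candidates, and the caller reported -ENOSPC where -EIO was the truth. The fix follows the usual "remember the first real error, keep scanning" pattern; here is a generic sketch with illustrative names only.

	#include <errno.h>
	#include <stddef.h>

	struct group {
		int cache_error;        /* caching this group failed */
		size_t free_bytes;
	};

	/* Return the index of a usable group, or a negative errno. If every
	 * candidate fails, prefer the first real error over plain -ENOSPC. */
	static int pick_group(const struct group *groups, int ngroups, size_t need)
	{
		int first_error = 0;

		for (int i = 0; i < ngroups; i++) {
			if (groups[i].cache_error) {
				if (!first_error)
					first_error = -EIO;  /* remember, keep looking */
				continue;
			}
			if (groups[i].free_bytes >= need)
				return i;
		}
		return first_error ? first_error : -ENOSPC;
	}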
@@ -2145,6 +2145,12 @@ static int extent_write_cache_pages(struct address_space *mapping,
 				continue;
 			}
 
+			if (!folio_test_dirty(folio)) {
+				/* Someone wrote it for us. */
+				folio_unlock(folio);
+				continue;
+			}
+
 			if (wbc->sync_mode != WB_SYNC_NONE) {
 				if (folio_test_writeback(folio))
 					submit_write_bio(bio_ctrl, 0);
@@ -2164,11 +2170,12 @@ static int extent_write_cache_pages(struct address_space *mapping,
 			}
 
 			/*
-			 * the filesystem may choose to bump up nr_to_write.
+			 * The filesystem may choose to bump up nr_to_write.
 			 * We have to make sure to honor the new nr_to_write
-			 * at any time
+			 * at any time.
 			 */
-			nr_to_write_done = wbc->nr_to_write <= 0;
+			nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+					    wbc->nr_to_write <= 0);
 		}
 		folio_batch_release(&fbatch);
 		cond_resched();
...
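The two extent_write_cache_pages() hunks tighten the same loop. The first skips folios that another context already wrote back (the "Someone wrote it for us" case), so the loop neither resubmits clean pages nor waits on writeback it did not start. The second stops applying the nr_to_write budget in integrity mode: WB_SYNC_ALL callers such as fsync must see every dirty page in the range submitted before returning. A stripped-down model of the termination rule, with illustrative types rather than the kernel's:

	#include <stdbool.h>

	enum sync_mode { SYNC_NONE, SYNC_ALL };

	struct wb_control {
		enum sync_mode sync_mode;
		long nr_to_write;        /* pages the caller budgeted for */
	};

	/* Background writeback (SYNC_NONE) may stop once the budget is spent;
	 * integrity writeback (SYNC_ALL) must walk the entire range, or an
	 * fsync could return with dirty pages still unwritten. */
	static bool done_with_batch(const struct wb_control *wbc)
	{
		return wbc->sync_mode == SYNC_NONE && wbc->nr_to_write <= 0;
	}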
@@ -1654,8 +1654,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 					     clear_bits,
 					     page_ops);
 		start += cur_alloc_size;
-		if (start >= end)
-			return ret;
 	}
 
 	/*
@@ -1664,9 +1662,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	 * space_info's bytes_may_use counter, reserved in
 	 * btrfs_check_data_free_space().
 	 */
-	extent_clear_unlock_delalloc(inode, start, end, locked_page,
-				     clear_bits | EXTENT_CLEAR_DATA_RESV,
-				     page_ops);
+	if (start < end) {
+		clear_bits |= EXTENT_CLEAR_DATA_RESV;
+		extent_clear_unlock_delalloc(inode, start, end, locked_page,
+					     clear_bits, page_ops);
+	}
 	return ret;
 }
...
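The two inode.c hunks are a single fix: the early return once start reached end skipped the final cleanup of the unreserved range, leaving stale extent state behind after an error. With the return removed, the tail cleanup runs exactly once, and only when a non-empty range remains. A sketch of the resulting error-path shape, with hypothetical helper names:

	#include <stdint.h>

	/* Error path of a chunked allocation over [start, end]: chunks that
	 * were fully set up clean themselves up as they fail; this handles
	 * only whatever was never reached, and runs exactly once at the end. */
	static void cleanup_unreserved_tail(uint64_t start, uint64_t end,
					    uint64_t consumed,
					    void (*clear_range)(uint64_t from,
								uint64_t to))
	{
		start += consumed;      /* skip what earlier iterations handled */
		if (start < end)        /* anything left at all? */
			clear_range(start, end);
		/* No early return before this point: skipping the tail cleanup
		 * is exactly the bug the btrfs fix removes. */
	}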
@@ -1916,7 +1916,39 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 			err = PTR_ERR(root);
 			break;
 		}
-		ASSERT(root->reloc_root == reloc_root);
+
+		if (unlikely(root->reloc_root != reloc_root)) {
+			if (root->reloc_root) {
+				btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
+					  root->root_key.objectid,
+					  root->reloc_root->root_key.objectid,
+					  root->reloc_root->root_key.type,
+					  root->reloc_root->root_key.offset,
+					  btrfs_root_generation(
+						&root->reloc_root->root_item),
+					  reloc_root->root_key.objectid,
+					  reloc_root->root_key.type,
+					  reloc_root->root_key.offset,
+					  btrfs_root_generation(
+						&reloc_root->root_item));
+			} else {
+				btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
+					  root->root_key.objectid,
+					  reloc_root->root_key.objectid,
+					  reloc_root->root_key.type,
+					  reloc_root->root_key.offset,
+					  btrfs_root_generation(
+						&reloc_root->root_item));
+			}
+			list_add(&reloc_root->root_list, &reloc_roots);
+			btrfs_put_root(root);
+			btrfs_abort_transaction(trans, -EUCLEAN);
+			if (!err)
+				err = -EUCLEAN;
+			break;
+		}
 
 		/*
 		 * set reference count to 1, so btrfs_recover_relocation
@@ -1989,7 +2021,7 @@ void merge_reloc_roots(struct reloc_control *rc)
 		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
 					 false);
 		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
-			if (IS_ERR(root)) {
+			if (WARN_ON(IS_ERR(root))) {
 				/*
 				 * For recovery we read the fs roots on mount,
 				 * and if we didn't find the root then we marked
@@ -1998,17 +2030,14 @@ void merge_reloc_roots(struct reloc_control *rc)
 				 * memory. However there's no reason we can't
 				 * handle the error properly here just in case.
 				 */
-				ASSERT(0);
 				ret = PTR_ERR(root);
 				goto out;
 			}
-			if (root->reloc_root != reloc_root) {
+			if (WARN_ON(root->reloc_root != reloc_root)) {
 				/*
-				 * This is actually impossible without something
-				 * going really wrong (like weird race condition
-				 * or cosmic rays).
+				 * This can happen if on-disk metadata has some
+				 * corruption, e.g. bad reloc tree key offset.
 				 */
-				ASSERT(0);
 				ret = -EINVAL;
 				goto out;
 			}
...
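The relocation changes share one policy shift: conditions once treated as impossible (bare ASSERTs, compiled out on non-debug kernels) are in fact reachable through corrupted on-disk metadata, so they must log details, abort the transaction, and return an error instead of crashing or, worse, sailing past an inconsistency. In miniature, as illustrative code rather than kernel API (-EUCLEAN is the Linux "structure needs cleaning" errno that btrfs uses for corruption):

	#include <errno.h>
	#include <stdio.h>

	struct root {
		unsigned long long id;
		struct root *reloc_root;
	};

	/* Before: ASSERT(root->reloc_root == reloc_root), a no-op unless built
	 * with debug assertions, and then a crash. After: report the mismatch
	 * and hand the caller an error it can act on. */
	static int check_reloc_pairing(const struct root *root,
				       const struct root *reloc_root)
	{
		if (root->reloc_root == reloc_root)
			return 0;

		fprintf(stderr, "reloc tree mismatch on root %llu\n", root->id);
		return -EUCLEAN;   /* caller aborts the transaction */
	}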
@@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
 	btrfs_item_key_to_cpu(leaf, &item_key, slot);
 	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
 
+	/*
+	 * Bad rootid for reloc trees.
+	 *
+	 * Reloc trees are only for subvolume trees, other trees only need
+	 * to be COWed to be relocated.
+	 */
+	if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
+		     !is_fstree(key->offset))) {
+		generic_err(leaf, slot,
+			    "invalid reloc tree for root %lld, root id is not a subvolume tree",
+			    key->offset);
+		return -EUCLEAN;
+	}
+
 	/* No such tree id */
 	if (unlikely(key->objectid == 0)) {
 		if (is_root_item)
...
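The tree-checker addition is a pure key-space invariant: a ROOT_ITEM keyed under BTRFS_TREE_RELOC_OBJECTID must name a subvolume tree in its offset, because only subvolume trees get dedicated reloc trees; everything else is relocated by ordinary COW. As a standalone predicate (a sketch: the -8 constant matches btrfs's BTRFS_TREE_RELOC_OBJECTID, and the subvolume test is passed in so the snippet stays self-contained):

	#include <stdbool.h>
	#include <stdint.h>

	#define TREE_RELOC_OBJECTID ((uint64_t)-8)   /* btrfs's -8ULL */

	/* Reloc root keys have the shape (TREE_RELOC_OBJECTID, ROOT_ITEM,
	 * <subvolume id>); any other id in the offset is corruption. */
	static bool reloc_root_key_valid(uint64_t key_objectid, uint64_t key_offset,
					 bool (*is_subvolume_tree)(uint64_t))
	{
		if (key_objectid != TREE_RELOC_OBJECTID)
			return true;    /* not a reloc root item, nothing to check */
		return is_subvolume_tree(key_offset);
	}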