Commit fc7cbcd4 authored by David Sterba's avatar David Sterba

Revert "btrfs: turn fs_roots_radix in btrfs_fs_info into an XArray"

This reverts commit 48b36a60.

Revert the xarray conversion, there's a problem with potential
sleep-inside-spinlock [1] when calling xa_insert that triggers GFP_NOFS
allocation. The radix tree used the preloading mechanism to avoid
sleeping but this is not available in xarray.

Conversion from spin lock to mutex is possible but at time of rc6 is
riskier than a clean revert.

[1] https://lore.kernel.org/linux-btrfs/cover.1657097693.git.fdmanana@suse.com/Reported-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent b3a3b025
......@@ -675,9 +675,8 @@ struct btrfs_fs_info {
rwlock_t global_root_lock;
struct rb_root global_root_tree;
/* The xarray that holds all the FS roots */
spinlock_t fs_roots_lock;
struct xarray fs_roots;
spinlock_t fs_roots_radix_lock;
struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
rwlock_t block_group_cache_lock;
......@@ -1119,8 +1118,7 @@ enum {
*/
BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
/* The root is tracked in fs_info::fs_roots */
BTRFS_ROOT_REGISTERED,
BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
BTRFS_ROOT_DEFRAG_RUNNING,
BTRFS_ROOT_FORCE_COW,
......
......@@ -5,6 +5,7 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
......@@ -1210,9 +1211,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&root->leak_list);
spin_lock(&fs_info->fs_roots_lock);
spin_lock(&fs_info->fs_roots_radix_lock);
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
}
......@@ -1659,11 +1660,12 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *root;
spin_lock(&fs_info->fs_roots_lock);
root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)root_id);
if (root)
root = btrfs_grab_root(root);
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
return root;
}
......@@ -1705,14 +1707,20 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
{
int ret;
spin_lock(&fs_info->fs_roots_lock);
ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
root, GFP_NOFS);
ret = radix_tree_preload(GFP_NOFS);
if (ret)
return ret;
spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
if (ret == 0) {
btrfs_grab_root(root);
set_bit(BTRFS_ROOT_REGISTERED, &root->state);
set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
}
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
return ret;
}
......@@ -2342,9 +2350,9 @@ void btrfs_put_root(struct btrfs_root *root)
btrfs_drew_lock_destroy(&root->snapshot_lock);
free_root_extent_buffers(root);
#ifdef CONFIG_BTRFS_DEBUG
spin_lock(&root->fs_info->fs_roots_lock);
spin_lock(&root->fs_info->fs_roots_radix_lock);
list_del_init(&root->leak_list);
spin_unlock(&root->fs_info->fs_roots_lock);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
#endif
kfree(root);
}
......@@ -2352,21 +2360,28 @@ void btrfs_put_root(struct btrfs_root *root)
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
unsigned long index = 0;
int ret;
struct btrfs_root *gang[8];
int i;
while (!list_empty(&fs_info->dead_roots)) {
root = list_entry(fs_info->dead_roots.next,
struct btrfs_root, root_list);
list_del(&root->root_list);
gang[0] = list_entry(fs_info->dead_roots.next,
struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
btrfs_drop_and_free_fs_root(fs_info, root);
btrfs_put_root(root);
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
btrfs_drop_and_free_fs_root(fs_info, gang[0]);
btrfs_put_root(gang[0]);
}
xa_for_each(&fs_info->fs_roots, index, root) {
btrfs_drop_and_free_fs_root(fs_info, root);
while (1) {
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, 0,
ARRAY_SIZE(gang));
if (!ret)
break;
for (i = 0; i < ret; i++)
btrfs_drop_and_free_fs_root(fs_info, gang[i]);
}
}
......@@ -3134,7 +3149,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
......@@ -3143,7 +3158,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->fs_roots_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
......@@ -3374,7 +3389,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
/*
* btrfs_find_orphan_roots() is responsible for finding all the dead
* roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
* them into the fs_info->fs_roots. This must be done before
* them into the fs_info->fs_roots_radix tree. This must be done before
* calling btrfs_orphan_cleanup() on the tree root. If we don't do it
* first, then btrfs_orphan_cleanup() will delete a dead root's orphan
* item before the root's tree is deleted - this means that if we unmount
......@@ -4498,11 +4513,12 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
{
bool drop_ref = false;
spin_lock(&fs_info->fs_roots_lock);
xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
drop_ref = true;
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
if (BTRFS_FS_ERROR(fs_info)) {
ASSERT(root->log_root == NULL);
......@@ -4518,48 +4534,50 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *roots[8];
unsigned long index = 0;
int i;
u64 root_objectid = 0;
struct btrfs_root *gang[8];
int i = 0;
int err = 0;
int grabbed;
unsigned int ret = 0;
while (1) {
struct btrfs_root *root;
spin_lock(&fs_info->fs_roots_lock);
if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
spin_unlock(&fs_info->fs_roots_lock);
return err;
spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang));
if (!ret) {
spin_unlock(&fs_info->fs_roots_radix_lock);
break;
}
root_objectid = gang[ret - 1]->root_key.objectid + 1;
grabbed = 0;
xa_for_each_start(&fs_info->fs_roots, index, root, index) {
/* Avoid grabbing roots in dead_roots */
if (btrfs_root_refs(&root->root_item) > 0)
roots[grabbed++] = btrfs_grab_root(root);
if (grabbed >= ARRAY_SIZE(roots))
break;
for (i = 0; i < ret; i++) {
/* Avoid to grab roots in dead_roots */
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
gang[i] = NULL;
continue;
}
/* grab all the search result for later use */
gang[i] = btrfs_grab_root(gang[i]);
}
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
for (i = 0; i < grabbed; i++) {
if (!roots[i])
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
index = roots[i]->root_key.objectid;
err = btrfs_orphan_cleanup(roots[i]);
root_objectid = gang[i]->root_key.objectid;
err = btrfs_orphan_cleanup(gang[i]);
if (err)
goto out;
btrfs_put_root(roots[i]);
break;
btrfs_put_root(gang[i]);
}
index++;
root_objectid++;
}
out:
/* Release the roots that remain uncleaned due to error */
for (; i < grabbed; i++) {
if (roots[i])
btrfs_put_root(roots[i]);
/* release the uncleaned roots due to error */
for (; i < ret; i++) {
if (gang[i])
btrfs_put_root(gang[i]);
}
return err;
}
......@@ -4878,28 +4896,31 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
{
unsigned long index = 0;
int grabbed = 0;
struct btrfs_root *roots[8];
struct btrfs_root *gang[8];
u64 root_objectid = 0;
int ret;
spin_lock(&fs_info->fs_roots_radix_lock);
while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang))) != 0) {
int i;
spin_lock(&fs_info->fs_roots_lock);
while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
ULONG_MAX, 8, XA_PRESENT))) {
for (int i = 0; i < grabbed; i++)
roots[i] = btrfs_grab_root(roots[i]);
spin_unlock(&fs_info->fs_roots_lock);
for (i = 0; i < ret; i++)
gang[i] = btrfs_grab_root(gang[i]);
spin_unlock(&fs_info->fs_roots_radix_lock);
for (int i = 0; i < grabbed; i++) {
if (!roots[i])
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
index = roots[i]->root_key.objectid;
btrfs_free_log(NULL, roots[i]);
btrfs_put_root(roots[i]);
root_objectid = gang[i]->root_key.objectid;
btrfs_free_log(NULL, gang[i]);
btrfs_put_root(gang[i]);
}
index++;
spin_lock(&fs_info->fs_roots_lock);
root_objectid++;
spin_lock(&fs_info->fs_roots_radix_lock);
}
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log_root_tree(NULL, fs_info);
}
......
......@@ -5829,7 +5829,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
btrfs_qgroup_free_meta_all_pertrans(root);
if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
btrfs_add_dropped_root(trans, root);
else
btrfs_put_root(root);
......
......@@ -3578,7 +3578,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0;
/* Bail out if the cleanup is already running. */
if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
return 0;
......@@ -3661,17 +3660,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*
* btrfs_find_orphan_roots() ran before us, which has
* found all deleted roots and loaded them into
* fs_info->fs_roots. So here we can find if an
* fs_info->fs_roots_radix. So here we can find if an
* orphan item corresponds to a deleted root by looking
* up the root from that xarray.
* up the root from that radix tree.
*/
spin_lock(&fs_info->fs_roots_lock);
dead_root = xa_load(&fs_info->fs_roots,
(unsigned long)found_key.objectid);
spin_lock(&fs_info->fs_roots_radix_lock);
dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)found_key.objectid);
if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
is_dead_root = 1;
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
if (is_dead_root) {
/* prevent this orphan from being found again */
......
......@@ -186,7 +186,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
if (!root)
return;
/* Will be freed by btrfs_free_fs_roots */
if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state)))
if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
return;
btrfs_global_root_delete(root);
btrfs_put_root(root);
......
......@@ -23,7 +23,7 @@
#include "space-info.h"
#include "zoned.h"
#define BTRFS_ROOT_TRANS_TAG XA_MARK_0
#define BTRFS_ROOT_TRANS_TAG 0
/*
* Transaction states and transitions
......@@ -437,15 +437,15 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
*/
smp_wmb();
spin_lock(&fs_info->fs_roots_lock);
spin_lock(&fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid && !force) {
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
xa_set_mark(&fs_info->fs_roots,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_lock);
radix_tree_tag_set(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_radix_lock);
root->last_trans = trans->transid;
/* this is pretty tricky. We don't want to
......@@ -487,9 +487,11 @@ void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
spin_unlock(&cur_trans->dropped_roots_lock);
/* Make sure we don't try to update the root at commit time */
xa_clear_mark(&fs_info->fs_roots,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_radix_lock);
}
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
......@@ -1402,8 +1404,9 @@ void btrfs_add_dead_root(struct btrfs_root *root)
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root;
unsigned long index;
struct btrfs_root *gang[8];
int i;
int ret;
/*
* At this point no one can be using this transaction to modify any tree
......@@ -1411,46 +1414,57 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
*/
ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
spin_lock(&fs_info->fs_roots_lock);
xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) {
int ret;
/*
* At this point we can neither have tasks logging inodes
* from a root nor trying to commit a log tree.
*/
ASSERT(atomic_read(&root->log_writers) == 0);
ASSERT(atomic_read(&root->log_commit[0]) == 0);
ASSERT(atomic_read(&root->log_commit[1]) == 0);
xa_clear_mark(&fs_info->fs_roots,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_lock);
btrfs_free_log(trans, root);
ret = btrfs_update_reloc_root(trans, root);
if (ret)
return ret;
/* See comments in should_cow_block() */
clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_mb__after_atomic();
spin_lock(&fs_info->fs_roots_radix_lock);
while (1) {
ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
(void **)gang, 0,
ARRAY_SIZE(gang),
BTRFS_ROOT_TRANS_TAG);
if (ret == 0)
break;
for (i = 0; i < ret; i++) {
struct btrfs_root *root = gang[i];
int ret2;
/*
* At this point we can neither have tasks logging inodes
* from a root nor trying to commit a log tree.
*/
ASSERT(atomic_read(&root->log_writers) == 0);
ASSERT(atomic_read(&root->log_commit[0]) == 0);
ASSERT(atomic_read(&root->log_commit[1]) == 0);
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
ret2 = btrfs_update_reloc_root(trans, root);
if (ret2)
return ret2;
/* see comments in should_cow_block() */
clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_mb__after_atomic();
if (root->commit_root != root->node) {
list_add_tail(&root->dirty_list,
&trans->transaction->switch_commits);
btrfs_set_root_node(&root->root_item,
root->node);
}
if (root->commit_root != root->node) {
list_add_tail(&root->dirty_list,
&trans->transaction->switch_commits);
btrfs_set_root_node(&root->root_item, root->node);
ret2 = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key,
&root->root_item);
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
btrfs_qgroup_free_meta_all_pertrans(root);
}
ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret)
return ret;
spin_lock(&fs_info->fs_roots_lock);
btrfs_qgroup_free_meta_all_pertrans(root);
}
spin_unlock(&fs_info->fs_roots_lock);
spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment