Commit 5c9d028b authored by Liu Bo, committed by David Sterba

Btrfs: delayed-refs: use rb_first_cached for href_root

rb_first_cached() keeps an extra "leftmost" pointer so it can do the same
job as rb_first(), but in O(1).

Functions manipulating href_root need to get the first entry, so this patch
converts href_root to an rb_root_cached and uses rb_first_cached() instead
of rb_first().
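
For reference, here is a minimal sketch (not taken from this patch) of what
the insert side looks like once a root becomes an rb_root_cached: the caller
tracks whether the new node ever descended to the right and tells
rb_insert_color_cached() whether it may be the new leftmost node. The
struct item and its u64 key are made-up names for illustration only:

	#include <linux/rbtree.h>

	struct item {
		struct rb_node node;
		u64 key;
	};

	/* illustration only: returns the existing entry on collision, NULL when inserted */
	static struct item *item_insert(struct rb_root_cached *root,
					struct item *ins)
	{
		struct rb_node **p = &root->rb_root.rb_node;
		struct rb_node *parent = NULL;
		bool leftmost = true;

		while (*p) {
			struct item *entry = rb_entry(*p, struct item, node);

			parent = *p;
			if (ins->key < entry->key) {
				p = &(*p)->rb_left;
			} else if (ins->key > entry->key) {
				/* went right at least once, cannot be leftmost */
				p = &(*p)->rb_right;
				leftmost = false;
			} else {
				return entry;
			}
		}

		rb_link_node(&ins->node, parent, p);
		rb_insert_color_cached(&ins->node, root, leftmost);
		return NULL;
	}

After that, fetching the smallest key is rb_first_cached(root), which is a
plain pointer read.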

This patch is the first in a sequence of similar updates to other rbtrees;
what follows is an analysis of the expected behaviour and improvements.

There's a common pattern:

while (node = rb_first(root)) {
        entry = rb_entry(node, ...)
        next = rb_next(node)
        rb_erase(node, root)
        cleanup(entry)
}

rb_first() needs to traverse the tree up to log(N) depth, and rb_erase() can
completely reshuffle the tree. With the caching we skip the traversal in
rb_first() entirely. That's a cached memory access versus a looped pointer
dereference, a trade-off that IMHO has a clear winner.
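
With a cached root, the same loop looks like this as a sketch (reusing the
hypothetical struct item from above, with cleanup() standing in for the real
teardown); rb_first_cached() is a plain pointer read instead of a tree walk,
while rb_erase_cached() still rebalances:

	struct rb_node *rbn;

	while ((rbn = rb_first_cached(root)) != NULL) {
		struct item *entry = rb_entry(rbn, struct item, node);

		rb_erase_cached(rbn, root);
		cleanup(entry);
	}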

Measurements show there's not much difference in a sample tree with
10000 nodes: 4.5s / rb_first and 4.8s / rb_first_cached. Real effects of
caching and pointer chasing are unpredictable though.

Further optimizations can be done to avoid the expensive rb_erase step.
In some cases it's OK to process the nodes in any order, so the tree can
be traversed in post-order, without rebalancing the child nodes and just
calling free. Care must be taken regarding the next node, which has to be
fetched before the current one is freed.
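
A sketch of that idea, assuming the tree is no longer visible to anyone else
and the entries may be freed in any order: rbtree_postorder_for_each_entry_safe()
fetches the next node before the body runs, so the current entry can be freed
without calling rb_erase at all, and the root is simply reinitialized
afterwards. struct item and cleanup() are again placeholders:

	struct item *entry, *next;

	rbtree_postorder_for_each_entry_safe(entry, next, &root->rb_root, node)
		cleanup(entry);

	*root = RB_ROOT_CACHED;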
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog from mail discussions ]
Signed-off-by: David Sterba <dsterba@suse.com>
parent 3aa7c7a3
@@ -101,14 +101,15 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
 }
 
 /* insert a new ref to head ref rbtree */
-static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
 						   struct rb_node *node)
 {
-	struct rb_node **p = &root->rb_node;
+	struct rb_node **p = &root->rb_root.rb_node;
 	struct rb_node *parent_node = NULL;
 	struct btrfs_delayed_ref_head *entry;
 	struct btrfs_delayed_ref_head *ins;
 	u64 bytenr;
+	bool leftmost = true;
 
 	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
 	bytenr = ins->bytenr;
@@ -117,16 +118,18 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
 				 href_node);
 
-		if (bytenr < entry->bytenr)
+		if (bytenr < entry->bytenr) {
 			p = &(*p)->rb_left;
-		else if (bytenr > entry->bytenr)
+		} else if (bytenr > entry->bytenr) {
 			p = &(*p)->rb_right;
-		else
+			leftmost = false;
+		} else {
 			return entry;
+		}
 	}
 
 	rb_link_node(node, parent_node, p);
-	rb_insert_color(node, root);
+	rb_insert_color_cached(node, root, leftmost);
 	return NULL;
 }
@@ -164,10 +167,11 @@ static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
  * If return_bigger is given, the next bigger entry is returned if no exact
  * match is found.
  */
-static struct btrfs_delayed_ref_head *
-find_ref_head(struct rb_root *root, u64 bytenr,
-	      int return_bigger)
+static struct btrfs_delayed_ref_head* find_ref_head(
+		struct btrfs_delayed_ref_root *dr, u64 bytenr,
+		int return_bigger)
 {
+	struct rb_root *root = &dr->href_root.rb_root;
 	struct rb_node *n;
 	struct btrfs_delayed_ref_head *entry;
@@ -187,7 +191,7 @@ find_ref_head(struct rb_root *root, u64 bytenr,
 	if (bytenr > entry->bytenr) {
 		n = rb_next(&entry->href_node);
 		if (!n)
-			n = rb_first(root);
+			n = rb_first_cached(&dr->href_root);
 		entry = rb_entry(n, struct btrfs_delayed_ref_head,
 				 href_node);
 		return entry;
@@ -357,12 +361,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
 again:
 	start = delayed_refs->run_delayed_start;
-	head = find_ref_head(&delayed_refs->href_root, start, 1);
+	head = find_ref_head(delayed_refs, start, 1);
 	if (!head && !loop) {
 		delayed_refs->run_delayed_start = 0;
 		start = 0;
 		loop = true;
-		head = find_ref_head(&delayed_refs->href_root, start, 1);
+		head = find_ref_head(delayed_refs, start, 1);
 		if (!head)
 			return NULL;
 	} else if (!head && loop) {
@@ -903,7 +907,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
 {
-	return find_ref_head(&delayed_refs->href_root, bytenr, 0);
+	return find_ref_head(delayed_refs, bytenr, 0);
 }
 
 void __cold btrfs_delayed_ref_exit(void)
......
@@ -148,7 +148,7 @@ struct btrfs_delayed_data_ref {
 struct btrfs_delayed_ref_root {
 	/* head ref rbtree */
-	struct rb_root href_root;
+	struct rb_root_cached href_root;
 
 	/* dirty extent records */
 	struct rb_root dirty_extent_root;
......
@@ -4203,7 +4203,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		return ret;
 	}
 
-	while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;
 		bool pin_bytes = false;
@@ -4239,7 +4239,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		if (head->processing == 0)
 			delayed_refs->num_heads_ready--;
 		atomic_dec(&delayed_refs->num_entries);
-		rb_erase(&head->href_node, &delayed_refs->href_root);
+		rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 		RB_CLEAR_NODE(&head->href_node);
 		spin_unlock(&head->lock);
 		spin_unlock(&delayed_refs->lock);
......
@@ -2454,7 +2454,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 		return 1;
 	}
 	delayed_refs->num_heads--;
-	rb_erase(&head->href_node, &delayed_refs->href_root);
+	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 	RB_CLEAR_NODE(&head->href_node);
 	spin_unlock(&head->lock);
 	spin_unlock(&delayed_refs->lock);
@@ -2940,7 +2940,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		btrfs_create_pending_block_groups(trans);
 
 		spin_lock(&delayed_refs->lock);
-		node = rb_first(&delayed_refs->href_root);
+		node = rb_first_cached(&delayed_refs->href_root);
 		if (!node) {
 			spin_unlock(&delayed_refs->lock);
 			goto out;
@@ -6929,7 +6929,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 * at this point we have a head with no other entries. Go
 	 * ahead and process it.
 	 */
-	rb_erase(&head->href_node, &delayed_refs->href_root);
+	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 	RB_CLEAR_NODE(&head->href_node);
 	atomic_dec(&delayed_refs->num_entries);
......
@@ -44,7 +44,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 	WARN_ON(refcount_read(&transaction->use_count) == 0);
 	if (refcount_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
-		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
+		WARN_ON(!RB_EMPTY_ROOT(
+				&transaction->delayed_refs.href_root.rb_root));
 		if (transaction->delayed_refs.pending_csums)
 			btrfs_err(transaction->fs_info,
 				  "pending csums is %llu",
@@ -245,7 +246,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
 	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
-	cur_trans->delayed_refs.href_root = RB_ROOT;
+	cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
 	cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
 	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
......