Commit c4537686 authored by Kent Overstreet and committed by Kent Overstreet

bcachefs: Pipeline binary searches and linear searches

This makes prefetching for the linear search at the end of the lookup
much more effective, and gives a couple percent speedup.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent fab4f8c6
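
The pipelining works by splitting the old bch2_bset_search() into two halves: __bch2_bset_search(), which walks the auxiliary search tree down to a position within a cacheline, and bch2_bset_search_linear(), which finishes with the linear scan over that cacheline. bch2_btree_node_iter_init() can then issue all of the tree searches (and prefetches for their results) before running any of the linear searches, so the loads needed by the linear phase are already in flight. Roughly, the resulting structure is the following sketch, condensed from the diff below (declarations and unrelated details omitted):

        /* Phase 1: tree searches; kick off the cacheline loads early. */
        for (i = 0; i < b->nsets; i++) {
                k[i] = __bch2_bset_search(b, b->set + i, search, &p);
                prefetch_four_cachelines(k[i]);
        }

        /* Phase 2: linear searches; by now the prefetched lines should be arriving. */
        for (i = 0; i < b->nsets; i++)
                k[i] = bch2_bset_search_linear(b, b->set + i, search,
                                               packed_search, &p, k[i]);
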
@@ -1326,6 +1326,25 @@ static int bset_search_tree_slowpath(const struct btree *b,
                               packed_search, search) < 0;
 }
 
+static inline void prefetch_four_cachelines(void *p)
+{
+#ifdef CONFIG_X86_64
+        asm(".intel_syntax noprefix;"
+            "prefetcht0 [%0 - 127 + 64 * 0];"
+            "prefetcht0 [%0 - 127 + 64 * 1];"
+            "prefetcht0 [%0 - 127 + 64 * 2];"
+            "prefetcht0 [%0 - 127 + 64 * 3];"
+            ".att_syntax prefix;"
+            :
+            : "r" (p + 127));
+#else
+        prefetch(p + L1_CACHE_BYTES * 0);
+        prefetch(p + L1_CACHE_BYTES * 1);
+        prefetch(p + L1_CACHE_BYTES * 2);
+        prefetch(p + L1_CACHE_BYTES * 3);
+#endif
+}
+
 __flatten
 static struct bkey_packed *bset_search_tree(const struct btree *b,
                                 struct bset_tree *t,
@@ -1333,34 +1352,12 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
                                 const struct bkey_packed *packed_search)
 {
         struct ro_aux_tree *base = ro_aux_tree_base(b, t);
-        struct bkey_float *f = bkey_float_get(base, 1);
-        void *p;
+        struct bkey_float *f;
         unsigned inorder, n = 1;
 
-        while (1) {
-                if (likely(n << 4 < t->size)) {
-                        p = bkey_float_get(base, n << 4);
-                        prefetch(p);
-                } else if (n << 3 < t->size) {
-                        inorder = __eytzinger1_to_inorder(n, t->size, t->extra);
-                        p = bset_cacheline(b, t, inorder);
-#ifdef CONFIG_X86_64
-                        asm(".intel_syntax noprefix;"
-                            "prefetcht0 [%0 - 127 + 64 * 0];"
-                            "prefetcht0 [%0 - 127 + 64 * 1];"
-                            "prefetcht0 [%0 - 127 + 64 * 2];"
-                            "prefetcht0 [%0 - 127 + 64 * 3];"
-                            ".att_syntax prefix;"
-                            :
-                            : "r" (p + 127));
-#else
-                        prefetch(p + L1_CACHE_BYTES * 0);
-                        prefetch(p + L1_CACHE_BYTES * 1);
-                        prefetch(p + L1_CACHE_BYTES * 2);
-                        prefetch(p + L1_CACHE_BYTES * 3);
-#endif
-                } else if (n >= t->size)
-                        break;
+        do {
+                if (likely(n << 4 < t->size))
+                        prefetch(bkey_float_get(base, n << 4));
 
                 f = bkey_float_get(base, n);
 
@@ -1391,17 +1388,12 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
         }
 }
 
-/*
- * Returns the first key greater than or equal to @search
- */
-__always_inline __flatten
-static struct bkey_packed *bch2_bset_search(struct btree *b,
+static __always_inline __flatten
+struct bkey_packed *__bch2_bset_search(struct btree *b,
                                 struct bset_tree *t,
                                 struct bpos *search,
-                                struct bkey_packed *packed_search,
                                 const struct bkey_packed *lossy_packed_search)
 {
-        struct bkey_packed *m;
 
         /*
          * First, we search for a cacheline, then lastly we do a linear search
@@ -1420,11 +1412,9 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
 
         switch (bset_aux_tree_type(t)) {
         case BSET_NO_AUX_TREE:
-                m = btree_bkey_first(b, t);
-                break;
+                return btree_bkey_first(b, t);
         case BSET_RW_AUX_TREE:
-                m = bset_search_write_set(b, t, search, lossy_packed_search);
-                break;
+                return bset_search_write_set(b, t, search, lossy_packed_search);
         case BSET_RO_AUX_TREE:
                 /*
                  * Each node in the auxiliary search tree covers a certain range
@@ -1436,10 +1426,20 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
                 if (bkey_cmp(*search, t->max_key) > 0)
                         return btree_bkey_last(b, t);
 
-                m = bset_search_tree(b, t, search, lossy_packed_search);
-                break;
+                return bset_search_tree(b, t, search, lossy_packed_search);
+        default:
+                unreachable();
         }
+}
 
+static __always_inline __flatten
+struct bkey_packed *bch2_bset_search_linear(struct btree *b,
+                                struct bset_tree *t,
+                                struct bpos *search,
+                                struct bkey_packed *packed_search,
+                                const struct bkey_packed *lossy_packed_search,
+                                struct bkey_packed *m)
+{
         if (lossy_packed_search)
                 while (m != btree_bkey_last(b, t) &&
                        bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
@@ -1462,6 +1462,23 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
         return m;
 }
 
+/*
+ * Returns the first key greater than or equal to @search
+ */
+static __always_inline __flatten
+struct bkey_packed *bch2_bset_search(struct btree *b,
+                                struct bset_tree *t,
+                                struct bpos *search,
+                                struct bkey_packed *packed_search,
+                                const struct bkey_packed *lossy_packed_search)
+{
+        struct bkey_packed *m = __bch2_bset_search(b, t, search,
+                                                   lossy_packed_search);
+
+        return bch2_bset_search_linear(b, t, search,
+                                 packed_search, lossy_packed_search, m);
+}
+
 /* Btree node iterator */
 
 static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
@@ -1552,9 +1569,10 @@ __flatten
 void bch2_btree_node_iter_init(struct btree_node_iter *iter,
                                struct btree *b, struct bpos *search)
 {
-        struct bset_tree *t;
         struct bkey_packed p, *packed_search = NULL;
         struct btree_node_iter_set *pos = iter->data;
+        struct bkey_packed *k[MAX_BSETS];
+        unsigned i;
 
         EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0);
         bset_aux_tree_verify(b);
@@ -1573,14 +1591,20 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
                 return;
         }
 
-        for_each_bset(b, t) {
-                struct bkey_packed *k = bch2_bset_search(b, t, search,
-                                                         packed_search, &p);
+        for (i = 0; i < b->nsets; i++) {
+                k[i] = __bch2_bset_search(b, b->set + i, search, &p);
+                prefetch_four_cachelines(k[i]);
+        }
+
+        for (i = 0; i < b->nsets; i++) {
+                struct bset_tree *t = b->set + i;
                 struct bkey_packed *end = btree_bkey_last(b, t);
 
-                if (k != end)
+                k[i] = bch2_bset_search_linear(b, t, search,
+                                               packed_search, &p, k[i]);
+                if (k[i] != end)
                         *pos++ = (struct btree_node_iter_set) {
-                                __btree_node_key_to_offset(b, k),
+                                __btree_node_key_to_offset(b, k[i]),
                                 __btree_node_key_to_offset(b, end)
                         };
         }
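
A note on the prefetch_four_cachelines() helper introduced above: on x86-64 the register operand is loaded with p + 127 and the prefetcht0 displacements are -127 + 64 * n, so every effective displacement (-127, -63, +1, +65) fits in a signed 8-bit immediate, which presumably keeps the instruction encodings short; the #else branch is the generic fallback that calls the kernel's prefetch() on each of the four cachelines. A minimal stand-alone sketch of that fallback, assuming 64-byte cachelines and using the GCC/Clang __builtin_prefetch() builtin (the helper name here is illustrative, not part of the patch):

        #define CACHELINE_BYTES 64      /* assumption: 64-byte cachelines */

        /* Prefetch, for reading, the four consecutive cachelines starting at p. */
        static inline void prefetch_four_cachelines_generic(const void *p)
        {
                int i;

                for (i = 0; i < 4; i++)
                        __builtin_prefetch((const char *) p + i * CACHELINE_BYTES,
                                           0 /* read */, 3 /* high locality, ~prefetcht0 */);
        }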