Commit ad44bdc3 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: bkey noops

For upcoming inline data extents, we're going to need to be able to
shorten the value of existing bkeys in the btree - and to make that work
we're going to need to be able to pad out the space the value previously
took up with something.

This patch changes the various code that iterates over bkeys to handle
k->u64s == 0 as meaning "skip the next 8 bytes".
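
To illustrate the idea, here is a standalone sketch (not the bcachefs code
itself; demo_key, demo_next_skip_noops() and demo_shorten() are made-up
names): keys are packed back to back in units of u64s, so a key whose u64s
field reads 0 can be treated as a noop covering exactly one u64, and space
freed by shortening a value can simply be padded with zeroes:

    #include <stdint.h>
    #include <string.h>

    struct demo_key {
    	uint8_t u64s;	/* total size of key + value, in u64s */
    	/* packed key and value data follow */
    };

    /* advance to the next key, treating u64s == 0 as "skip the next 8 bytes" */
    static struct demo_key *demo_next_skip_noops(struct demo_key *k,
    					     struct demo_key *end)
    {
    	k = (void *) ((uint64_t *) k + k->u64s);

    	while (k != end && !k->u64s)
    		k = (void *) ((uint64_t *) k + 1);

    	return k;
    }

    /* shorten a key's value in place, padding the freed tail with noops */
    static void demo_shorten(struct demo_key *k, uint8_t new_u64s)
    {
    	uint8_t old_u64s = k->u64s;

    	k->u64s = new_u64s;
    	/* zeroed u64s read back as keys with u64s == 0, i.e. noops */
    	memset((uint64_t *) k + new_u64s, 0,
    	       (old_u64s - new_u64s) * sizeof(uint64_t));
    }

bkey_next_skip_noops(), added in the patch below, applies the same skip loop
to the real bkey_packed layout.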
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent aef90ce0
@@ -41,6 +41,16 @@ struct bkey_s {
 #define bkey_next(_k)		vstruct_next(_k)
 
+static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k,
+						       struct bkey_packed *end)
+{
+	k = bkey_next(k);
+
+	while (k != end && !k->u64s)
+		k = (void *) ((u64 *) k + 1);
+
+	return k;
+}
+
 #define bkey_val_u64s(_k)	((_k)->u64s - BKEY_U64s)
 
 static inline size_t bkey_val_bytes(const struct bkey *k)
...
@@ -75,6 +75,10 @@ static void sort_key_next(struct btree_node_iter_large *iter,
 {
 	i->k += __btree_node_offset_to_key(b, i->k)->u64s;
 
+	while (i->k != i->end &&
+	       !__btree_node_offset_to_key(b, i->k)->u64s)
+		i->k++;
+
 	if (i->k == i->end)
 		*i = iter->data[--iter->used];
 }
@@ -119,7 +123,7 @@ static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
 static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
 {
-	iter->data->k = bkey_next(iter->data->k);
+	iter->data->k = bkey_next_skip_noops(iter->data->k, iter->data->end);
 
 	BUG_ON(iter->data->k > iter->data->end);
...
@@ -64,7 +64,7 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
 	for (_k = i->start, k = bkey_unpack_key(b, _k);
 	     _k < vstruct_last(i);
 	     _k = _n, k = n) {
-		_n = bkey_next(_k);
+		_n = bkey_next_skip_noops(_k, vstruct_last(i));
 
 		bch2_bkey_to_text(&PBUF(buf), &k);
 		printk(KERN_ERR "block %u key %5u: %s\n", set,
@@ -132,9 +132,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
 	struct btree_nr_keys nr = { 0 };
 
 	for_each_bset(b, t)
-		for (k = btree_bkey_first(b, t);
-		     k != btree_bkey_last(b, t);
-		     k = bkey_next(k))
+		bset_tree_for_each_key(b, t, k)
 			if (!bkey_whiteout(k))
 				btree_keys_account_key_add(&nr, t - b->set, k);
@@ -595,7 +593,7 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b,
 				rw_aux_tree(b, t)[j - 1].offset);
 		}
 
-		k = bkey_next(k);
+		k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
 		BUG_ON(k >= btree_bkey_last(b, t));
 	}
 }
@@ -786,9 +784,7 @@ static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
 	rw_aux_tree(b, t)[0].offset =
 		__btree_node_key_to_offset(b, btree_bkey_first(b, t));
 
-	for (k = btree_bkey_first(b, t);
-	     k != btree_bkey_last(b, t);
-	     k = bkey_next(k)) {
+	bset_tree_for_each_key(b, t, k) {
 		if (t->size == bset_rw_tree_capacity(b, t))
 			break;
@@ -821,7 +817,7 @@ static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
 	/* First we figure out where the first key in each cacheline is */
 	eytzinger1_for_each(j, t->size) {
 		while (bkey_to_cacheline(b, t, k) < cacheline)
-			prev = k, k = bkey_next(k);
+			prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
 
 		if (k >= btree_bkey_last(b, t)) {
 			/* XXX: this path sucks */
@@ -837,10 +833,10 @@ static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
 		EBUG_ON(tree_to_bkey(b, t, j) != k);
 	}
 
-	while (bkey_next(k) != btree_bkey_last(b, t))
-		k = bkey_next(k);
+	while (k != btree_bkey_last(b, t))
+		prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
 
-	t->max_key = bkey_unpack_pos(b, k);
+	t->max_key = bkey_unpack_pos(b, prev);
 
 	/* Then we build the tree */
 	eytzinger1_for_each(j, t->size)
@@ -966,7 +962,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
 	struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
 
 	while ((p = __bkey_prev(b, t, k)) && !ret) {
-		for (i = p; i != k; i = bkey_next(i))
+		for (i = p; i != k; i = bkey_next_skip_noops(i, k))
 			if (i->type >= min_key_type)
 				ret = i;
@@ -976,9 +972,11 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
 	if (btree_keys_expensive_checks(b)) {
 		BUG_ON(ret >= orig_k);
 
-		for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
+		for (i = ret
+			? bkey_next_skip_noops(ret, orig_k)
+			: btree_bkey_first(b, t);
 		     i != orig_k;
-		     i = bkey_next(i))
+		     i = bkey_next_skip_noops(i, orig_k))
 			BUG_ON(i->type >= min_key_type);
 	}
@@ -1013,7 +1011,7 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
 	/* signal to make_bfloat() that they're uninitialized: */
 	min_key.u64s = max_key.u64s = 0;
 
-	if (bkey_next(k) == btree_bkey_last(b, t)) {
+	if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) {
 		t->max_key = bkey_unpack_pos(b, k);
 
 		for (j = 1; j < t->size; j = j * 2 + 1)
@@ -1137,7 +1135,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
 		struct bkey_packed *k = start;
 
 		while (1) {
-			k = bkey_next(k);
+			k = bkey_next_skip_noops(k, end);
 			if (k == end)
 				break;
@@ -1386,12 +1384,12 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
 		while (m != btree_bkey_last(b, t) &&
 		       bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
 					      m) > 0)
-			m = bkey_next(m);
+			m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
 
 	if (!packed_search)
 		while (m != btree_bkey_last(b, t) &&
 		       bkey_iter_pos_cmp(b, search, m) > 0)
-			m = bkey_next(m);
+			m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
 
 	if (btree_keys_expensive_checks(b)) {
 		struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
@@ -1625,6 +1623,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 	EBUG_ON(iter->data->k > iter->data->end);
 
+	while (!__btree_node_iter_set_end(iter, 0) &&
+	       !__bch2_btree_node_iter_peek_all(iter, b)->u64s)
+		iter->data->k++;
+
 	if (unlikely(__btree_node_iter_set_end(iter, 0))) {
 		bch2_btree_node_iter_set_drop(iter, iter->data);
 		return;
...
@@ -287,6 +287,11 @@ static inline struct bkey_s __bkey_disassemble(struct btree *b,
 #define for_each_bset(_b, _t)						\
 	for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
 
+#define bset_tree_for_each_key(_b, _t, _k)				\
+	for (_k = btree_bkey_first(_b, _t);				\
+	     _k != btree_bkey_last(_b, _t);				\
+	     _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t)))
+
 static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
 {
 	return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
...
@@ -924,7 +924,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
 		     k < vstruct_last(s2) &&
 		     vstruct_blocks_plus(n1->data, c->block_bits,
 					 u64s + k->u64s) <= blocks;
-		     k = bkey_next(k)) {
+		     k = bkey_next_skip_noops(k, vstruct_last(s2))) {
 			last = k;
 			u64s += k->u64s;
 		}
...
@@ -25,34 +25,33 @@ static void verify_no_dups(struct btree *b,
 			   struct bkey_packed *end)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
-	struct bkey_packed *k;
+	struct bkey_packed *k, *p;
+
+	if (start == end)
+		return;
 
-	for (k = start; k != end && bkey_next(k) != end; k = bkey_next(k)) {
-		struct bkey l = bkey_unpack_key(b, k);
-		struct bkey r = bkey_unpack_key(b, bkey_next(k));
+	for (p = start, k = bkey_next_skip_noops(start, end);
+	     k != end;
+	     p = k, k = bkey_next_skip_noops(k, end)) {
+		struct bkey l = bkey_unpack_key(b, p);
+		struct bkey r = bkey_unpack_key(b, k);
 
 		BUG_ON(btree_node_is_extents(b)
 		       ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
 		       : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
-		//BUG_ON(bkey_cmp_packed(&b->format, k, bkey_next(k)) >= 0);
+		//BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
 	}
 #endif
 }
 
-static void clear_needs_whiteout(struct bset *i)
-{
-	struct bkey_packed *k;
-
-	for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
-		k->needs_whiteout = false;
-}
-
-static void set_needs_whiteout(struct bset *i)
+static void set_needs_whiteout(struct bset *i, int v)
 {
 	struct bkey_packed *k;
 
-	for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
-		k->needs_whiteout = true;
+	for (k = i->start;
+	     k != vstruct_last(i);
+	     k = bkey_next_skip_noops(k, vstruct_last(i)))
+		k->needs_whiteout = v;
 }
 
 static void btree_bounce_free(struct bch_fs *c, unsigned order,
@@ -167,7 +166,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 	out = i->start;
 
 	for (k = start; k != end; k = n) {
-		n = bkey_next(k);
+		n = bkey_next_skip_noops(k, end);
 
 		if (bkey_deleted(k) && btree_node_is_extents(b))
 			continue;
@@ -260,7 +259,7 @@ static bool bch2_drop_whiteouts(struct btree *b)
 	out = i->start;
 
 	for (k = start; k != end; k = n) {
-		n = bkey_next(k);
+		n = bkey_next_skip_noops(k, end);
 
 		if (!bkey_whiteout(k)) {
 			bkey_copy(out, k);
@@ -679,14 +678,6 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 		struct bkey tmp;
 		const char *invalid;
 
-		if (btree_err_on(!k->u64s,
-				 BTREE_ERR_FIXABLE, c, b, i,
-				 "KEY_U64s 0: %zu bytes of metadata lost",
-				 vstruct_end(i) - (void *) k)) {
-			i->u64s = cpu_to_le16((u64 *) k - i->_data);
-			break;
-		}
-
 		if (btree_err_on(bkey_next(k) > vstruct_last(i),
 				 BTREE_ERR_FIXABLE, c, b, i,
 				 "key extends past end of bset")) {
@@ -755,7 +746,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 		prev_pos = u.k->p;
 
 		prev = k;
-		k = bkey_next(k);
+		k = bkey_next_skip_noops(k, vstruct_last(i));
 	}
 
 	SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
@@ -914,12 +905,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 			continue;
 		}
 
-		k = bkey_next(k);
+		k = bkey_next_skip_noops(k, vstruct_last(i));
 	}
 
 	bch2_bset_build_aux_tree(b, b->set, false);
 
-	set_needs_whiteout(btree_bset_first(b));
+	set_needs_whiteout(btree_bset_first(b), true);
 
 	btree_node_reset_sib_u64s(b);
 out:
@@ -1424,7 +1415,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		: bch2_sort_keys(i->start, &sort_iter, false);
 	le16_add_cpu(&i->u64s, u64s);
 
-	clear_needs_whiteout(i);
+	set_needs_whiteout(i, false);
 
 	/* do we have data to write? */
 	if (b->written && !i->u64s)
@@ -1579,7 +1570,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 	}
 
 	for_each_bset(b, t)
-		set_needs_whiteout(bset(b, t));
+		set_needs_whiteout(bset(b, t), true);
 
 	bch2_btree_verify(c, b);
...
@@ -79,9 +79,7 @@ void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
 	bch2_bkey_format_add_pos(s, b->data->min_key);
 
 	for_each_bset(b, t)
-		for (k = btree_bkey_first(b, t);
-		     k != btree_bkey_last(b, t);
-		     k = bkey_next(k))
+		bset_tree_for_each_key(b, t, k)
 			if (!bkey_whiteout(k)) {
 				uk = bkey_unpack_key(b, k);
 				bch2_bkey_format_add_key(s, &uk);
@@ -1240,7 +1238,9 @@ static struct btree *__btree_split_node(struct btree_update *as,
 	 */
 	k = set1->start;
 	while (1) {
-		if (bkey_next(k) == vstruct_last(set1))
+		struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1));
+
+		if (n == vstruct_last(set1))
 			break;
 		if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5)
 			break;
@@ -1251,7 +1251,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
 			nr_unpacked++;
 
 		prev = k;
-		k = bkey_next(k);
+		k = n;
 	}
 
 	BUG_ON(!prev);
@@ -1315,7 +1315,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
 {
 	struct btree_node_iter node_iter;
 	struct bkey_i *k = bch2_keylist_front(keys);
-	struct bkey_packed *p;
+	struct bkey_packed *src, *dst, *n;
 	struct bset *i;
 
 	BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
@@ -1340,16 +1340,18 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
 	 * for the pivot:
 	 */
 	i = btree_bset_first(b);
-	p = i->start;
-	while (p != vstruct_last(i))
-		if (bkey_deleted(p)) {
-			le16_add_cpu(&i->u64s, -p->u64s);
-			set_btree_bset_end(b, b->set);
-			memmove_u64s_down(p, bkey_next(p),
-					  (u64 *) vstruct_last(i) -
-					  (u64 *) p);
-		} else
-			p = bkey_next(p);
+	src = dst = i->start;
+	while (src != vstruct_last(i)) {
+		n = bkey_next_skip_noops(src, vstruct_last(i));
+		if (!bkey_deleted(src)) {
+			memmove_u64s_down(dst, src, src->u64s);
+			dst = bkey_next(dst);
+		}
+		src = n;
+	}
+
+	i->u64s = cpu_to_le16((u64 *) dst - i->_data);
+	set_btree_bset_end(b, b->set);
 
 	BUG_ON(b->nsets != 1 ||
 	       b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s));
...