Commit e62d65f2 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: trans_commit() path can now insert to interior nodes

This will be needed for the upcoming patches to journal updates to
interior btree nodes.
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 47143a75
...@@ -134,7 +134,7 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, ...@@ -134,7 +134,7 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
{ {
if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0) if (bkey_cmp(k.k->p, b->data->min_key) < 0)
return "key before start of btree node"; return "key before start of btree node";
if (bkey_cmp(k.k->p, b->data->max_key) > 0) if (bkey_cmp(k.k->p, b->data->max_key) > 0)
......
...@@ -589,6 +589,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) ...@@ -589,6 +589,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
struct btree_iter *iter, struct btree_iter *iter,
const struct bkey_i *k, const struct bkey_i *k,
enum btree_id btree_id,
unsigned level, unsigned level,
enum six_lock_type lock_type, enum six_lock_type lock_type,
bool sync) bool sync)
...@@ -601,7 +602,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, ...@@ -601,7 +602,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* Parent node must be locked, else we could read in a btree node that's * Parent node must be locked, else we could read in a btree node that's
* been freed: * been freed:
*/ */
if (!bch2_btree_node_relock(iter, level + 1)) if (iter && !bch2_btree_node_relock(iter, level + 1))
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
b = bch2_btree_node_mem_alloc(c); b = bch2_btree_node_mem_alloc(c);
...@@ -609,7 +610,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, ...@@ -609,7 +610,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b; return b;
bkey_copy(&b->key, k); bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) { if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */ /* raced with another fill: */
/* mark as unhashed... */ /* mark as unhashed... */
...@@ -629,7 +630,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, ...@@ -629,7 +630,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* *
* XXX: ideally should be dropping all btree node locks here * XXX: ideally should be dropping all btree node locks here
*/ */
if (btree_node_read_locked(iter, level + 1)) if (iter && btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1); btree_node_unlock(iter, level + 1);
bch2_btree_node_read(c, b, sync); bch2_btree_node_read(c, b, sync);
...@@ -677,7 +678,8 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, ...@@ -677,7 +678,8 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
* else we could read in a btree node from disk that's been * else we could read in a btree node from disk that's been
* freed: * freed:
*/ */
b = bch2_btree_node_fill(c, iter, k, level, lock_type, true); b = bch2_btree_node_fill(c, iter, k, iter->btree_id,
level, lock_type, true);
/* We raced and found the btree node in the cache */ /* We raced and found the btree node in the cache */
if (!b) if (!b)
...@@ -763,6 +765,74 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, ...@@ -763,6 +765,74 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
return b; return b;
} }
struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
const struct bkey_i *k,
enum btree_id btree_id,
unsigned level)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
EBUG_ON(level >= BTREE_MAX_DEPTH);
b = btree_node_mem_ptr(k);
if (b)
goto lock_node;
retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
b = bch2_btree_node_fill(c, NULL, k, btree_id,
level, SIX_LOCK_read, true);
/* We raced and found the btree node in the cache */
if (!b)
goto retry;
if (IS_ERR(b))
return b;
} else {
lock_node:
six_lock_read(&b->c.lock, NULL, NULL);
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->c.btree_id != btree_id ||
b->c.level != level)) {
six_unlock_read(&b->c.lock);
goto retry;
}
}
/* XXX: waiting on IO with btree locks held: */
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
TASK_UNINTERRUPTIBLE);
prefetch(b->aux_data);
for_each_bset(b, t) {
void *p = (u64 *) b->aux_data + t->aux_data_offset;
prefetch(p + L1_CACHE_BYTES * 0);
prefetch(p + L1_CACHE_BYTES * 1);
prefetch(p + L1_CACHE_BYTES * 2);
}
/* avoid atomic set bit if it's not needed: */
if (!btree_node_accessed(b))
set_btree_node_accessed(b);
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
return ERR_PTR(-EIO);
}
EBUG_ON(b->c.btree_id != btree_id ||
BTREE_NODE_LEVEL(b->data) != level ||
bkey_cmp(b->data->max_key, k->k.p));
return b;
}
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_iter *iter, struct btree_iter *iter,
struct btree *b, struct btree *b,
...@@ -877,7 +947,8 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, ...@@ -877,7 +947,8 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
if (b) if (b)
return; return;
bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false); bch2_btree_node_fill(c, iter, k, iter->btree_id,
level, SIX_LOCK_read, false);
} }
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
......
...@@ -25,6 +25,9 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, ...@@ -25,6 +25,9 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned, const struct bkey_i *, unsigned,
enum six_lock_type); enum six_lock_type);
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
enum btree_id, unsigned);
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, enum btree_node_sibling); struct btree *, enum btree_node_sibling);
......
...@@ -186,16 +186,8 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, ...@@ -186,16 +186,8 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret; return ret;
} }
static bool pos_in_journal_keys(struct journal_keys *journal_keys,
enum btree_id id, struct bpos pos)
{
struct journal_key *k = journal_key_search(journal_keys, id, pos);
return k && k->btree_id == id && !bkey_cmp(k->k->k.p, pos);
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
struct journal_keys *journal_keys, bool initial) bool initial)
{ {
struct btree_node_iter iter; struct btree_node_iter iter;
struct bkey unpacked; struct bkey unpacked;
...@@ -209,10 +201,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, ...@@ -209,10 +201,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
for_each_btree_node_key_unpack(b, k, &iter, for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) { &unpacked) {
if (!b->c.level && journal_keys &&
pos_in_journal_keys(journal_keys, b->c.btree_id, k.k->p))
continue;
bch2_bkey_debugcheck(c, b, k); bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, k, max_stale, initial); ret = bch2_gc_mark_key(c, k, max_stale, initial);
...@@ -224,7 +212,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, ...@@ -224,7 +212,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
} }
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
struct journal_keys *journal_keys,
bool initial, bool metadata_only) bool initial, bool metadata_only)
{ {
struct btree_trans trans; struct btree_trans trans;
...@@ -252,8 +239,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, ...@@ -252,8 +239,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
gc_pos_set(c, gc_pos_btree_node(b)); gc_pos_set(c, gc_pos_btree_node(b));
ret = btree_gc_mark_node(c, b, &max_stale, ret = btree_gc_mark_node(c, b, &max_stale, initial);
journal_keys, initial);
if (ret) if (ret)
break; break;
...@@ -289,6 +275,78 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, ...@@ -289,6 +275,78 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
return ret; return ret;
} }
static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
struct journal_keys *journal_keys,
unsigned target_depth)
{
struct btree_and_journal_iter iter;
struct bkey_s_c k;
u8 max_stale = 0;
int ret = 0;
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, k, &max_stale, true);
if (ret)
break;
if (b->c.level > target_depth) {
struct btree *child;
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
child = bch2_btree_node_get_noiter(c, &tmp.k,
b->c.btree_id, b->c.level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;
bch2_gc_btree_init_recurse(c, child,
journal_keys, target_depth);
six_unlock_read(&child->c.lock);
}
bch2_btree_and_journal_iter_advance(&iter);
}
return ret;
}
static int bch2_gc_btree_init(struct bch_fs *c,
struct journal_keys *journal_keys,
enum btree_id btree_id,
bool metadata_only)
{
struct btree *b;
unsigned target_depth = metadata_only ? 1
: expensive_debug_checks(c) ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
u8 max_stale = 0;
int ret = 0;
b = c->btree_roots[btree_id].b;
if (btree_node_fake(b))
return 0;
six_lock_read(&b->c.lock, NULL, NULL);
if (b->c.level >= target_depth)
ret = bch2_gc_btree_init_recurse(c, b,
journal_keys, target_depth);
if (!ret)
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
&max_stale, true);
six_unlock_read(&b->c.lock);
return ret;
}
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{ {
return (int) btree_id_to_gc_phase(l) - return (int) btree_id_to_gc_phase(l) -
...@@ -307,27 +365,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, ...@@ -307,27 +365,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
for (i = 0; i < BTREE_ID_NR; i++) { for (i = 0; i < BTREE_ID_NR; i++) {
enum btree_id id = ids[i]; enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id); int ret = initial
? bch2_gc_btree_init(c, journal_keys,
int ret = bch2_gc_btree(c, id, journal_keys, id, metadata_only)
initial, metadata_only); : bch2_gc_btree(c, id, initial, metadata_only);
if (ret) if (ret)
return ret; return ret;
if (journal_keys && !metadata_only &&
btree_node_type_needs_gc(type)) {
struct journal_key *j;
u8 max_stale;
int ret;
for_each_journal_key(*journal_keys, j)
if (j->btree_id == id) {
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(j->k),
&max_stale, initial);
if (ret)
return ret;
}
}
} }
return 0; return 0;
......
...@@ -264,6 +264,11 @@ static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter) ...@@ -264,6 +264,11 @@ static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter)
return iter->flags & BTREE_ITER_TYPE; return iter->flags & BTREE_ITER_TYPE;
} }
static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
{
return iter->l + iter->level;
}
struct btree_insert_entry { struct btree_insert_entry {
unsigned trigger_flags; unsigned trigger_flags;
unsigned trans_triggers_run:1; unsigned trans_triggers_run:1;
......
...@@ -1630,7 +1630,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, ...@@ -1630,7 +1630,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
unsigned flags) unsigned flags)
{ {
struct btree_trans *trans = iter->trans; struct btree_trans *trans = iter->trans;
struct btree *b = iter->l[0].b; struct btree *b = iter_l(iter)->b;
struct btree_update *as; struct btree_update *as;
struct closure cl; struct closure cl;
int ret = 0; int ret = 0;
......
...@@ -24,7 +24,7 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, ...@@ -24,7 +24,7 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i) struct btree_insert_entry *i)
{ {
return i != trans->updates2 && return i != trans->updates2 &&
i[0].iter->l[0].b == i[-1].iter->l[0].b; iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
} }
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
...@@ -172,13 +172,12 @@ static void bch2_btree_journal_key(struct btree_trans *trans, ...@@ -172,13 +172,12 @@ static void bch2_btree_journal_key(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct journal *j = &c->journal; struct journal *j = &c->journal;
struct btree *b = iter->l[0].b; struct btree *b = iter_l(iter)->b;
struct btree_write *w = btree_current_write(b); struct btree_write *w = btree_current_write(b);
u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
? trans->journal_res.seq ? trans->journal_res.seq
: j->replay_journal_seq; : j->replay_journal_seq;
EBUG_ON(iter->level || b->c.level);
EBUG_ON(trans->journal_res.ref != EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
...@@ -205,17 +204,15 @@ static void btree_insert_key_leaf(struct btree_trans *trans, ...@@ -205,17 +204,15 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
struct bkey_i *insert) struct bkey_i *insert)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree *b = iter->l[0].b; struct btree *b = iter_l(iter)->b;
struct bset_tree *t = bset_tree_last(b); struct bset_tree *t = bset_tree_last(b);
int old_u64s = bset_u64s(t); int old_u64s = bset_u64s(t);
int old_live_u64s = b->nr.live_u64s; int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added; int live_u64s_added, u64s_added;
EBUG_ON(iter->level);
insert->k.needs_whiteout = false; insert->k.needs_whiteout = false;
if (likely(bch2_btree_bset_insert_key(iter, b, &iter->l[0].iter, insert))) if (likely(bch2_btree_bset_insert_key(iter, b, &iter_l(iter)->iter, insert)))
bch2_btree_journal_key(trans, iter, insert); bch2_btree_journal_key(trans, iter, insert);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
...@@ -241,7 +238,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, ...@@ -241,7 +238,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
BUG_ON(iter->level);
BUG_ON(bkey_cmp(insert->k.p, iter->pos)); BUG_ON(bkey_cmp(insert->k.p, iter->pos));
BUG_ON(debug_check_bkeys(c) && BUG_ON(debug_check_bkeys(c) &&
bch2_bkey_invalid(c, bkey_i_to_s_c(insert), iter->btree_id)); bch2_bkey_invalid(c, bkey_i_to_s_c(insert), iter->btree_id));
...@@ -290,7 +286,7 @@ btree_key_can_insert(struct btree_trans *trans, ...@@ -290,7 +286,7 @@ btree_key_can_insert(struct btree_trans *trans,
unsigned *u64s) unsigned *u64s)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree *b = iter->l[0].b; struct btree *b = iter_l(iter)->b;
static enum btree_insert_ret ret; static enum btree_insert_ret ret;
if (unlikely(btree_node_fake(b))) if (unlikely(btree_node_fake(b)))
...@@ -345,7 +341,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) ...@@ -345,7 +341,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
struct btree_insert_entry *i; struct btree_insert_entry *i;
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) if (gc_visited(c, gc_pos_btree_node(iter_l(i->iter)->b)))
bch2_mark_update(trans, i->iter, i->k, NULL, bch2_mark_update(trans, i->iter, i->k, NULL,
i->trigger_flags|BTREE_TRIGGER_GC); i->trigger_flags|BTREE_TRIGGER_GC);
} }
...@@ -461,7 +457,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ...@@ -461,7 +457,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
int ret; int ret;
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
BUG_ON(!btree_node_intent_locked(i->iter, 0)); BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level));
ret = bch2_journal_preres_get(&trans->c->journal, ret = bch2_journal_preres_get(&trans->c->journal,
&trans->journal_preres, trans->journal_preres_u64s, &trans->journal_preres, trans->journal_preres_u64s,
...@@ -495,13 +491,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ...@@ -495,13 +491,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
if (!same_leaf_as_prev(trans, i)) if (!same_leaf_as_prev(trans, i))
bch2_btree_node_lock_for_insert(trans->c, bch2_btree_node_lock_for_insert(trans->c,
i->iter->l[0].b, i->iter); iter_l(i->iter)->b, i->iter);
ret = bch2_trans_commit_write_locked(trans, stopped_at); ret = bch2_trans_commit_write_locked(trans, stopped_at);
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
if (!same_leaf_as_prev(trans, i)) if (!same_leaf_as_prev(trans, i))
bch2_btree_node_unlock_write_inlined(i->iter->l[0].b, bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
i->iter); i->iter);
/* /*
......
...@@ -27,30 +27,78 @@ ...@@ -27,30 +27,78 @@
/* iterate over keys read from the journal: */ /* iterate over keys read from the journal: */
struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
enum btree_id id, unsigned level,
struct bpos pos)
{ {
while (iter->k) { size_t l = 0, r = journal_keys->nr, m;
if (iter->k->btree_id == iter->btree_id)
return bkey_i_to_s_c(iter->k->k);
iter->k++; while (l < r) {
if (iter->k == iter->keys->d + iter->keys->nr) m = l + ((r - l) >> 1);
iter->k = NULL; if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
cmp_int(level, journal_keys->d[m].level) ?:
bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
l = m + 1;
else
r = m;
} }
return bkey_s_c_null; BUG_ON(l < journal_keys->nr &&
(cmp_int(id, journal_keys->d[l].btree_id) ?:
cmp_int(level, journal_keys->d[l].level) ?:
bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
BUG_ON(l &&
(cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
cmp_int(level, journal_keys->d[l - 1].level) ?:
bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
return l < journal_keys->nr ? journal_keys->d + l : NULL;
} }
struct bkey_s_c bch2_journal_iter_next(struct journal_iter *iter) static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
{ {
if (!iter->k) if (iter->k &&
return bkey_s_c_null; iter->k < iter->keys->d + iter->keys->nr &&
iter->k->btree_id == iter->btree_id &&
iter->k->level == iter->level)
return iter->k->k;
iter->k++;
if (iter->k == iter->keys->d + iter->keys->nr)
iter->k = NULL; iter->k = NULL;
return NULL;
}
static void bch2_journal_iter_advance(struct journal_iter *iter)
{
if (iter->k)
iter->k++;
}
return bch2_journal_iter_peek(iter); static void bch2_journal_iter_init(struct journal_iter *iter,
struct journal_keys *journal_keys,
enum btree_id id, unsigned level,
struct bpos pos)
{
iter->btree_id = id;
iter->level = level;
iter->keys = journal_keys;
iter->k = journal_key_search(journal_keys, id, level, pos);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
{
return iter->btree
? bch2_btree_iter_peek(iter->btree)
: bch2_btree_node_iter_peek_unpack(&iter->node_iter,
iter->b, &iter->unpacked);
}
static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
{
if (iter->btree)
bch2_btree_iter_next(iter->btree);
else
bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
} }
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
...@@ -59,10 +107,10 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) ...@@ -59,10 +107,10 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
case none: case none:
break; break;
case btree: case btree:
bch2_btree_iter_next(iter->btree); bch2_journal_iter_advance_btree(iter);
break; break;
case journal: case journal:
bch2_journal_iter_next(&iter->journal); bch2_journal_iter_advance(&iter->journal);
break; break;
} }
...@@ -74,14 +122,16 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * ...@@ -74,14 +122,16 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
struct bkey_s_c ret; struct bkey_s_c ret;
while (1) { while (1) {
struct bkey_s_c btree_k = bch2_btree_iter_peek(iter->btree); struct bkey_s_c btree_k =
struct bkey_s_c journal_k = bch2_journal_iter_peek(&iter->journal); bch2_journal_iter_peek_btree(iter);
struct bkey_s_c journal_k =
bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal));
if (btree_k.k && journal_k.k) { if (btree_k.k && journal_k.k) {
int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
if (!cmp) if (!cmp)
bch2_btree_iter_next(iter->btree); bch2_journal_iter_advance_btree(iter);
iter->last = cmp < 0 ? btree : journal; iter->last = cmp < 0 ? btree : journal;
} else if (btree_k.k) { } else if (btree_k.k) {
...@@ -94,6 +144,14 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * ...@@ -94,6 +144,14 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
} }
ret = iter->last == journal ? journal_k : btree_k; ret = iter->last == journal ? journal_k : btree_k;
if (iter->b &&
bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
iter->journal.k = NULL;
iter->last = none;
return bkey_s_c_null;
}
if (!bkey_deleted(ret.k)) if (!bkey_deleted(ret.k))
break; break;
...@@ -110,41 +168,32 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter * ...@@ -110,41 +168,32 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *
return bch2_btree_and_journal_iter_peek(iter); return bch2_btree_and_journal_iter_peek(iter);
} }
struct journal_key *journal_key_search(struct journal_keys *journal_keys, void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
struct btree_trans *trans,
struct journal_keys *journal_keys,
enum btree_id id, struct bpos pos) enum btree_id id, struct bpos pos)
{ {
size_t l = 0, r = journal_keys->nr, m; memset(iter, 0, sizeof(*iter));
while (l < r) { iter->btree = bch2_trans_get_iter(trans, id, pos, 0);
m = l + ((r - l) >> 1); bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos);
if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
l = m + 1;
else
r = m;
}
BUG_ON(l < journal_keys->nr &&
(cmp_int(id, journal_keys->d[l].btree_id) ?:
bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
BUG_ON(l &&
(cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
return l < journal_keys->nr ? journal_keys->d + l : NULL;
} }
void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
struct btree_trans *trans,
struct journal_keys *journal_keys, struct journal_keys *journal_keys,
enum btree_id id, struct bpos pos) struct btree *b)
{ {
iter->journal.keys = journal_keys; struct bpos start = b->data->min_key;
iter->journal.k = journal_key_search(journal_keys, id, pos);
iter->journal.btree_id = id;
iter->btree = bch2_trans_get_iter(trans, id, pos, 0); if (btree_node_type_is_extents(b->c.btree_id))
start = bkey_successor(start);
memset(iter, 0, sizeof(*iter));
iter->b = b;
bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
bch2_journal_iter_init(&iter->journal, journal_keys,
b->c.btree_id, b->c.level, start);
} }
/* sort and dedup all keys in the journal: */ /* sort and dedup all keys in the journal: */
...@@ -170,6 +219,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) ...@@ -170,6 +219,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
const struct journal_key *r = _r; const struct journal_key *r = _r;
return cmp_int(l->btree_id, r->btree_id) ?: return cmp_int(l->btree_id, r->btree_id) ?:
cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p) ?: bkey_cmp(l->k->k.p, r->k->k.p) ?:
cmp_int(l->journal_seq, r->journal_seq) ?: cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset); cmp_int(l->journal_offset, r->journal_offset);
...@@ -182,6 +232,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) ...@@ -182,6 +232,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq, r->journal_seq) ?: return cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->btree_id, r->btree_id) ?: cmp_int(l->btree_id, r->btree_id) ?:
cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p); bkey_cmp(l->k->k.p, r->k->k.p);
} }
...@@ -218,6 +269,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) ...@@ -218,6 +269,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
for_each_jset_key(k, _n, entry, &p->j) for_each_jset_key(k, _n, entry, &p->j)
keys.d[keys.nr++] = (struct journal_key) { keys.d[keys.nr++] = (struct journal_key) {
.btree_id = entry->btree_id, .btree_id = entry->btree_id,
.level = entry->level,
.k = k, .k = k,
.journal_seq = le64_to_cpu(p->j.seq) - .journal_seq = le64_to_cpu(p->j.seq) -
keys.journal_seq_base, keys.journal_seq_base,
...@@ -230,6 +282,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) ...@@ -230,6 +282,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
while (src < keys.d + keys.nr) { while (src < keys.d + keys.nr) {
while (src + 1 < keys.d + keys.nr && while (src + 1 < keys.d + keys.nr &&
src[0].btree_id == src[1].btree_id && src[0].btree_id == src[1].btree_id &&
src[0].level == src[1].level &&
!bkey_cmp(src[0].k->k.p, src[1].k->k.p)) !bkey_cmp(src[0].k->k.p, src[1].k->k.p))
src++; src++;
...@@ -864,7 +917,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -864,7 +917,7 @@ int bch2_fs_recovery(struct bch_fs *c)
*/ */
bch_info(c, "starting metadata mark and sweep"); bch_info(c, "starting metadata mark and sweep");
err = "error in mark and sweep"; err = "error in mark and sweep";
ret = bch2_gc(c, NULL, true, true); ret = bch2_gc(c, &journal_keys, true, true);
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "mark and sweep done"); bch_verbose(c, "mark and sweep done");
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
struct journal_keys { struct journal_keys {
struct journal_key { struct journal_key {
enum btree_id btree_id:8; enum btree_id btree_id:8;
unsigned level:8;
struct bkey_i *k; struct bkey_i *k;
u32 journal_seq; u32 journal_seq;
u32 journal_offset; u32 journal_offset;
...@@ -17,15 +18,23 @@ struct journal_keys { ...@@ -17,15 +18,23 @@ struct journal_keys {
for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) for (i = (keys).d; i < (keys).d + (keys).nr; (i)++)
struct journal_iter { struct journal_iter {
enum btree_id btree_id;
unsigned level;
struct journal_keys *keys; struct journal_keys *keys;
struct journal_key *k; struct journal_key *k;
enum btree_id btree_id;
}; };
struct btree_and_journal_iter { /*
enum btree_id btree_id; * Iterate over keys in the btree, with keys from the journal overlaid on top:
*/
struct btree_and_journal_iter {
struct btree_iter *btree; struct btree_iter *btree;
struct btree *b;
struct btree_node_iter node_iter;
struct bkey unpacked;
struct journal_iter journal; struct journal_iter journal;
enum last_key_returned { enum last_key_returned {
...@@ -38,12 +47,14 @@ struct btree_and_journal_iter { ...@@ -38,12 +47,14 @@ struct btree_and_journal_iter {
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
struct journal_key *journal_key_search(struct journal_keys *,
enum btree_id, struct bpos);
void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
struct btree_trans *, struct btree_trans *,
struct journal_keys *, struct journal_keys *,
enum btree_id, struct bpos); enum btree_id, struct bpos);
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct journal_keys *,
struct btree *);
int bch2_fs_recovery(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment