Commit e3e464ac authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Move extent overwrite handling out of core btree code

Ever since the btree code was first written, overwriting existing
extents - including partially overwriting and splitting existing
extents - was handled as part of the core btree insert path. The modern
transaction and iterator infrastructure didn't exist then, so that was
the only way it could be done.

This patch moves that outside of the core btree code to a pass that runs
at transaction commit time.

This is a significant simplification to the btree code and overall
reduction in code size, but more importantly it gets us much closer to
the core btree code being completely independent of extents and is
important prep work for snapshots.

This introduces a new on-disk feature bit: the old and new extent
update models are incompatible when the filesystem needs journal
replay, so an unclean filesystem written by the old model must first be
fscked with older bcachefs-tools.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 57b0b3db
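
Before the diff: the heart of the change is how a new extent that overlaps
existing ones gets resolved. Under the old model the btree insert path
trimmed and split old extents in place; under the new model a pass at
transaction commit time classifies each overlap and queues ordinary key
updates. Below is a minimal sketch of that classification, using
hypothetical simplified types invented for illustration (the real code
works on struct bkey and uses bch2_extent_overlap(), visible in the
buckets.c hunk further down):

/*
 * Sketch only: "struct ext" and these names are hypothetical, not the
 * bcachefs types. An extent covers [start, end) in a file's address space.
 */
#include <stdint.h>

struct ext { uint64_t start, end; };

enum ext_overlap {
	EXT_OVERLAP_NONE,	/* no overlap: old extent untouched */
	EXT_OVERLAP_ALL,	/* new covers all of old: delete old */
	EXT_OVERLAP_FRONT,	/* new covers old's front: cut old's front */
	EXT_OVERLAP_BACK,	/* new covers old's back: cut old's back */
	EXT_OVERLAP_MIDDLE,	/* new inside old: split old in two */
};

static enum ext_overlap ext_overlap(struct ext new, struct ext old)
{
	if (new.end <= old.start || old.end <= new.start)
		return EXT_OVERLAP_NONE;
	if (new.start <= old.start && old.end <= new.end)
		return EXT_OVERLAP_ALL;
	if (new.start <= old.start)
		return EXT_OVERLAP_FRONT;
	if (old.end <= new.end)
		return EXT_OVERLAP_BACK;
	return EXT_OVERLAP_MIDDLE;
}

Each case maps onto a delete, cut-front, cut-back, or split of the old
extent; once those are emitted as plain updates at commit time, the core
btree insert path no longer needs to know about extents at all.
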
@@ -1315,12 +1315,14 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
x(inline_data, 8) \
x(new_extent_overwrite, 9) \
x(incompressible, 10) \
x(btree_ptr_v2, 11)
x(btree_ptr_v2, 11) \
x(extents_above_btree_updates, 12)
#define BCH_SB_FEATURES_ALL \
((1ULL << BCH_FEATURE_new_siphash)| \
(1ULL << BCH_FEATURE_new_extent_overwrite)| \
(1ULL << BCH_FEATURE_btree_ptr_v2))
(1ULL << BCH_FEATURE_btree_ptr_v2)| \
(1ULL << BCH_FEATURE_extents_above_btree_updates))
enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f,
@@ -186,8 +186,16 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret;
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
u8 *max_stale, bool initial)
static bool pos_in_journal_keys(struct journal_keys *journal_keys,
enum btree_id id, struct bpos pos)
{
struct journal_key *k = journal_key_search(journal_keys, id, pos);
return k && k->btree_id == id && !bkey_cmp(k->k->k.p, pos);
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
struct journal_keys *journal_keys, bool initial)
{
struct btree_node_iter iter;
struct bkey unpacked;
@@ -201,6 +209,10 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
if (!b->c.level && journal_keys &&
pos_in_journal_keys(journal_keys, b->c.btree_id, k.k->p))
continue;
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, k, max_stale, initial);
@@ -212,6 +224,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
struct journal_keys *journal_keys,
bool initial, bool metadata_only)
{
struct btree_trans trans;
@@ -239,7 +252,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
gc_pos_set(c, gc_pos_btree_node(b));
ret = btree_gc_mark_node(c, b, &max_stale, initial);
ret = btree_gc_mark_node(c, b, &max_stale,
journal_keys, initial);
if (ret)
break;
@@ -281,36 +295,6 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
(int) btree_id_to_gc_phase(r);
}
static int mark_journal_key(struct bch_fs *c, enum btree_id id,
struct bkey_i *insert)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
u8 max_stale;
int ret = 0;
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(insert), &max_stale, true);
if (ret)
return ret;
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
BTREE_ITER_SLOTS, k, ret) {
percpu_down_read(&c->mark_lock);
ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
BTREE_TRIGGER_GC|
BTREE_TRIGGER_NOATOMIC);
percpu_up_read(&c->mark_lock);
if (!ret)
break;
}
return bch2_trans_exit(&trans) ?: ret;
}
static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
bool initial, bool metadata_only)
{
@@ -325,18 +309,21 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id);
int ret = bch2_gc_btree(c, id, initial, metadata_only);
int ret = bch2_gc_btree(c, id, journal_keys,
initial, metadata_only);
if (ret)
return ret;
if (journal_keys && !metadata_only &&
btree_node_type_needs_gc(type)) {
struct journal_key *j;
u8 max_stale;
int ret;
for_each_journal_key(*journal_keys, j)
if (j->btree_id == id) {
ret = mark_journal_key(c, id, j->k);
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(j->k),
&max_stale, initial);
if (ret)
return ret;
}
@@ -708,9 +708,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
unsigned *whiteout_u64s, int write,
bool have_retry)
{
struct bkey_packed *k;
struct bkey prev = KEY(0, 0, 0);
struct bpos prev_data = POS_MIN;
struct bkey_packed *k, *prev = NULL;
bool seen_non_whiteout = false;
unsigned version;
const char *err;
@@ -852,15 +850,15 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
if (!seen_non_whiteout &&
(!bkey_whiteout(k) ||
(bkey_cmp(prev.p, bkey_start_pos(u.k)) > 0))) {
(prev && bkey_iter_cmp(b, prev, k) > 0))) {
*whiteout_u64s = k->_data - i->_data;
seen_non_whiteout = true;
} else if (bkey_cmp(prev_data, bkey_start_pos(u.k)) > 0 ||
bkey_cmp(prev.p, u.k->p) > 0) {
} else if (prev && bkey_iter_cmp(b, prev, k) > 0) {
char buf1[80];
char buf2[80];
struct bkey up = bkey_unpack_key(b, prev);
bch2_bkey_to_text(&PBUF(buf1), &prev);
bch2_bkey_to_text(&PBUF(buf1), &up);
bch2_bkey_to_text(&PBUF(buf2), u.k);
bch2_dump_bset(b, i, 0);
@@ -870,10 +868,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
/* XXX: repair this */
}
if (!bkey_deleted(u.k))
prev_data = u.k->p;
prev = *u.k;
prev = k;
k = bkey_next_skip_noops(k, vstruct_last(i));
}
@@ -1504,12 +1504,12 @@ static struct bkey_s_c __btree_trans_updates_peek(struct btree_iter *iter)
struct btree_trans *trans = iter->trans;
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
trans_for_each_update2(trans, i)
if ((cmp_int(iter->btree_id, i->iter->btree_id) ?:
bkey_cmp(pos, i->k->k.p)) <= 0)
break;
return i < trans->updates + trans->nr_updates &&
return i < trans->updates2 + trans->nr_updates2 &&
iter->btree_id == i->iter->btree_id
? bkey_i_to_s_c(i->k)
: bkey_s_c_null;
@@ -1821,7 +1821,7 @@ int bch2_trans_iter_free(struct btree_trans *trans,
static int bch2_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
{
void *new_iters, *new_updates;
void *p, *new_iters, *new_updates, *new_updates2;
size_t iters_bytes;
size_t updates_bytes;
@@ -1839,21 +1839,27 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
iters_bytes = sizeof(struct btree_iter) * new_size;
updates_bytes = sizeof(struct btree_insert_entry) * new_size;
new_iters = kmalloc(iters_bytes + updates_bytes, GFP_NOFS);
if (new_iters)
p = kmalloc(iters_bytes +
updates_bytes +
updates_bytes, GFP_NOFS);
if (p)
goto success;
new_iters = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
new_size = BTREE_ITER_MAX;
trans->used_mempool = true;
success:
new_updates = new_iters + iters_bytes;
new_iters = p; p += iters_bytes;
new_updates = p; p += updates_bytes;
new_updates2 = p; p += updates_bytes;
memcpy(new_iters, trans->iters,
sizeof(struct btree_iter) * trans->nr_iters);
memcpy(new_updates, trans->updates,
sizeof(struct btree_insert_entry) * trans->nr_updates);
memcpy(new_updates2, trans->updates2,
sizeof(struct btree_insert_entry) * trans->nr_updates2);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
memset(trans->iters, POISON_FREE,
@@ -1865,6 +1871,7 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
trans->iters = new_iters;
trans->updates = new_updates;
trans->updates2 = new_updates2;
trans->size = new_size;
if (trans->iters_live) {
@@ -2126,6 +2133,7 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
trans->need_reset = 0;
trans->nr_updates = 0;
trans->nr_updates2 = 0;
trans->mem_top = 0;
if (trans->fs_usage_deltas) {
@@ -2157,6 +2165,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
trans->updates2 = trans->updates2_onstack;
trans->fs_usage_deltas = NULL;
if (expected_nr_iters > trans->size)
@@ -2194,5 +2203,5 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * nr +
sizeof(struct btree_insert_entry) * nr +
sizeof(u8) * nr);
sizeof(struct btree_insert_entry) * nr);
}
@@ -283,6 +283,7 @@ struct btree_trans {
u8 nr_iters;
u8 nr_updates;
u8 nr_updates2;
u8 size;
unsigned used_mempool:1;
unsigned error:1;
@@ -295,6 +296,7 @@ struct btree_trans {
struct btree_iter *iters;
struct btree_insert_entry *updates;
struct btree_insert_entry *updates2;
/* update path: */
struct journal_res journal_res;
@@ -308,6 +310,7 @@ struct btree_trans {
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[2];
struct btree_insert_entry updates2_onstack[2];
};
#define BTREE_FLAG(flag) \
@@ -132,4 +132,9 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
(_i) < (_trans)->updates + (_trans)->nr_updates; \
(_i)++)
#define trans_for_each_update2(_trans, _i) \
for ((_i) = (_trans)->updates2; \
(_i) < (_trans)->updates2 + (_trans)->nr_updates2; \
(_i)++)
#endif /* _BCACHEFS_BTREE_UPDATE_H */
@@ -303,18 +303,23 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
}
static inline void push_whiteout(struct bch_fs *c, struct btree *b,
struct bkey_packed *k)
struct bpos pos)
{
unsigned u64s = bkeyp_key_u64s(&b->format, k);
struct bkey_packed *dst;
struct bkey_packed k;
BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b));
BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
b->whiteout_u64s += bkeyp_key_u64s(&b->format, k);
dst = unwritten_whiteouts_start(c, b);
memcpy_u64s(dst, k, u64s);
dst->u64s = u64s;
dst->type = KEY_TYPE_deleted;
if (!bkey_pack_pos(&k, pos, b)) {
struct bkey *u = (void *) &k;
bkey_init(u);
u->p = pos;
}
k.needs_whiteout = true;
b->whiteout_u64s += k.u64s;
bkey_copy(unwritten_whiteouts_start(c, b), &k);
}
/*
This diff is collapsed.
@@ -1254,21 +1254,21 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_i *new,
struct bch_fs_usage *fs_usage,
unsigned flags)
unsigned flags,
bool is_extents)
{
struct bch_fs *c = trans->c;
struct btree *b = iter->l[0].b;
unsigned offset = 0;
s64 sectors = 0;
s64 sectors = -((s64) old.k->size);
flags |= BTREE_TRIGGER_OVERWRITE;
if (btree_node_is_extents(b)
if (is_extents
? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
: bkey_cmp(new->k.p, old.k->p))
return 0;
if (btree_node_is_extents(b)) {
if (is_extents) {
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
offset = 0;
@@ -1341,7 +1341,8 @@ int bch2_mark_update(struct btree_trans *trans,
struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
ret = bch2_mark_overwrite(trans, iter, k, insert,
fs_usage, flags);
fs_usage, flags,
btree_node_type_is_extents(iter->btree_id));
if (ret <= 0)
break;
@@ -268,7 +268,7 @@ int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage_online *,
int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
struct bkey_s_c, struct bkey_i *,
struct bch_fs_usage *, unsigned);
struct bch_fs_usage *, unsigned, bool);
int bch2_mark_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct bch_fs_usage *, unsigned);
This diff is collapsed.
@@ -11,9 +11,6 @@ int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
enum btree_insert_ret
bch2_extent_can_insert(struct btree_trans *, struct btree_iter *,
struct bkey_i *, unsigned *);
void bch2_insert_fixup_extent(struct btree_trans *,
struct btree_iter *,
struct bkey_i *);
struct bkey_i *);
#endif /* _BCACHEFS_EXTENT_UPDATE_H */
@@ -422,6 +422,42 @@ static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
POS(inode_nr + 1, 0), NULL);
}
static int bch2_fix_overlapping_extent(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k, struct bpos cut_at)
{
struct btree_iter *u_iter;
struct bkey_i *u;
int ret;
u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(u);
if (ret)
return ret;
bkey_reassemble(u, k);
bch2_cut_front(cut_at, u);
u_iter = bch2_trans_copy_iter(trans, iter);
ret = PTR_ERR_OR_ZERO(u_iter);
if (ret)
return ret;
/*
* We don't want to go through the
* extent_handle_overwrites path:
*/
__bch2_btree_iter_set_pos(u_iter, u->k.p, false);
/*
* XXX: this is going to leave disk space
* accounting slightly wrong
*/
ret = bch2_trans_update(trans, u_iter, u, 0);
bch2_trans_iter_put(trans, u_iter);
return ret;
}
/*
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
* that i_size and i_sectors are consistent
@@ -433,6 +469,7 @@ static int check_extents(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey prev = KEY(0, 0, 0);
u64 i_sectors;
int ret = 0;
@@ -444,6 +481,25 @@ static int check_extents(struct bch_fs *c)
POS(BCACHEFS_ROOT_INO, 0), 0);
retry:
for_each_btree_key_continue(iter, 0, k, ret) {
if (bkey_cmp(prev.p, bkey_start_pos(k.k)) > 0) {
char buf1[100];
char buf2[100];
bch2_bkey_to_text(&PBUF(buf1), &prev);
bch2_bkey_to_text(&PBUF(buf2), k.k);
if (fsck_err(c, "overlapping extents: %s, %s", buf1, buf2)) {
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_fix_overlapping_extent(&trans,
iter, k, prev.p));
if (ret)
goto err;
}
}
prev = *k.k;
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
break;
@@ -161,13 +161,16 @@ static void journal_entries_free(struct list_head *list)
}
}
/*
* When keys compare equal, oldest compares first:
*/
static int journal_sort_key_cmp(const void *_l, const void *_r)
{
const struct journal_key *l = _l;
const struct journal_key *r = _r;
return cmp_int(l->btree_id, r->btree_id) ?:
bkey_cmp(l->pos, r->pos) ?:
bkey_cmp(l->k->k.p, r->k->k.p) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset);
}
@@ -179,25 +182,11 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->btree_id, r->btree_id) ?:
bkey_cmp(l->pos, r->pos);
}
static void journal_keys_sift(struct journal_keys *keys, struct journal_key *i)
{
while (i + 1 < keys->d + keys->nr &&
journal_sort_key_cmp(i, i + 1) > 0) {
swap(i[0], i[1]);
i++;
}
bkey_cmp(l->k->k.p, r->k->k.p);
}
static void journal_keys_free(struct journal_keys *keys)
{
struct journal_key *i;
for_each_journal_key(*keys, i)
if (i->allocated)
kfree(i->k);
kvfree(keys->d);
keys->d = NULL;
keys->nr = 0;
@@ -208,15 +197,15 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
struct journal_replay *p;
struct jset_entry *entry;
struct bkey_i *k, *_n;
struct journal_keys keys = { NULL }, keys_deduped = { NULL };
struct journal_key *i;
struct journal_keys keys = { NULL };
struct journal_key *src, *dst;
size_t nr_keys = 0;
list_for_each_entry(p, journal_entries, list)
for_each_jset_key(k, _n, entry, &p->j)
nr_keys++;
keys.journal_seq_base = keys_deduped.journal_seq_base =
keys.journal_seq_base =
le64_to_cpu(list_first_entry(journal_entries,
struct journal_replay,
list)->j.seq);
@@ -225,96 +214,31 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
if (!keys.d)
goto err;
keys_deduped.d = kvmalloc(sizeof(keys.d[0]) * nr_keys * 2, GFP_KERNEL);
if (!keys_deduped.d)
goto err;
list_for_each_entry(p, journal_entries, list)
for_each_jset_key(k, _n, entry, &p->j) {
if (bkey_deleted(&k->k) &&
btree_node_type_is_extents(entry->btree_id))
continue;
for_each_jset_key(k, _n, entry, &p->j)
keys.d[keys.nr++] = (struct journal_key) {
.btree_id = entry->btree_id,
.pos = bkey_start_pos(&k->k),
.k = k,
.journal_seq = le64_to_cpu(p->j.seq) -
keys.journal_seq_base,
.journal_offset = k->_data - p->j._data,
};
}
sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL);
i = keys.d;
while (i < keys.d + keys.nr) {
if (i + 1 < keys.d + keys.nr &&
i[0].btree_id == i[1].btree_id &&
!bkey_cmp(i[0].pos, i[1].pos)) {
if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) <= 0) {
i++;
} else {
bch2_cut_front(i[1].k->k.p, i[0].k);
i[0].pos = i[1].k->k.p;
journal_keys_sift(&keys, i);
}
continue;
}
if (i + 1 < keys.d + keys.nr &&
i[0].btree_id == i[1].btree_id &&
bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)) > 0) {
if ((cmp_int(i[0].journal_seq, i[1].journal_seq) ?:
cmp_int(i[0].journal_offset, i[1].journal_offset)) < 0) {
if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) <= 0) {
bch2_cut_back(bkey_start_pos(&i[1].k->k), i[0].k);
} else {
struct bkey_i *split =
kmalloc(bkey_bytes(i[0].k), GFP_KERNEL);
if (!split)
goto err;
bkey_copy(split, i[0].k);
bch2_cut_back(bkey_start_pos(&i[1].k->k), split);
keys_deduped.d[keys_deduped.nr++] = (struct journal_key) {
.btree_id = i[0].btree_id,
.allocated = true,
.pos = bkey_start_pos(&split->k),
.k = split,
.journal_seq = i[0].journal_seq,
.journal_offset = i[0].journal_offset,
};
bch2_cut_front(i[1].k->k.p, i[0].k);
i[0].pos = i[1].k->k.p;
journal_keys_sift(&keys, i);
continue;
}
} else {
if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) >= 0) {
i[1] = i[0];
i++;
continue;
} else {
bch2_cut_front(i[0].k->k.p, i[1].k);
i[1].pos = i[0].k->k.p;
journal_keys_sift(&keys, i + 1);
continue;
}
}
}
src = dst = keys.d;
while (src < keys.d + keys.nr) {
while (src + 1 < keys.d + keys.nr &&
src[0].btree_id == src[1].btree_id &&
!bkey_cmp(src[0].k->k.p, src[1].k->k.p))
src++;
keys_deduped.d[keys_deduped.nr++] = *i++;
*dst++ = *src++;
}
kvfree(keys.d);
return keys_deduped;
keys.nr = dst - keys.d;
err:
journal_keys_free(&keys_deduped);
kvfree(keys.d);
return (struct journal_keys) { NULL };
return keys;
}
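
The rewritten journal_keys_sort() above no longer cuts and re-sifts
overlapping extents; it sorts and then dedups, keeping the newest key at
each position. That works because journal_sort_key_cmp() orders keys with
equal positions oldest first, so the last entry of each equal run is the
newest. A standalone sketch of that keep-newest dedup pattern, using
hypothetical simplified types invented for the example:

/*
 * Sketch only: "struct key" here is hypothetical, not the bcachefs type.
 * Assumes the array is sorted so that entries with equal (id, pos) appear
 * oldest first; the last element of each equal run therefore survives.
 */
#include <stddef.h>

struct key {
	int id;			/* btree id */
	unsigned long pos;	/* key position */
	unsigned seq;		/* ascending within equal runs (set by sort) */
};

static int same_pos(const struct key *l, const struct key *r)
{
	return l->id == r->id && l->pos == r->pos;
}

static size_t dedup_keep_newest(struct key *d, size_t nr)
{
	struct key *src = d, *dst = d;

	while (src < d + nr) {
		/* skip to the newest (last) entry with this position */
		while (src + 1 < d + nr && same_pos(src, src + 1))
			src++;
		*dst++ = *src++;
	}
	return dst - d;	/* new array length */
}

This is the same src/dst compaction the patch uses in place of the old
journal_keys_sift() machinery, which is why the "allocated" flag and the
extra keys_deduped array can be deleted.
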
/* journal replay: */
@@ -365,11 +289,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p);
split_iter = bch2_trans_copy_iter(&trans, iter);
ret = PTR_ERR_OR_ZERO(split_iter);
if (ret)
goto err;
split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
ret = PTR_ERR_OR_ZERO(split);
if (ret)
@@ -388,12 +307,25 @@ static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
}
bkey_copy(split, k);
bch2_cut_front(split_iter->pos, split);
bch2_cut_front(iter->pos, split);
bch2_cut_back(atomic_end, split);
split_iter = bch2_trans_copy_iter(&trans, iter);
ret = PTR_ERR_OR_ZERO(split_iter);
if (ret)
goto err;
/*
* It's important that we don't go through the
* extent_handle_overwrites() and extent_update_to_keys() path
* here: journal replay is supposed to treat extents like
* regular keys
*/
__bch2_btree_iter_set_pos(split_iter, split->k.p, false);
bch2_trans_update(&trans, split_iter, split, !remark
? BTREE_TRIGGER_NORUN
: BTREE_TRIGGER_NOOVERWRITES);
bch2_btree_iter_set_pos(iter, split->k.p);
} while (bkey_cmp(iter->pos, k->k.p) < 0);
@@ -424,11 +356,18 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
struct btree_iter *iter;
int ret;
iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
iter = bch2_trans_get_iter(trans, id, k->k.p, BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
/*
* iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run
* extent_handle_overwrites() and extent_update_to_keys() - but we don't
* want that here, journal replay is supposed to treat extents like
* regular keys:
*/
__bch2_btree_iter_set_pos(iter, k->k.p, false);
ret = bch2_btree_iter_traverse(iter) ?:
bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
bch2_trans_iter_put(trans, iter);
@@ -459,7 +398,7 @@ static int bch2_journal_replay(struct bch_fs *c,
if (i->btree_id == BTREE_ID_ALLOC)
ret = bch2_alloc_replay_key(c, i->k);
else if (btree_node_type_is_extents(i->btree_id))
else if (i->k->k.size)
ret = bch2_extent_replay_key(c, i->btree_id, i->k);
else
ret = bch2_journal_replay_key(c, i->btree_id, i->k);
@@ -859,6 +798,15 @@ int bch2_fs_recovery(struct bch_fs *c)
journal_seq = le64_to_cpu(clean->journal_seq) + 1;
}
if (!c->sb.clean &&
!(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
ret = -EINVAL;
goto err;
}
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
ret = journal_replay_early(c, clean, &journal_entries);
if (ret)
goto err;
@@ -5,8 +5,6 @@
struct journal_keys {
struct journal_key {
enum btree_id btree_id:8;
unsigned allocated:1;
struct bpos pos;
struct bkey_i *k;
u32 journal_seq;
u32 journal_offset;