Commit b2930396 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Fix reading of alloc info after unclean shutdown

When updates to interior nodes started being journalled, it meant that after
an unclean shutdown, until journal replay is done, we can't walk the btree
without overlaying the updates from the journal.

The initial btree gc was changed to walk the btree overlaying keys from
the journal - but bch2_alloc_read() and bch2_stripes_read() were missed.
Major whoops...
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 692c3f06
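For context on "overlaying the updates from the journal": before journal replay, a key may exist only in the journal, or a journalled version may supersede what the on-disk btree node contains, so any pre-replay walk has to merge the two sources. Below is a conceptual sketch of that overlay rule, assuming bcachefs's usual bkey_s_c/bkey_cmp helpers; it is not the actual bch2_btree_and_journal_iter code, and the helper name is made up for illustration.

/*
 * Conceptual sketch only, not the bcachefs implementation: when walking the
 * btree before journal replay, a key from the (sorted, deduped) journal keys
 * must shadow whatever the on-disk btree node holds at the same position.
 * Assumes the bcachefs bkey helpers (struct bkey_s_c, bkey_cmp) are in scope.
 */
static struct bkey_s_c overlay_peek(struct bkey_s_c btree_k,
				    struct bkey_s_c journal_k)
{
	if (!journal_k.k)
		return btree_k;
	if (!btree_k.k)
		return journal_k;

	/* at the same position, the newer journal key wins */
	return bkey_cmp(journal_k.k->p, btree_k.k->p) <= 0
		? journal_k : btree_k;
}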
@@ -208,29 +208,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
get_alloc_field(a.v, &d, i));
}
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
{
struct btree_trans trans;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
BTREE_ID_ALLOC, POS_MIN);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (!level)
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
bch2_btree_and_journal_iter_advance(&iter);
}
return 0;
}
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
struct bch_dev *ca;
unsigned i;
int ret = 0;
ret = bch2_trans_exit(&trans) ?: ret;
ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
NULL, bch2_alloc_read_fn);
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
......
@@ -1273,38 +1273,28 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
return ret;
}
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
{
struct btree_trans trans;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
int ret;
ret = bch2_fs_ec_start(c);
if (ret)
return ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
BTREE_ID_EC, POS_MIN);
int ret = 0;
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type == KEY_TYPE_stripe)
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
bch2_btree_and_journal_iter_advance(&iter);
}
return ret;
}
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC,
NULL, bch2_stripes_read_fn);
if (ret)
bch_err(c, "error reading stripes: %i", ret);
return ret;
}
return 0;
return ret;
}
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
......@@ -1343,11 +1333,6 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
return 0;
}
int bch2_fs_ec_start(struct bch_fs *c)
{
return bch2_ec_mem_alloc(c, false);
}
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;
......
@@ -157,8 +157,6 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
int bch2_fs_ec_start(struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);
......
@@ -191,6 +191,78 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
b->c.btree_id, b->c.level, b->data->min_key);
}
/* Walk btree, overlaying keys from the journal: */
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
btree_walk_key_fn key_fn)
{
struct btree_and_journal_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
ret = key_fn(c, btree_id, b->c.level, k);
if (ret)
break;
if (b->c.level) {
struct btree *child;
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
if (b->c.level > 0) {
child = bch2_btree_node_get_noiter(c, &tmp.k,
b->c.btree_id, b->c.level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn);
six_unlock_read(&child->c.lock);
if (ret)
break;
}
} else {
bch2_btree_and_journal_iter_advance(&iter);
}
}
return ret;
}
int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
btree_walk_key_fn key_fn)
{
struct btree *b = c->btree_roots[btree_id].b;
int ret = 0;
if (btree_node_fake(b))
return 0;
six_lock_read(&b->c.lock, NULL, NULL);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
node_fn, key_fn) ?:
key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key));
six_unlock_read(&b->c.lock);
return ret;
}
/* sort and dedup all keys in the journal: */
void bch2_journal_entries_free(struct list_head *list)
......
......@@ -44,6 +44,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct journal_keys *,
struct btree *);
typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k);
int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
btree_walk_node_fn, btree_walk_key_fn);
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);
......
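As a usage note for the new walker API declared above: a hypothetical caller (not part of this patch) could supply both a node callback and a key callback; the two callers converted here, bch2_alloc_read_fn and bch2_stripes_read_fn, pass NULL for the node callback. The names below are illustrative only.

/* Hypothetical caller, for illustration only - not part of this patch. */
/* Assumes the declarations from recovery.h above are in scope. */
static int example_node_fn(struct bch_fs *c, struct btree *b)
{
	/* per-node hook, invoked for the root and while descending interior nodes */
	return 0;
}

static int example_key_fn(struct bch_fs *c, enum btree_id id,
			  unsigned level, struct bkey_s_c k)
{
	/* level == 0 for leaf keys; interior node pointers arrive with level > 0 */
	return 0;
}

static int example_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
	/* BTREE_ID_ALLOC reused from this patch purely as an example id */
	return bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
					   example_node_fn, example_key_fn);
}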
@@ -199,6 +199,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
goto nowrote_alloc;
bch_verbose(c, "writing alloc info");
do {
wrote = false;
@@ -229,6 +231,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
clean_passes = wrote ? 0 : clean_passes + 1;
} while (clean_passes < 2);
bch_verbose(c, "writing alloc info complete");
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
nowrote_alloc:
for_each_member_device(ca, c, i)
@@ -313,8 +316,10 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags) &&
test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
!c->opts.norecovery)
!c->opts.norecovery) {
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
}
clear_bit(BCH_FS_RW, &c->flags);
}
......