Commit f2b542ba authored by Kent Overstreet

bcachefs: Go RW before check_alloc_info()

It's possible to do btree updates before going RW by adding them to the
list of updates for journal replay to do, but this is limited by what
fits in RAM. This patch switches the second alloc info phase to run
after going RW - btree_gc has already ensured the alloc btree itself is
correct - and tweaks the allocation path to deal with the potential
small inconsistencies.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 5f5c7466
...@@ -583,6 +583,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, ...@@ -583,6 +583,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
goto err; goto err;
if (ca->mi.freespace_initialized && if (ca->mi.freespace_initialized &&
test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) &&
bch2_trans_inconsistent_on(old.k->type != old_type, trans, bch2_trans_inconsistent_on(old.k->type != old_type, trans,
"incorrect key when %s %s btree (got %s should be %s)\n" "incorrect key when %s %s btree (got %s should be %s)\n"
" for %s", " for %s",
...@@ -1028,21 +1029,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, ...@@ -1028,21 +1029,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto write; goto write;
} }
if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans, if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
bch2_trans_inconsistent(trans,
"clearing need_discard but journal_seq %llu > flushed_seq %llu\n" "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
"%s", "%s",
a->v.journal_seq, a->v.journal_seq,
c->journal.flushed_seq_ondisk, c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO; ret = -EIO;
}
goto out; goto out;
} }
if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans, if (a->v.data_type != BCH_DATA_need_discard) {
if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
bch2_trans_inconsistent(trans,
"bucket incorrectly set in need_discard btree\n" "bucket incorrectly set in need_discard btree\n"
"%s", "%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO; ret = -EIO;
}
goto out; goto out;
} }
......
...@@ -316,28 +316,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc ...@@ -316,28 +316,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
a = bch2_alloc_to_v4(k, &a_convert); a = bch2_alloc_to_v4(k, &a_convert);
if (genbits != (alloc_freespace_genbits(*a) >> 56)) { if (a->data_type != BCH_DATA_free) {
prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
" freespace key ", ob = NULL;
genbits, alloc_freespace_genbits(*a) >> 56); goto err;
}
prt_printf(&buf, "non free bucket in freespace btree\n"
" freespace key ");
bch2_bkey_val_to_text(&buf, c, freespace_k); bch2_bkey_val_to_text(&buf, c, freespace_k);
prt_printf(&buf, "\n "); prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k); bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_inconsistent(trans, "%s", buf.buf); bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO); ob = ERR_PTR(-EIO);
goto err; goto err;
} }
if (a->data_type != BCH_DATA_free) { if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
prt_printf(&buf, "non free bucket in freespace btree\n" test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
" freespace key "); prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
" freespace key ",
genbits, alloc_freespace_genbits(*a) >> 56);
bch2_bkey_val_to_text(&buf, c, freespace_k); bch2_bkey_val_to_text(&buf, c, freespace_k);
prt_printf(&buf, "\n "); prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k); bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_inconsistent(trans, "%s", buf.buf); bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO); ob = ERR_PTR(-EIO);
goto err; goto err;
} }
ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl); ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl);
...@@ -505,6 +511,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -505,6 +511,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL; struct open_bucket *ob = NULL;
bool freespace = READ_ONCE(ca->mi.freespace_initialized);
u64 avail; u64 avail;
struct bucket_alloc_state s = { 0 }; struct bucket_alloc_state s = { 0 };
bool waiting = false; bool waiting = false;
...@@ -543,13 +550,18 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -543,13 +550,18 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
if (ob) if (ob)
return ob; return ob;
} }
alloc:
ob = likely(ca->mi.freespace_initialized) ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl) ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
: bch2_bucket_alloc_early(trans, ca, reserve, &s, cl); : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl);
if (s.skipped_need_journal_commit * 2 > avail) if (s.skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL); bch2_journal_flush_async(&c->journal, NULL);
if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
freespace = false;
goto alloc;
}
err: err:
if (!ob) if (!ob)
ob = ERR_PTR(-BCH_ERR_no_buckets_found); ob = ERR_PTR(-BCH_ERR_no_buckets_found);
......
...@@ -549,6 +549,7 @@ enum { ...@@ -549,6 +549,7 @@ enum {
/* fsck passes: */ /* fsck passes: */
BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
BCH_FS_CHECK_ALLOC_DONE,
BCH_FS_CHECK_LRUS_DONE, BCH_FS_CHECK_LRUS_DONE,
BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
BCH_FS_FSCK_DONE, BCH_FS_FSCK_DONE,
......
...@@ -1260,13 +1260,6 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -1260,13 +1260,6 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
bch_info(c, "checking need_discard and freespace btrees");
err = "error checking need_discard and freespace btrees";
ret = bch2_check_alloc_info(c);
if (ret)
goto err;
bch_verbose(c, "done checking need_discard and freespace btrees");
if (c->sb.version < bcachefs_metadata_version_snapshot_2) { if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
err = "error creating root snapshot node"; err = "error creating root snapshot node";
ret = bch2_fs_initialize_subvolumes(c); ret = bch2_fs_initialize_subvolumes(c);
...@@ -1291,6 +1284,15 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -1291,6 +1284,15 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.verbose || !c->sb.clean) if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done"); bch_info(c, "journal replay done");
bch_info(c, "checking need_discard and freespace btrees");
err = "error checking need_discard and freespace btrees";
ret = bch2_check_alloc_info(c);
if (ret)
goto err;
bch_verbose(c, "done checking need_discard and freespace btrees");
set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
bch_info(c, "checking lrus"); bch_info(c, "checking lrus");
err = "error checking lrus"; err = "error checking lrus";
ret = bch2_check_lrus(c); ret = bch2_check_lrus(c);
...@@ -1308,6 +1310,7 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -1308,6 +1310,7 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
} else { } else {
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
set_bit(BCH_FS_FSCK_DONE, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment