bcachefs: Mark superblocks transactionally

More work towards getting rid of the in memory struct bucket: this path adds code for marking superblock and journal buckets via the btree, and uses it in the device add and journal resize paths. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

bcachefs: Mark superblocks transactionally
More work towards getting rid of the in memory struct bucket: this path adds code for marking superblock and journal buckets via the btree, and uses it in the device add and journal resize paths. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
bfcf840d · Kent Overstreet · Kent Overstreet · 9afc6652 · bfcf840d · bfcf840d
Commit bfcf840d authored Jan 22, 2021 by Kent Overstreet Committed by Kent Overstreet Oct 22, 2023
6 changed files
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -323,48 +323,36 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	return ret;
 }

-int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
+int bch2_alloc_write(struct bch_fs *c, unsigned flags)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;
-	u64 first_bucket	= ca->mi.first_bucket;
-	u64 nbuckets		= ca->mi.nbuckets;
+	struct bch_dev *ca;
+	unsigned i;
 	int ret = 0;

 	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);

-	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
-				   POS(ca->dev_idx, first_bucket),
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
 				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

-	while (iter->pos.offset < nbuckets) {
-		bch2_trans_cond_resched(&trans);
-
-		ret = bch2_alloc_write_key(&trans, iter, flags);
-		if (ret)
-			break;
-		bch2_btree_iter_next_slot(iter);
-	}
-
-	bch2_trans_exit(&trans);
-
-	return ret;
-}
+	for_each_member_device(ca, c, i) {
+		bch2_btree_iter_set_pos(iter,
+			POS(ca->dev_idx, ca->mi.first_bucket));

-int bch2_alloc_write(struct bch_fs *c, unsigned flags)
-{
-	struct bch_dev *ca;
-	unsigned i;
-	int ret = 0;
+		while (iter->pos.offset < ca->mi.nbuckets) {
+			bch2_trans_cond_resched(&trans);

-	for_each_member_device(ca, c, i) {
-		bch2_dev_alloc_write(c, ca, flags);
-		if (ret) {
-			percpu_ref_put(&ca->io_ref);
-			break;
+			ret = bch2_alloc_write_key(&trans, iter, flags);
+			if (ret) {
+				percpu_ref_put(&ca->io_ref);
+				goto err;
+			}
+			bch2_btree_iter_next_slot(iter);
 		}
 	}
-
+err:
+	bch2_trans_exit(&trans);
 	return ret;
 }


--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -98,7 +98,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);

-int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
 int bch2_alloc_write(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);


--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -2060,6 +2060,168 @@ int bch2_trans_mark_update(struct btree_trans *trans,
 	return ret;
 }

+static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+				    struct bch_dev *ca, size_t b,
+				    enum bch_data_type type,
+				    unsigned sectors)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter *iter;
+	struct bkey_alloc_unpacked u;
+	struct bkey_i_alloc *a;
+	struct bch_extent_ptr ptr = {
+		.dev = ca->dev_idx,
+		.offset = bucket_to_sector(ca, b),
+	};
+	int ret = 0;
+
+	a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+	ret = PTR_ERR_OR_ZERO(a);
+	if (ret)
+		return ret;
+
+	ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u);
+	if (ret)
+		return ret;
+
+	if (u.data_type && u.data_type != type) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+			"while marking %s",
+			iter->pos.inode, iter->pos.offset, u.gen,
+			bch2_data_types[u.data_type],
+			bch2_data_types[type],
+			bch2_data_types[type]);
+		ret = -EIO;
+		goto out;
+	}
+
+	if ((unsigned) (u.dirty_sectors + sectors) > ca->mi.bucket_size) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %llu:%llu gen %u data type %s sector count overflow: %u + %u > %u\n"
+			"while marking %s",
+			iter->pos.inode, iter->pos.offset, u.gen,
+			bch2_data_types[u.data_type ?: type],
+			u.dirty_sectors, sectors, ca->mi.bucket_size,
+			bch2_data_types[type]);
+		ret = -EIO;
+		goto out;
+	}
+
+	if (u.data_type		== type &&
+	    u.dirty_sectors	== sectors)
+		goto out;
+
+	u.data_type	= type;
+	u.dirty_sectors	= sectors;
+
+	bkey_alloc_init(&a->k_i);
+	a->k.p = iter->pos;
+	bch2_alloc_pack(a, u);
+	bch2_trans_update(trans, iter, &a->k_i, 0);
+out:
+	bch2_trans_iter_put(trans, iter);
+	return ret;
+}
+
+int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+				    struct disk_reservation *res,
+				    struct bch_dev *ca, size_t b,
+				    enum bch_data_type type,
+				    unsigned sectors)
+{
+	return __bch2_trans_do(trans, res, NULL, 0,
+			__bch2_trans_mark_metadata_bucket(trans, ca, b, BCH_DATA_journal,
+							ca->mi.bucket_size));
+
+}
+
+static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
+					    struct disk_reservation *res,
+					    struct bch_dev *ca,
+					    u64 start, u64 end,
+					    enum bch_data_type type,
+					    u64 *bucket, unsigned *bucket_sectors)
+{
+	int ret;
+
+	do {
+		u64 b = sector_to_bucket(ca, start);
+		unsigned sectors =
+			min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
+
+		if (b != *bucket) {
+			if (*bucket_sectors) {
+				ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+						*bucket, type, *bucket_sectors);
+				if (ret)
+					return ret;
+			}
+
+			*bucket		= b;
+			*bucket_sectors	= 0;
+		}
+
+		*bucket_sectors	+= sectors;
+		start += sectors;
+	} while (!ret && start < end);
+
+	return 0;
+}
+
+static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
+			     struct disk_reservation *res,
+			     struct bch_dev *ca)
+{
+	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+	u64 bucket = 0;
+	unsigned i, bucket_sectors = 0;
+	int ret;
+
+	for (i = 0; i < layout->nr_superblocks; i++) {
+		u64 offset = le64_to_cpu(layout->sb_offset[i]);
+
+		if (offset == BCH_SB_SECTOR) {
+			ret = bch2_trans_mark_metadata_sectors(trans, res, ca,
+						0, BCH_SB_SECTOR,
+						BCH_DATA_sb, &bucket, &bucket_sectors);
+			if (ret)
+				return ret;
+		}
+
+		ret = bch2_trans_mark_metadata_sectors(trans, res, ca, offset,
+				      offset + (1 << layout->sb_max_size_bits),
+				      BCH_DATA_sb, &bucket, &bucket_sectors);
+		if (ret)
+			return ret;
+	}
+
+	if (bucket_sectors) {
+		ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+				bucket, BCH_DATA_sb, bucket_sectors);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < ca->journal.nr; i++) {
+		ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+				ca->journal.buckets[i],
+				BCH_DATA_journal, ca->mi.bucket_size);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int bch2_trans_mark_dev_sb(struct bch_fs *c,
+			   struct disk_reservation *res,
+			   struct bch_dev *ca)
+{
+	return bch2_trans_do(c, res, NULL, 0,
+			__bch2_trans_mark_dev_sb(&trans, res, ca));
+}
+
 /* Disk reservations: */

 #define SECTORS_CACHE	1024

--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -259,6 +259,12 @@ int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
 			   struct bkey_i *insert, unsigned);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);

+int bch2_trans_mark_metadata_bucket(struct btree_trans *,
+			struct disk_reservation *, struct bch_dev *,
+			size_t, enum bch_data_type, unsigned);
+int bch2_trans_mark_dev_sb(struct bch_fs *, struct disk_reservation *,
+			   struct bch_dev *);
+
 /* disk reservations: */

 static inline void bch2_disk_reservation_put(struct bch_fs *c,

--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -9,6 +9,7 @@
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_gc.h"
+#include "btree_update.h"
 #include "buckets.h"
 #include "journal.h"
 #include "journal_io.h"
@@ -823,18 +824,28 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		if (pos <= ja->cur_idx)
 			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;

-		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
-					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB),
-					  0);
+		if (!c || new_fs)
+			bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
+						  ca->mi.bucket_size,
+						  gc_phase(GC_PHASE_SB),
+						  0);

 		if (c) {
 			spin_unlock(&c->journal.lock);
 			percpu_up_read(&c->mark_lock);
 		}

+		if (c && !new_fs)
+			ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+				bch2_trans_mark_metadata_bucket(&trans, NULL, ca,
+						bucket, BCH_DATA_journal,
+						ca->mi.bucket_size));
+
 		if (!new_fs)
 			bch2_open_bucket_put(c, ob);
+
+		if (ret)
+			goto err;
 	}
 err:
 	bch2_sb_resize_journal(&ca->disk_sb,

--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1220,13 +1220,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;

-	if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
-	    !percpu_u64_get(&ca->usage[0]->d[BCH_DATA_sb].buckets)) {
-		mutex_lock(&c->sb_lock);
-		bch2_mark_dev_superblock(ca->fs, ca, 0);
-		mutex_unlock(&c->sb_lock);
-	}
-
 	bch2_dev_sysfs_online(c, ca);

 	if (c->sb.nr_devices == 1)
@@ -1600,7 +1593,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	 * allocate the journal, reset all the marks, then remark after we
 	 * attach...
 	 */
-	bch2_mark_dev_superblock(ca->fs, ca, 0);
+	bch2_mark_dev_superblock(NULL, ca, 0);

 	err = "journal alloc failed";
 	ret = bch2_dev_journal_alloc(ca);
@@ -1659,15 +1652,13 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	ca->disk_sb.sb->dev_idx	= dev_idx;
 	bch2_dev_attach(c, ca, dev_idx);

-	bch2_mark_dev_superblock(c, ca, 0);
-
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);

-	err = "alloc write failed";
-	ret = bch2_dev_alloc_write(c, ca, 0);
+	err = "error marking superblock";
+	ret = bch2_trans_mark_dev_sb(c, NULL, ca);
 	if (ret)
-		goto err;
+		goto err_late;

 	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
 		err = __bch2_dev_read_write(c, ca);
@@ -1688,6 +1679,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	bch_err(c, "Unable to add device: %s", err);
 	return ret;
 err_late:
+	up_write(&c->state_lock);
 	bch_err(c, "Error going rw after adding device: %s", err);
 	return -EINVAL;
 }
@@ -1723,6 +1715,12 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 	}

 	ca = bch_dev_locked(c, dev_idx);
+
+	if (bch2_trans_mark_dev_sb(c, NULL, ca)) {
+		err = "bch2_trans_mark_dev_sb() error";
+		goto err;
+	}
+
 	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
 		err = __bch2_dev_read_write(c, ca);
 		if (err)