Commit e0541a93 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Kill some dependencies on ei_inode

Moving bch2_extent_update() to io.c will be greatly simplified if we
no longer have to keep ei_inode.bi_size/bi_sectors up to date.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent daf3fe50
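
In outline, the commit stops consulting the in-memory shadow copy inode->ei_inode.bi_size and instead fetches the on-disk inode through a btree transaction wherever the current on-disk i_size is needed. A condensed sketch of that pattern, assembled only from helpers the diff itself uses (the wrapper name and the trimmed error handling are illustrative, not part of this commit):

/*
 * Sketch only: read the current on-disk i_size rather than trusting
 * inode->ei_inode.bi_size. Hypothetical wrapper; the calls
 * (bch2_trans_init, bch2_inode_peek, bch2_trans_exit) are the ones
 * the diff below relies on.
 */
static int read_on_disk_i_size(struct bch_fs *c, struct bch_inode_info *inode,
			       u64 *i_size)
{
	struct bch_inode_unpacked inode_u;
	struct btree_trans trans;
	struct btree_iter *iter;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	/* Look up the packed inode in the btree; iter is an ERR_PTR on failure: */
	iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
	ret = PTR_ERR_OR_ZERO(iter);
	bch2_trans_exit(&trans);

	if (!ret)
		*i_size = inode_u.bi_size;
	return ret;
}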
@@ -2453,14 +2453,18 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
 				    from, round_up(from, PAGE_SIZE));
 }
 
-static int bch2_extend(struct bch_inode_info *inode, struct iattr *iattr)
+static int bch2_extend(struct bch_inode_info *inode,
+		       struct bch_inode_unpacked *inode_u,
+		       struct iattr *iattr)
 {
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 	struct address_space *mapping = inode->v.i_mapping;
 	int ret;
 
-	ret = filemap_write_and_wait_range(mapping,
-			inode->ei_inode.bi_size, S64_MAX);
+	/*
+	 * sync appends:
+	 */
+	ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
 	if (ret)
 		return ret;
@@ -2501,19 +2505,31 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
 {
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 	struct address_space *mapping = inode->v.i_mapping;
+	struct bch_inode_unpacked inode_u;
+	struct btree_trans trans;
+	struct btree_iter *iter;
 	u64 new_i_size = iattr->ia_size;
-	bool shrink;
 	int ret = 0;
 
 	inode_dio_wait(&inode->v);
 	bch2_pagecache_block_get(&inode->ei_pagecache_lock);
 
-	BUG_ON(inode->v.i_size < inode->ei_inode.bi_size);
+	/*
+	 * fetch current on disk i_size: inode is locked, i_size can only
+	 * increase underneath us:
+	 */
+	bch2_trans_init(&trans, c, 0, 0);
+	iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
+	ret = PTR_ERR_OR_ZERO(iter);
+	bch2_trans_exit(&trans);
+
+	if (ret)
+		goto err;
 
-	shrink = iattr->ia_size <= inode->v.i_size;
+	BUG_ON(inode->v.i_size < inode_u.bi_size);
 
-	if (!shrink) {
-		ret = bch2_extend(inode, iattr);
+	if (iattr->ia_size > inode->v.i_size) {
+		ret = bch2_extend(inode, &inode_u, iattr);
 		goto err;
 	}
@@ -2531,9 +2547,9 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
 	 * userspace has to redirty it and call .mkwrite -> set_page_dirty
 	 * again to allocate the part of the page that was extended.
 	 */
-	if (iattr->ia_size > inode->ei_inode.bi_size)
+	if (iattr->ia_size > inode_u.bi_size)
 		ret = filemap_write_and_wait_range(mapping,
-				inode->ei_inode.bi_size,
+				inode_u.bi_size,
 				iattr->ia_size - 1);
 	else if (iattr->ia_size & (PAGE_SIZE - 1))
 		ret = filemap_write_and_wait_range(mapping,
@@ -2935,33 +2951,49 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
 		if (ret)
 			goto err;
 	}
 
-	bch2_trans_unlock(&trans);
-
-	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-	    end > inode->v.i_size) {
-		i_size_write(&inode->v, end);
+	/*
+	 * Do we need to extend the file?
+	 *
+	 * If we zeroed up to the end of the file, we dropped whatever writes
+	 * were going to write out the current i_size, so we have to extend
+	 * manually even if FL_KEEP_SIZE was set:
+	 */
+	if (end >= inode->v.i_size &&
+	    (!(mode & FALLOC_FL_KEEP_SIZE) ||
+	     (mode & FALLOC_FL_ZERO_RANGE))) {
+		struct btree_iter *inode_iter;
+		struct bch_inode_unpacked inode_u;
+
+		do {
+			bch2_trans_begin(&trans);
+			inode_iter = bch2_inode_peek(&trans, &inode_u,
+						     inode->v.i_ino, 0);
+			ret = PTR_ERR_OR_ZERO(inode_iter);
+		} while (ret == -EINTR);
 
-		mutex_lock(&inode->ei_update_lock);
-		ret = bch2_write_inode_size(c, inode, inode->v.i_size, 0);
-		mutex_unlock(&inode->ei_update_lock);
-	}
+		bch2_trans_unlock(&trans);
 
-	/* blech */
-	if ((mode & FALLOC_FL_KEEP_SIZE) &&
-	    (mode & FALLOC_FL_ZERO_RANGE) &&
-	    inode->ei_inode.bi_size != inode->v.i_size) {
-		/* sync appends.. */
+		if (ret)
+			goto err;
+
+		/*
+		 * Sync existing appends before extending i_size,
+		 * as in bch2_extend():
+		 */
 		ret = filemap_write_and_wait_range(mapping,
-					inode->ei_inode.bi_size, S64_MAX);
+					inode_u.bi_size, S64_MAX);
 		if (ret)
 			goto err;
 
-		if (inode->ei_inode.bi_size != inode->v.i_size) {
-			mutex_lock(&inode->ei_update_lock);
-			ret = bch2_write_inode_size(c, inode,
-						    inode->v.i_size, 0);
-			mutex_unlock(&inode->ei_update_lock);
-		}
+		if (mode & FALLOC_FL_KEEP_SIZE)
+			end = inode->v.i_size;
+		else
+			i_size_write(&inode->v, end);
+
+		mutex_lock(&inode->ei_update_lock);
+		ret = bch2_write_inode_size(c, inode, end, 0);
+		mutex_unlock(&inode->ei_update_lock);
 	}
 err:
 	bch2_trans_exit(&trans);
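
The condition added to bch2_fallocate() above decides when i_size must be updated. A self-contained illustration of just that predicate, assuming it can be read in isolation (the helper name is hypothetical; the flag values come from linux/falloc.h):

#include <stdbool.h>
#include <linux/falloc.h>

/*
 * Illustrative only: mirrors the condition in the hunk above. Zeroing a
 * range that reaches i_size drops the dirty pages that would have written
 * out the current size, so the on-disk inode must be updated even when
 * FALLOC_FL_KEEP_SIZE is set.
 */
static bool fallocate_must_update_size(int mode, unsigned long long end,
				       unsigned long long i_size)
{
	return end >= i_size &&
		(!(mode & FALLOC_FL_KEEP_SIZE) ||
		 (mode & FALLOC_FL_ZERO_RANGE));
}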
......
@@ -99,34 +99,6 @@ void bch2_pagecache_block_get(struct pagecache_lock *lock)
 	__pagecache_lock_get(lock, -1);
 }
 
-/*
- * I_SIZE_DIRTY requires special handling:
- *
- * To the recovery code, the flag means that there is stale data past i_size
- * that needs to be deleted; it's used for implementing atomic appends and
- * truncates.
- *
- * On append, we set I_SIZE_DIRTY before doing the write, then after the write
- * we clear I_SIZE_DIRTY atomically with updating i_size to the new larger size
- * that exposes the data we just wrote.
- *
- * On truncate, it's the reverse: We set I_SIZE_DIRTY atomically with setting
- * i_size to the new smaller size, then we delete the data that we just made
- * invisible, and then we clear I_SIZE_DIRTY.
- *
- * Because there can be multiple appends in flight at a time, we need a refcount
- * (i_size_dirty_count) instead of manipulating the flag directly. Nonzero
- * refcount means I_SIZE_DIRTY is set, zero means it's cleared.
- *
- * Because write_inode() can be called at any time, i_size_dirty_count means
- * something different to the runtime code - it means to write_inode() "don't
- * update i_size yet".
- *
- * We don't clear I_SIZE_DIRTY directly, we let write_inode() clear it when
- * i_size_dirty_count is zero - but the reverse is not true, I_SIZE_DIRTY must
- * be set explicitly.
- */
-
 void bch2_inode_update_after_write(struct bch_fs *c,
 				   struct bch_inode_info *inode,
 				   struct bch_inode_unpacked *bi,
......
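
The block comment deleted in the hunk above documents the old I_SIZE_DIRTY scheme. As a reading aid, a self-contained schematic of the append ordering it describes; all names and types here are deliberately hypothetical (none of them are bcachefs APIs), only the ordering is taken from the removed comment:

/*
 * Schematic of the append protocol described by the removed comment:
 * raise i_size_dirty_count (== set I_SIZE_DIRTY) before writing past
 * i_size, publish the larger i_size after the write, then drop the count
 * so write_inode() may clear the flag.
 */
#include <stdatomic.h>
#include <stdint.h>

struct toy_inode {
	atomic_uint i_size_dirty_count;	/* nonzero => I_SIZE_DIRTY is set */
	_Atomic uint64_t i_size;
};

/* Stand-in for the actual data write past i_size: */
static int toy_write_past_eof(struct toy_inode *inode, uint64_t new_size)
{
	(void)inode; (void)new_size;
	return 0;
}

static int toy_append(struct toy_inode *inode, uint64_t new_size)
{
	int ret;

	atomic_fetch_add(&inode->i_size_dirty_count, 1);  /* flag set */

	ret = toy_write_past_eof(inode, new_size);        /* data still hidden */
	if (!ret)
		atomic_store(&inode->i_size, new_size);   /* expose the data */

	/*
	 * Last reference gone: write_inode() may now clear I_SIZE_DIRTY.
	 * If we crash before that, recovery deletes data past the published
	 * i_size.
	 */
	atomic_fetch_sub(&inode->i_size_dirty_count, 1);
	return ret;
}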