Commit f2019299 authored by Brian Foster, committed by Darrick J. Wong

xfs: simplify inode flush error handling

The inode flush code has several layers of error handling between
the inode and cluster flushing code. If the inode flush fails before
acquiring the backing buffer, the inode flush is aborted. If the
cluster flush fails, the current inode flush is aborted and the
cluster buffer is failed to handle the initial inode and any others
that might have been attached before the error.

Since xfs_iflush() is the only caller of xfs_iflush_cluster(), the
error handling between the two can be condensed in the top-level
function. If we update xfs_iflush_int() to always fall through to
the log item update and attach the item completion handler to the
buffer, any errors that occur after the first call to
xfs_iflush_int() can be handled with a buffer I/O failure.
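A condensed view of the resulting xfs_iflush_int() structure, pieced together
from the patch hunks below (the remaining verifier checks, the alert logging
and the LSN update are elided, so this is not a standalone, compilable
excerpt):

	error = -EFSCORRUPTED;
	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
			   mp, XFS_ERRTAG_IFLUSH_1))
		goto flush_out;
	/* ... the remaining verifier checks also jump to flush_out ... */

	error = 0;
flush_out:
	/*
	 * Update the log item and attach the completion handler even if a
	 * check above failed; the caller fails the buffer to clean up.
	 */
	iip->ili_last_fields = iip->ili_fields;
	iip->ili_fields = 0;
	iip->ili_fsync_fields = 0;
	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
	return error;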

Lift the error handling from xfs_iflush_cluster() into xfs_iflush()
and consolidate with the existing error handling. This also replaces
the need to release the buffer because failing the buffer with
XBF_ASYNC drops the current reference.
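For reference, the consolidated error handling in xfs_iflush() ends up roughly
as below (condensed from the patch; the trylock/EAGAIN path and the buffer
mapping are omitted):

	error = xfs_iflush_int(ip, bp);
	if (!error)
		error = xfs_iflush_cluster(ip, bp);
	if (error) {
		/*
		 * The flushed inode(s) are attached to the buffer, so run
		 * buffer completion by failing it as async I/O; this also
		 * drops the current buffer reference.
		 */
		bp->b_flags |= XBF_ASYNC;
		xfs_buf_ioend_fail(bp);
		goto shutdown;
	}
	*bpp = bp;
	return 0;

abort:
	/* Reached only before the inode was attached to the buffer. */
	xfs_iflush_abort(ip, false);
shutdown:
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return error;
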
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 54b3b1f6
@@ -3496,6 +3496,7 @@ xfs_iflush_cluster(
 	struct xfs_inode **cilist;
 	struct xfs_inode *cip;
 	struct xfs_ino_geometry *igeo = M_IGEO(mp);
+	int error = 0;
 	int nr_found;
 	int clcount = 0;
 	int i;
@@ -3588,11 +3589,10 @@ xfs_iflush_cluster(
 		 * re-check that it's dirty before flushing.
 		 */
 		if (!xfs_inode_clean(cip)) {
-			int error;
 			error = xfs_iflush_int(cip, bp);
 			if (error) {
 				xfs_iunlock(cip, XFS_ILOCK_SHARED);
-				goto cluster_corrupt_out;
+				goto out_free;
 			}
 			clcount++;
 		} else {
@@ -3611,33 +3611,7 @@ xfs_iflush_cluster(
 	kmem_free(cilist);
 out_put:
 	xfs_perag_put(pag);
-	return 0;
-
-cluster_corrupt_out:
-	/*
-	 * Corruption detected in the clustering loop. Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	rcu_read_unlock();
-
-	/*
-	 * We'll always have an inode attached to the buffer for completion
-	 * process by the time we are called from xfs_iflush(). Hence we have
-	 * always need to do IO completion processing to abort the inodes
-	 * attached to the buffer. handle them just like the shutdown case in
-	 * xfs_buf_submit().
-	 */
-	ASSERT(bp->b_iodone);
-	bp->b_flags |= XBF_ASYNC;
-	xfs_buf_ioend_fail(bp);
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	/* abort the corrupt inode, as it was not attached to the buffer */
-	xfs_iflush_abort(cip, false);
-	kmem_free(cilist);
-	xfs_perag_put(pag);
-	return -EFSCORRUPTED;
+	return error;
 }
 
 /*
@@ -3693,17 +3667,16 @@ xfs_iflush(
 	 */
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		error = -EIO;
-		goto abort_out;
+		goto abort;
 	}
 
 	/*
 	 * Get the buffer containing the on-disk inode. We are doing a try-lock
-	 * operation here, so we may get an EAGAIN error. In that case, we
-	 * simply want to return with the inode still dirty.
+	 * operation here, so we may get an EAGAIN error. In that case, return
+	 * leaving the inode dirty.
 	 *
 	 * If we get any other error, we effectively have a corruption situation
-	 * and we cannot flush the inode, so we treat it the same as failing
-	 * xfs_iflush_int().
+	 * and we cannot flush the inode. Abort the flush and shut down.
 	 */
 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
 			0);
@@ -3712,14 +3685,7 @@ xfs_iflush(
 		return error;
 	}
 	if (error)
-		goto corrupt_out;
-
-	/*
-	 * First flush out the inode that xfs_iflush was called with.
-	 */
-	error = xfs_iflush_int(ip, bp);
-	if (error)
-		goto corrupt_out;
+		goto abort;
 
 	/*
 	 * If the buffer is pinned then push on the log now so we won't
@@ -3729,28 +3695,29 @@ xfs_iflush(
 		xfs_log_force(mp, 0);
 
 	/*
-	 * inode clustering: try to gather other inodes into this write
+	 * Flush the provided inode then attempt to gather others from the
+	 * cluster into the write.
 	 *
-	 * Note: Any error during clustering will result in the filesystem
-	 * being shut down and completion callbacks run on the cluster buffer.
-	 * As we have already flushed and attached this inode to the buffer,
-	 * it has already been aborted and released by xfs_iflush_cluster() and
-	 * so we have no further error handling to do here.
+	 * Note: Once we attempt to flush an inode, we must run buffer
+	 * completion callbacks on any failure. If this fails, simulate an I/O
+	 * failure on the buffer and shut down.
 	 */
-	error = xfs_iflush_cluster(ip, bp);
-	if (error)
-		return error;
+	error = xfs_iflush_int(ip, bp);
+	if (!error)
+		error = xfs_iflush_cluster(ip, bp);
+	if (error) {
+		bp->b_flags |= XBF_ASYNC;
+		xfs_buf_ioend_fail(bp);
+		goto shutdown;
+	}
 
 	*bpp = bp;
 	return 0;
 
-corrupt_out:
-	if (bp)
-		xfs_buf_relse(bp);
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-abort_out:
-	/* abort the corrupt inode, as it was not attached to the buffer */
+abort:
 	xfs_iflush_abort(ip, false);
+shutdown:
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	return error;
 }
@@ -3792,6 +3759,7 @@ xfs_iflush_int(
 	struct xfs_inode_log_item *iip = ip->i_itemp;
 	struct xfs_dinode *dip;
 	struct xfs_mount *mp = ip->i_mount;
+	int error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 	ASSERT(xfs_isiflocked(ip));
@@ -3799,15 +3767,21 @@ xfs_iflush_int(
 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 	ASSERT(iip != NULL && iip->ili_fields != 0);
 
-	/* set *dip = inode's place in the buffer */
 	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
+	/*
+	 * We don't flush the inode if any of the following checks fail, but we
+	 * do still update the log item and attach to the backing buffer as if
+	 * the flush happened. This is a formality to facilitate predictable
+	 * error handling as the caller will shutdown and fail the buffer.
+	 */
+	error = -EFSCORRUPTED;
 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
 			       mp, XFS_ERRTAG_IFLUSH_1)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 	if (S_ISREG(VFS_I(ip)->i_mode)) {
 		if (XFS_TEST_ERROR(
@@ -3817,7 +3791,7 @@ xfs_iflush_int(
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad regular inode %Lu, ptr "PTR_FMT,
 				__func__, ip->i_ino, ip);
-			goto corrupt_out;
+			goto flush_out;
 		}
 	} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
 		if (XFS_TEST_ERROR(
@@ -3828,7 +3802,7 @@ xfs_iflush_int(
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad directory inode %Lu, ptr "PTR_FMT,
 				__func__, ip->i_ino, ip);
-			goto corrupt_out;
+			goto flush_out;
 		}
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
@@ -3839,14 +3813,14 @@ xfs_iflush_int(
 			__func__, ip->i_ino,
 			ip->i_d.di_nextents + ip->i_d.di_anextents,
 			ip->i_d.di_nblocks, ip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
 			mp, XFS_ERRTAG_IFLUSH_6)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 
 	/*
@@ -3863,7 +3837,7 @@ xfs_iflush_int(
 	/* Check the inline fork data before we write out. */
 	if (!xfs_inode_verify_forks(ip))
-		goto corrupt_out;
+		goto flush_out;
 
 	/*
 	 * Copy the dirty parts of the inode into the on-disk inode. We always
@@ -3906,6 +3880,8 @@ xfs_iflush_int(
 	 * need the AIL lock, because it is a 64 bit value that cannot be read
 	 * atomically.
 	 */
+	error = 0;
+flush_out:
 	iip->ili_last_fields = iip->ili_fields;
 	iip->ili_fields = 0;
 	iip->ili_fsync_fields = 0;
@@ -3915,10 +3891,10 @@ xfs_iflush_int(
 				&iip->ili_item.li_lsn);
 
 	/*
-	 * Attach the function xfs_iflush_done to the inode's
-	 * buffer. This will remove the inode from the AIL
-	 * and unlock the inode's flush lock when the inode is
-	 * completely written to disk.
+	 * Attach the inode item callback to the buffer whether the flush
+	 * succeeded or not. If not, the caller will shut down and fail I/O
+	 * completion on the buffer to remove the inode from the AIL and release
+	 * the flush lock.
 	 */
 	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
@@ -3927,10 +3903,7 @@ xfs_iflush_int(
 	ASSERT(!list_empty(&bp->b_li_list));
 	ASSERT(bp->b_iodone != NULL);
-	return 0;
-
-corrupt_out:
-	return -EFSCORRUPTED;
+	return error;
 }
 
 /* Release an inode. */