Commit f2019299 authored by Brian Foster, committed by Darrick J. Wong

xfs: simplify inode flush error handling

The inode flush code has several layers of error handling between
the inode and cluster flushing code. If the inode flush fails before
acquiring the backing buffer, the inode flush is aborted. If the
cluster flush fails, the current inode flush is aborted and the
cluster buffer is failed to handle the initial inode and any others
that might have been attached before the error.

Since xfs_iflush() is the only caller of xfs_iflush_cluster(), the
error handling between the two can be condensed in the top-level
function. If we update xfs_iflush_int() to always fall through to
the log item update and attach the item completion handler to the
buffer, any errors that occur after the first call to
xfs_iflush_int() can be handled with a buffer I/O failure.
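A condensed view of the resulting xfs_iflush_int() structure, pieced together
from the patch hunks below (the remaining verifier checks, the alert logging
and the LSN update are elided, so this is not a standalone, compilable
excerpt):

	error = -EFSCORRUPTED;
	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
			   mp, XFS_ERRTAG_IFLUSH_1))
		goto flush_out;
	/* ... the remaining verifier checks also jump to flush_out ... */

	error = 0;
flush_out:
	/*
	 * Update the log item and attach the completion handler even if a
	 * check above failed; the caller fails the buffer to clean up.
	 */
	iip->ili_last_fields = iip->ili_fields;
	iip->ili_fields = 0;
	iip->ili_fsync_fields = 0;
	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
	return error;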

Lift the error handling from xfs_iflush_cluster() into xfs_iflush()
and consolidate with the existing error handling. This also replaces
the need to release the buffer because failing the buffer with
XBF_ASYNC drops the current reference.
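For reference, the consolidated error handling in xfs_iflush() ends up roughly
as below (condensed from the patch; the trylock/EAGAIN path and the buffer
mapping are omitted):

	error = xfs_iflush_int(ip, bp);
	if (!error)
		error = xfs_iflush_cluster(ip, bp);
	if (error) {
		/*
		 * The flushed inode(s) are attached to the buffer, so run
		 * buffer completion by failing it as async I/O; this also
		 * drops the current buffer reference.
		 */
		bp->b_flags |= XBF_ASYNC;
		xfs_buf_ioend_fail(bp);
		goto shutdown;
	}
	*bpp = bp;
	return 0;

abort:
	/* Reached only before the inode was attached to the buffer. */
	xfs_iflush_abort(ip, false);
shutdown:
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return error;
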
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 54b3b1f6
@@ -3496,6 +3496,7 @@ xfs_iflush_cluster(
 	struct xfs_inode **cilist;
 	struct xfs_inode *cip;
 	struct xfs_ino_geometry *igeo = M_IGEO(mp);
+	int error = 0;
 	int nr_found;
 	int clcount = 0;
 	int i;
@@ -3588,11 +3589,10 @@ xfs_iflush_cluster(
 		 * re-check that it's dirty before flushing.
 		 */
 		if (!xfs_inode_clean(cip)) {
-			int error;
 			error = xfs_iflush_int(cip, bp);
 			if (error) {
 				xfs_iunlock(cip, XFS_ILOCK_SHARED);
-				goto cluster_corrupt_out;
+				goto out_free;
 			}
 			clcount++;
 		} else {
@@ -3611,33 +3611,7 @@ xfs_iflush_cluster(
 	kmem_free(cilist);
 out_put:
 	xfs_perag_put(pag);
-	return 0;
-
-cluster_corrupt_out:
-	/*
-	 * Corruption detected in the clustering loop. Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	rcu_read_unlock();
-
-	/*
-	 * We'll always have an inode attached to the buffer for completion
-	 * process by the time we are called from xfs_iflush(). Hence we have
-	 * always need to do IO completion processing to abort the inodes
-	 * attached to the buffer. handle them just like the shutdown case in
-	 * xfs_buf_submit().
-	 */
-	ASSERT(bp->b_iodone);
-	bp->b_flags |= XBF_ASYNC;
-	xfs_buf_ioend_fail(bp);
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	/* abort the corrupt inode, as it was not attached to the buffer */
-	xfs_iflush_abort(cip, false);
-	kmem_free(cilist);
-	xfs_perag_put(pag);
-	return -EFSCORRUPTED;
+	return error;
 }
 
 /*
@@ -3693,17 +3667,16 @@ xfs_iflush(
 	 */
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		error = -EIO;
-		goto abort_out;
+		goto abort;
 	}
 
 	/*
 	 * Get the buffer containing the on-disk inode. We are doing a try-lock
-	 * operation here, so we may get an EAGAIN error. In that case, we
-	 * simply want to return with the inode still dirty.
+	 * operation here, so we may get an EAGAIN error. In that case, return
+	 * leaving the inode dirty.
 	 *
 	 * If we get any other error, we effectively have a corruption situation
-	 * and we cannot flush the inode, so we treat it the same as failing
-	 * xfs_iflush_int().
+	 * and we cannot flush the inode. Abort the flush and shut down.
 	 */
 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
 			0);
@@ -3712,14 +3685,7 @@ xfs_iflush(
 		return error;
 	}
 	if (error)
-		goto corrupt_out;
-
-	/*
-	 * First flush out the inode that xfs_iflush was called with.
-	 */
-	error = xfs_iflush_int(ip, bp);
-	if (error)
-		goto corrupt_out;
+		goto abort;
 
 	/*
 	 * If the buffer is pinned then push on the log now so we won't
@@ -3729,28 +3695,29 @@ xfs_iflush(
 		xfs_log_force(mp, 0);
 
 	/*
-	 * inode clustering: try to gather other inodes into this write
+	 * Flush the provided inode then attempt to gather others from the
+	 * cluster into the write.
 	 *
-	 * Note: Any error during clustering will result in the filesystem
-	 * being shut down and completion callbacks run on the cluster buffer.
-	 * As we have already flushed and attached this inode to the buffer,
-	 * it has already been aborted and released by xfs_iflush_cluster() and
-	 * so we have no further error handling to do here.
+	 * Note: Once we attempt to flush an inode, we must run buffer
+	 * completion callbacks on any failure. If this fails, simulate an I/O
+	 * failure on the buffer and shut down.
 	 */
-	error = xfs_iflush_cluster(ip, bp);
-	if (error)
-		return error;
+	error = xfs_iflush_int(ip, bp);
+	if (!error)
+		error = xfs_iflush_cluster(ip, bp);
+	if (error) {
+		bp->b_flags |= XBF_ASYNC;
+		xfs_buf_ioend_fail(bp);
+		goto shutdown;
+	}
 
 	*bpp = bp;
 	return 0;
 
-corrupt_out:
-	if (bp)
-		xfs_buf_relse(bp);
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-abort_out:
-	/* abort the corrupt inode, as it was not attached to the buffer */
+abort:
 	xfs_iflush_abort(ip, false);
+shutdown:
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	return error;
 }
@@ -3792,6 +3759,7 @@ xfs_iflush_int(
 	struct xfs_inode_log_item *iip = ip->i_itemp;
 	struct xfs_dinode *dip;
 	struct xfs_mount *mp = ip->i_mount;
+	int error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 	ASSERT(xfs_isiflocked(ip));
@@ -3799,15 +3767,21 @@ xfs_iflush_int(
 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 	ASSERT(iip != NULL && iip->ili_fields != 0);
 
-	/* set *dip = inode's place in the buffer */
 	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
+	/*
+	 * We don't flush the inode if any of the following checks fail, but we
+	 * do still update the log item and attach to the backing buffer as if
+	 * the flush happened. This is a formality to facilitate predictable
+	 * error handling as the caller will shutdown and fail the buffer.
+	 */
+	error = -EFSCORRUPTED;
 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
 			       mp, XFS_ERRTAG_IFLUSH_1)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 	if (S_ISREG(VFS_I(ip)->i_mode)) {
 		if (XFS_TEST_ERROR(
@@ -3817,7 +3791,7 @@ xfs_iflush_int(
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad regular inode %Lu, ptr "PTR_FMT,
 				__func__, ip->i_ino, ip);
-			goto corrupt_out;
+			goto flush_out;
 		}
 	} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
 		if (XFS_TEST_ERROR(
@@ -3828,7 +3802,7 @@ xfs_iflush_int(
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad directory inode %Lu, ptr "PTR_FMT,
 				__func__, ip->i_ino, ip);
-			goto corrupt_out;
+			goto flush_out;
 		}
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
@@ -3839,14 +3813,14 @@ xfs_iflush_int(
 			__func__, ip->i_ino,
 			ip->i_d.di_nextents + ip->i_d.di_anextents,
 			ip->i_d.di_nblocks, ip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
 			mp, XFS_ERRTAG_IFLUSH_6)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
-		goto corrupt_out;
+		goto flush_out;
 	}
 
 	/*
@@ -3863,7 +3837,7 @@ xfs_iflush_int(
 	/* Check the inline fork data before we write out. */
 	if (!xfs_inode_verify_forks(ip))
-		goto corrupt_out;
+		goto flush_out;
 
 	/*
 	 * Copy the dirty parts of the inode into the on-disk inode. We always
@@ -3906,6 +3880,8 @@ xfs_iflush_int(
 	 * need the AIL lock, because it is a 64 bit value that cannot be read
 	 * atomically.
 	 */
+	error = 0;
+flush_out:
 	iip->ili_last_fields = iip->ili_fields;
 	iip->ili_fields = 0;
 	iip->ili_fsync_fields = 0;
@@ -3915,10 +3891,10 @@ xfs_iflush_int(
 				&iip->ili_item.li_lsn);
 
 	/*
-	 * Attach the function xfs_iflush_done to the inode's
-	 * buffer. This will remove the inode from the AIL
-	 * and unlock the inode's flush lock when the inode is
-	 * completely written to disk.
+	 * Attach the inode item callback to the buffer whether the flush
+	 * succeeded or not. If not, the caller will shut down and fail I/O
+	 * completion on the buffer to remove the inode from the AIL and release
+	 * the flush lock.
 	 */
 	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
@@ -3927,10 +3903,7 @@ xfs_iflush_int(
 	ASSERT(!list_empty(&bp->b_li_list));
 	ASSERT(bp->b_iodone != NULL);
-	return 0;
-
-corrupt_out:
-	return -EFSCORRUPTED;
+	return error;
 }
 
 /* Release an inode. */