xfs: make forced shutdown processing atomic

The running of a forced shutdown is a bit of a mess. It does racy checks for XFS_MOUNT_SHUTDOWN in xfs_do_force_shutdown(), then does more racy checks in xfs_log_force_umount() before finally setting XFS_MOUNT_SHUTDOWN and XLOG_IO_ERROR under the log->icloglock. Move the checking and setting of XFS_MOUNT_SHUTDOWN into xfs_do_force_shutdown() so we process a shutdown once and once only. Serialise this with the mp->m_sb_lock spinlock so that the state change is atomic and won't race. Move all the mount specific shutdown state changes from xfs_log_force_umount() to xfs_do_force_shutdown() so they are done atomically with setting XFS_MOUNT_SHUTDOWN. Then get rid of the racy xlog_is_shutdown() check from xlog_force_shutdown(), and gate the log shutdown on the test_and_set_bit(XLOG_IO_ERROR) test under the icloglock. This means that the log is shut down once and once only, and code that needs to prevent races with shutdown can do so by holding the icloglock and checking the return value of xlog_is_shutdown(). This results in a predictable shutdown execution process - we set the shutdown flags once and process the shutdown once rather than the current "as many concurrent shutdowns as can race to the flag setting" situation we have now. Also, now that shutdown is atomic, always emit a stack trace when the error level for the filesystem is high enough. This means that we always get a stack trace when trying to diagnose the cause of shutdowns in the field, rather than just for SHUTDOWN_CORRUPT_INCORE cases. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Darrick J. Wong <djwong@kernel.org>

xfs: make forced shutdown processing atomic
The running of a forced shutdown is a bit of a mess. It does racy checks for XFS_MOUNT_SHUTDOWN in xfs_do_force_shutdown(), then does more racy checks in xfs_log_force_umount() before finally setting XFS_MOUNT_SHUTDOWN and XLOG_IO_ERROR under the log->icloglock. Move the checking and setting of XFS_MOUNT_SHUTDOWN into xfs_do_force_shutdown() so we process a shutdown once and once only. Serialise this with the mp->m_sb_lock spinlock so that the state change is atomic and won't race. Move all the mount specific shutdown state changes from xfs_log_force_umount() to xfs_do_force_shutdown() so they are done atomically with setting XFS_MOUNT_SHUTDOWN. Then get rid of the racy xlog_is_shutdown() check from xlog_force_shutdown(), and gate the log shutdown on the test_and_set_bit(XLOG_IO_ERROR) test under the icloglock. This means that the log is shut down once and once only, and code that needs to prevent races with shutdown can do so by holding the icloglock and checking the return value of xlog_is_shutdown(). This results in a predictable shutdown execution process - we set the shutdown flags once and process the shutdown once rather than the current "as many concurrent shutdowns as can race to the flag setting" situation we have now. Also, now that shutdown is atomic, always emit a stack trace when the error level for the filesystem is high enough. This means that we always get a stack trace when trying to diagnose the cause of shutdowns in the field, rather than just for SHUTDOWN_CORRUPT_INCORE cases. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
b36d4651 · Dave Chinner · Darrick J. Wong · e1d06e5f · b36d4651 · b36d4651
Commit b36d4651 authored Aug 10, 2021 by Dave Chinner Committed by Darrick J. Wong Aug 16, 2021
Show whitespace changes
Inline Side-by-side

Showing with 76 additions and 89 deletions

fs/xfs/xfs_fsops.c fs/xfs/xfs_fsops.c +30 -33

fs/xfs/xfs_log.c fs/xfs/xfs_log.c +45 -55

fs/xfs/xfs_log.h fs/xfs/xfs_log.h +1 -1

No files found.
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -511,6 +511,11 @@ xfs_fs_goingdown(
 * consistent. We don't do an unmount here; just shutdown the shop, make sure
 * that absolutely nothing persistent happens to this filesystem after this
 * point.
+ *
+ * The shutdown state change is atomic, resulting in the first and only the
+ * first shutdown call processing the shutdown. This means we only shutdown the
+ * log once as it requires, and we don't spam the logs when multiple concurrent
+ * shutdowns race to set the shutdown flags.
 */
 void
 xfs_do_force_shutdown(
@@ -519,48 +524,40 @@ xfs_do_force_shutdown(
 	char		*fname,
 	int		lnnum)
 {
-	bool		logerror = flags & SHUTDOWN_LOG_IO_ERROR;
+	int		tag;
+	const char	*why;
-	/*
-	 * No need to duplicate efforts.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
-		return;
-	/*
-	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
-	 * queue up anybody new on the log reservations, and wakes up
-	 * everybody who's sleeping on log reservations to tell them
-	 * the bad news.
-	 */
-	if (xfs_log_force_umount(mp, logerror))
-		return;
-	if (flags & SHUTDOWN_FORCE_UMOUNT) {
+	spin_lock(&mp->m_sb_lock);
-		xfs_alert(mp,
+	if (XFS_FORCED_SHUTDOWN(mp)) {
-"User initiated shutdown (0x%x) received. Shutting down filesystem",
+		spin_unlock(&mp->m_sb_lock);
-				flags);
 		return;
 	}
+	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
+	if (mp->m_sb_bp)
+		mp->m_sb_bp->b_flags |= XBF_DONE;
+	spin_unlock(&mp->m_sb_lock);
-	if (flags & SHUTDOWN_CORRUPT_INCORE) {
+	if (flags & SHUTDOWN_FORCE_UMOUNT)
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
+		xfs_alert(mp, "User initiated shutdown received.");
-"Corruption of in-memory data (0x%x) detected at %pS (%s:%d).  Shutting down filesystem",
-				flags, __return_address, fname, lnnum);
+	if (xlog_force_shutdown(mp->m_log, flags)) {
-		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
+		tag = XFS_PTAG_SHUTDOWN_LOGERROR;
-			xfs_stack_trace();
+		why = "Log I/O Error";
-	} else if (logerror) {
+	} else if (flags & SHUTDOWN_CORRUPT_INCORE) {
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
+		tag = XFS_PTAG_SHUTDOWN_CORRUPT;
-"Log I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem",
+		why = "Corruption of in-memory data";
-				flags, __return_address, fname, lnnum);
 	} else {
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
+		tag = XFS_PTAG_SHUTDOWN_IOERROR;
-"I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem",
+		why = "Metadata I/O Error";
-				flags, __return_address, fname, lnnum);
 	}
+	xfs_alert_tag(mp, tag,
+"%s (0x%x) detected at %pS (%s:%d).  Shutting down filesystem.",
+			why, flags, __return_address, fname, lnnum);
 	xfs_alert(mp,
 		"Please unmount the filesystem and rectify the problem(s)");
+	if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+		xfs_stack_trace();
 }
 /*

--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3794,76 +3794,66 @@ xlog_verify_iclog(
 #endif
 /*
- * This is called from xfs_force_shutdown, when we're forcibly
+ * Perform a forced shutdown on the log. This should be called once and once
- * shutting down the filesystem, typically because of an IO error.
+ * only by the high level filesystem shutdown code to shut the log subsystem
- * Our main objectives here are to make sure that:
+ * down cleanly.
- *	a. if !logerror, flush the logs to disk. Anything modified
- *	   after this is ignored.
- *	b. the filesystem gets marked 'SHUTDOWN' for all interested
- *	   parties to find out, 'atomically'.
- *	c. those who're sleeping on log reservations, pinned objects and
- *	    other resources get woken up, and be told the bad news.
- *	d. nothing new gets queued up after (b) and (c) are done.
 *
- * Note: for the !logerror case we need to flush the regions held in memory out
+ * Our main objectives here are to make sure that:
- * to disk first. This needs to be done before the log is marked as shutdown,
+ *	a. if the shutdown was not due to a log IO error, flush the logs to
- * otherwise the iclog writes will fail.
+ *	   disk. Anything modified after this is ignored.
+ *	b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested
+ *	   parties to find out. Nothing new gets queued after this is done.
+ *	c. Tasks sleeping on log reservations, pinned objects and
+ *	   other resources get woken up.
 *
- * Return non-zero if log shutdown transition had already happened.
+ * Return true if the shutdown cause was a log IO error and we actually shut the
+ * log down.
 */
-int
+bool
-xfs_log_force_umount(
+xlog_force_shutdown(
-	struct xfs_mount	*mp,
+	struct xlog	*log,
-	int			logerror)
+	int		shutdown_flags)
 {
-	struct xlog	*log;
+	bool		log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR);
-	int		retval = 0;
-	log = mp->m_log;
 	/*
-	 * If this happens during log recovery, don't worry about
+	 * If this happens during log recovery then we aren't using the runtime
-	 * locking; the log isn't open for business yet.
+	 * log mechanisms yet so there's nothing to shut down.
 	 */
-	if (!log || xlog_in_recovery(log)) {
+	if (!log || xlog_in_recovery(log))
-		mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
+		return false;
-		if (mp->m_sb_bp)
-			mp->m_sb_bp->b_flags |= XBF_DONE;
-		return 0;
-	}
-	/*
+	ASSERT(!xlog_is_shutdown(log));
-	 * Somebody could've already done the hard work for us.
-	 * No need to get locks for this.
-	 */
-	if (logerror && xlog_is_shutdown(log))
-		return 1;
 	/*
 	 * Flush all the completed transactions to disk before marking the log
-	 * being shut down. We need to do it in this order to ensure that
+	 * being shut down. We need to do this first as shutting down the log
-	 * completed operations are safely on disk before we shut down, and that
+	 * before the force will prevent the log force from flushing the iclogs
-	 * we don't have to issue any buffer IO after the shutdown flags are set
+	 * to disk.
-	 * to guarantee this.
+	 *
+	 * Re-entry due to a log IO error shutdown during the log force is
+	 * prevented by the atomicity of higher level shutdown code.
 	 */
-	if (!logerror)
+	if (!log_error)
-		xfs_log_force(mp, XFS_LOG_SYNC);
+		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 	/*
-	 * mark the filesystem and the as in a shutdown state and wake
+	 * Atomically set the shutdown state. If the shutdown state is already
-	 * everybody up to tell them the bad news.
+	 * set, then someone else is performing the shutdown and so we are done
+	 * here. This should never happen because we should only ever get called
+	 * once by the first shutdown caller.
+	 *
+	 * Much of the log state machine transitions assume that shutdown state
+	 * cannot change once they hold the log->l_icloglock. Hence we need to
+	 * hold that lock here, even though we use the atomic test_and_set_bit()
+	 * operation to set the shutdown state.
 	 */
 	spin_lock(&log->l_icloglock);
-	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
+	if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) {
-	if (mp->m_sb_bp)
+		spin_unlock(&log->l_icloglock);
-		mp->m_sb_bp->b_flags |= XBF_DONE;
+		ASSERT(0);
+		return false;
-	/*
+	}
-	 * Mark the log and the iclogs with IO error flags to prevent any
-	 * further log IO from being issued or completed.
-	 */
-	if (!test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate))
-		retval = 1;
 	spin_unlock(&log->l_icloglock);
 	/*
@@ -3887,7 +3877,7 @@ xfs_log_force_umount(
 	spin_unlock(&log->l_cilp->xc_push_lock);
 	xlog_state_do_callback(log);
-	return retval;
+	return log_error;
 }
 STATIC int

--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -125,7 +125,6 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
 			  bool		   permanent);
 int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 void      xfs_log_unmount(struct xfs_mount *mp);
-int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
 bool	xfs_log_writable(struct xfs_mount *mp);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
@@ -140,6 +139,7 @@ void	xfs_log_clean(struct xfs_mount *mp);
 bool	xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
 xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
+bool	  xlog_force_shutdown(struct xlog *log, int shutdown_flags);
 void xlog_use_incompat_feat(struct xlog *log);
 void xlog_drop_incompat_feat(struct xlog *log);