xfs: push the ilock into xfs_zero_eof

Instead of calling xfs_zero_eof with the ilock held only take it internally for the minimall required critical section around xfs_bmapi_read. This also requires changing the calling convention for xfs_zero_last_block slightly. The actual zeroing operation is still serialized by the iolock, which must be taken exclusively over the call to xfs_zero_eof. We could in fact use a shared lock for the xfs_bmapi_read calls as long as the extent list has been read in, but given that we already hold the iolock exclusively there is little reason to micro optimize this further. Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>

xfs: push the ilock into xfs_zero_eof
Instead of calling xfs_zero_eof with the ilock held only take it internally for the minimall required critical section around xfs_bmapi_read. This also requires changing the calling convention for xfs_zero_last_block slightly. The actual zeroing operation is still serialized by the iolock, which must be taken exclusively over the call to xfs_zero_eof. We could in fact use a shared lock for the xfs_bmapi_read calls as long as the extent list has been read in, but given that we already hold the iolock exclusively there is little reason to micro optimize this further. Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
193aec10 · Christoph Hellwig · Ben Myers · f38996f5 · 193aec10 · 193aec10
Commit 193aec10 authored Mar 27, 2012 by Christoph Hellwig Committed by Ben Myers May 14, 2012
Hide whitespace changes
Inline Side-by-side

Showing with 63 additions and 102 deletions

fs/xfs/xfs_file.c fs/xfs/xfs_file.c +63 -100

fs/xfs/xfs_iops.c fs/xfs/xfs_iops.c +0 -2

No files found.
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -396,114 +396,96 @@ xfs_file_splice_write(
 }
 /*
- * This routine is called to handle zeroing any space in the last
+ * This routine is called to handle zeroing any space in the last block of the
- * block of the file that is beyond the EOF.  We do this since the
+ * file that is beyond the EOF.  We do this since the size is being increased
- * size is being increased without writing anything to that block
+ * without writing anything to that block and we don't want to read the
- * and we don't want anyone to read the garbage on the disk.
+ * garbage on the disk.
 */
 STATIC int				/* error (positive) */
 xfs_zero_last_block(
-	xfs_inode_t	*ip,
+	struct xfs_inode	*ip,
-	xfs_fsize_t	offset,
+	xfs_fsize_t		offset,
-	xfs_fsize_t	isize)
+	xfs_fsize_t		isize)
 {
-	xfs_fileoff_t	last_fsb;
+	struct xfs_mount	*mp = ip->i_mount;
-	xfs_mount_t	*mp = ip->i_mount;
+	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int		nimaps;
+	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int		zero_offset;
+	int			zero_len;
-	int		zero_len;
+	int			nimaps = 1;
-	int		error = 0;
+	int			error = 0;
-	xfs_bmbt_irec_t	imap;
+	struct xfs_bmbt_irec	imap;
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	if (zero_offset == 0) {
-		/*
-		 * There are no extra bytes in the last block on disk to
-		 * zero, so return.
-		 */
-		return 0;
-	}
-	last_fsb = XFS_B_TO_FSBT(mp, isize);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	nimaps = 1;
 	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (error)
 		return error;
 	ASSERT(nimaps > 0);
 	/*
 	 * If the block underlying isize is just a hole, then there
 	 * is nothing to zero.
 	 */
-	if (imap.br_startblock == HOLESTARTBLOCK) {
+	if (imap.br_startblock == HOLESTARTBLOCK)
 		return 0;
-	}
-	/*
-	 * Zero the part of the last block beyond the EOF, and write it
-	 * out sync.  We need to drop the ilock while we do this so we
-	 * don't deadlock when the buffer cache calls back to us.
-	 */
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	zero_len = mp->m_sb.sb_blocksize - zero_offset;
 	if (isize + zero_len > offset)
 		zero_len = offset - isize;
-	error = xfs_iozero(ip, isize, zero_len);
+	return xfs_iozero(ip, isize, zero_len);
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
 }
 /*
- * Zero any on disk space between the current EOF and the new,
+ * Zero any on disk space between the current EOF and the new, larger EOF.
- * larger EOF.  This handles the normal case of zeroing the remainder
+ *
- * of the last block in the file and the unusual case of zeroing blocks
+ * This handles the normal case of zeroing the remainder of the last block in
- * out beyond the size of the file.  This second case only happens
+ * the file and the unusual case of zeroing blocks out beyond the size of the
- * with fixed size extents and when the system crashes before the inode
+ * file.  This second case only happens with fixed size extents and when the
- * size was updated but after blocks were allocated.  If fill is set,
+ * system crashes before the inode size was updated but after blocks were
- * then any holes in the range are filled and zeroed.  If not, the holes
+ * allocated.
- * are left alone as holes.
+ *
+ * Expects the iolock to be held exclusive, and will take the ilock internally.
 */
 int					/* error (positive) */
 xfs_zero_eof(
-	xfs_inode_t	*ip,
+	struct xfs_inode	*ip,
-	xfs_off_t	offset,		/* starting I/O offset */
+	xfs_off_t		offset,		/* starting I/O offset */
-	xfs_fsize_t	isize)		/* current inode size */
+	xfs_fsize_t		isize)		/* current inode size */
 {
-	xfs_mount_t	*mp = ip->i_mount;
+	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t	start_zero_fsb;
+	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t	end_zero_fsb;
+	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t	zero_count_fsb;
+	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t	last_fsb;
+	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t	zero_off;
+	xfs_fileoff_t		zero_off;
-	xfs_fsize_t	zero_len;
+	xfs_fsize_t		zero_len;
-	int		nimaps;
+	int			nimaps;
-	int		error = 0;
+	int			error = 0;
-	xfs_bmbt_irec_t	imap;
+	struct xfs_bmbt_irec	imap;
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 	/*
 	 * First handle zeroing the block on which isize resides.
+	 *
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, offset, isize);
+	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-	if (error) {
+		error = xfs_zero_last_block(ip, offset, isize);
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+		if (error)
-		return error;
+			return error;
 	}
 	/*
-	 * Calculate the range between the new size and the old
+	 * Calculate the range between the new size and the old where blocks
-	 * where blocks needing to be zeroed may exist.  To get the
+	 * needing to be zeroed may exist.
-	 * block where the last byte in the file currently resides,
+	 *
-	 * we need to subtract one from the size and truncate back
+	 * To get the block where the last byte in the file currently resides,
-	 * to a block boundary.  We subtract 1 in case the size is
+	 * we need to subtract one from the size and truncate back to a block
-	 * exactly on a block boundary.
+	 * boundary.  We subtract 1 in case the size is exactly on a block
+	 * boundary.
 	 */
 	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
 	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
@@ -521,23 +503,18 @@ xfs_zero_eof(
 	while (start_zero_fsb <= end_zero_fsb) {
 		nimaps = 1;
 		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
 					  &imap, &nimaps, 0);
-		if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+		if (error)
 			return error;
-		}
 		ASSERT(nimaps > 0);
 		if (imap.br_state == XFS_EXT_UNWRITTEN ||
 		    imap.br_startblock == HOLESTARTBLOCK) {
-			/*
-			 * This loop handles initializing pages that were
-			 * partially initialized by the code below this
-			 * loop. It basically zeroes the part of the page
-			 * that sits on a hole and sets the page as P_HOLE
-			 * and calls remapf if it is a mapped file.
-			 */
 			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 			continue;
@@ -545,11 +522,7 @@ xfs_zero_eof(
 		/*
 		 * There are blocks we need to zero.
-		 * Drop the inode lock while we're doing the I/O.
-		 * We'll still have the iolock to protect us.
 		 */
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
 		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -557,22 +530,14 @@ xfs_zero_eof(
 			zero_len = offset - zero_off;
 		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error) {
+		if (error)
-			goto out_lock;
+			return error;
-		}
 		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
 	}
 	return 0;
-out_lock:
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
 }
 /*
@@ -612,9 +577,7 @@ xfs_file_aio_write_checks(
 			xfs_rw_ilock(ip, *iolock);
 			goto restart;
 		}
-		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
-		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			return error;
 	}

--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -764,9 +764,7 @@ xfs_setattr_size(
 		 * before the inode is joined to the transaction to modify
 		 * i_size.
 		 */
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = xfs_zero_eof(ip, newsize, oldsize);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			goto out_unlock;
 	}