Commit 193aec10 authored by Christoph Hellwig, committed by Ben Myers

xfs: push the ilock into xfs_zero_eof

Instead of calling xfs_zero_eof with the ilock held, only take it internally
for the minimal required critical section around xfs_bmapi_read.  This
also requires changing the calling convention for xfs_zero_last_block
slightly.  The actual zeroing operation is still serialized by the iolock,
which must be taken exclusively over the call to xfs_zero_eof.

We could in fact use a shared lock for the xfs_bmapi_read calls as long as
the extent list has been read in, but given that we already hold the iolock
exclusively there is little reason to micro-optimize this further.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent f38996f5
...@@ -396,114 +396,96 @@ xfs_file_splice_write( ...@@ -396,114 +396,96 @@ xfs_file_splice_write(
} }
/* /*
* This routine is called to handle zeroing any space in the last * This routine is called to handle zeroing any space in the last block of the
* block of the file that is beyond the EOF. We do this since the * file that is beyond the EOF. We do this since the size is being increased
* size is being increased without writing anything to that block * without writing anything to that block and we don't want to read the
* and we don't want anyone to read the garbage on the disk. * garbage on the disk.
*/ */
STATIC int /* error (positive) */ STATIC int /* error (positive) */
xfs_zero_last_block( xfs_zero_last_block(
xfs_inode_t *ip, struct xfs_inode *ip,
xfs_fsize_t offset, xfs_fsize_t offset,
xfs_fsize_t isize) xfs_fsize_t isize)
{ {
xfs_fileoff_t last_fsb; struct xfs_mount *mp = ip->i_mount;
xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
int nimaps; int zero_offset = XFS_B_FSB_OFFSET(mp, isize);
int zero_offset; int zero_len;
int zero_len; int nimaps = 1;
int error = 0; int error = 0;
xfs_bmbt_irec_t imap; struct xfs_bmbt_irec imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
if (zero_offset == 0) {
/*
* There are no extra bytes in the last block on disk to
* zero, so return.
*/
return 0;
}
last_fsb = XFS_B_TO_FSBT(mp, isize); xfs_ilock(ip, XFS_ILOCK_EXCL);
nimaps = 1;
error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
return error; return error;
ASSERT(nimaps > 0); ASSERT(nimaps > 0);
/* /*
* If the block underlying isize is just a hole, then there * If the block underlying isize is just a hole, then there
* is nothing to zero. * is nothing to zero.
*/ */
if (imap.br_startblock == HOLESTARTBLOCK) { if (imap.br_startblock == HOLESTARTBLOCK)
return 0; return 0;
}
/*
* Zero the part of the last block beyond the EOF, and write it
* out sync. We need to drop the ilock while we do this so we
* don't deadlock when the buffer cache calls back to us.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_len = mp->m_sb.sb_blocksize - zero_offset; zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset) if (isize + zero_len > offset)
zero_len = offset - isize; zero_len = offset - isize;
error = xfs_iozero(ip, isize, zero_len); return xfs_iozero(ip, isize, zero_len);
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
} }
/* /*
* Zero any on disk space between the current EOF and the new, * Zero any on disk space between the current EOF and the new, larger EOF.
* larger EOF. This handles the normal case of zeroing the remainder *
* of the last block in the file and the unusual case of zeroing blocks * This handles the normal case of zeroing the remainder of the last block in
* out beyond the size of the file. This second case only happens * the file and the unusual case of zeroing blocks out beyond the size of the
* with fixed size extents and when the system crashes before the inode * file. This second case only happens with fixed size extents and when the
* size was updated but after blocks were allocated. If fill is set, * system crashes before the inode size was updated but after blocks were
* then any holes in the range are filled and zeroed. If not, the holes * allocated.
* are left alone as holes. *
* Expects the iolock to be held exclusive, and will take the ilock internally.
*/ */
int /* error (positive) */ int /* error (positive) */
xfs_zero_eof( xfs_zero_eof(
xfs_inode_t *ip, struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */ xfs_off_t offset, /* starting I/O offset */
xfs_fsize_t isize) /* current inode size */ xfs_fsize_t isize) /* current inode size */
{ {
xfs_mount_t *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb; xfs_fileoff_t start_zero_fsb;
xfs_fileoff_t end_zero_fsb; xfs_fileoff_t end_zero_fsb;
xfs_fileoff_t zero_count_fsb; xfs_fileoff_t zero_count_fsb;
xfs_fileoff_t last_fsb; xfs_fileoff_t last_fsb;
xfs_fileoff_t zero_off; xfs_fileoff_t zero_off;
xfs_fsize_t zero_len; xfs_fsize_t zero_len;
int nimaps; int nimaps;
int error = 0; int error = 0;
xfs_bmbt_irec_t imap; struct xfs_bmbt_irec imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize); ASSERT(offset > isize);
/* /*
* First handle zeroing the block on which isize resides. * First handle zeroing the block on which isize resides.
*
* We only zero a part of that block so it is handled specially. * We only zero a part of that block so it is handled specially.
*/ */
error = xfs_zero_last_block(ip, offset, isize); if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
if (error) { error = xfs_zero_last_block(ip, offset, isize);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); if (error)
return error; return error;
} }
/* /*
* Calculate the range between the new size and the old * Calculate the range between the new size and the old where blocks
* where blocks needing to be zeroed may exist. To get the * needing to be zeroed may exist.
* block where the last byte in the file currently resides, *
* we need to subtract one from the size and truncate back * To get the block where the last byte in the file currently resides,
* to a block boundary. We subtract 1 in case the size is * we need to subtract one from the size and truncate back to a block
* exactly on a block boundary. * boundary. We subtract 1 in case the size is exactly on a block
* boundary.
*/ */
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
...@@ -521,23 +503,18 @@ xfs_zero_eof( ...@@ -521,23 +503,18 @@ xfs_zero_eof(
while (start_zero_fsb <= end_zero_fsb) { while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1; nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
&imap, &nimaps, 0); &imap, &nimaps, 0);
if (error) { xfs_iunlock(ip, XFS_ILOCK_EXCL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); if (error)
return error; return error;
}
ASSERT(nimaps > 0); ASSERT(nimaps > 0);
if (imap.br_state == XFS_EXT_UNWRITTEN || if (imap.br_state == XFS_EXT_UNWRITTEN ||
imap.br_startblock == HOLESTARTBLOCK) { imap.br_startblock == HOLESTARTBLOCK) {
/*
* This loop handles initializing pages that were
* partially initialized by the code below this
* loop. It basically zeroes the part of the page
* that sits on a hole and sets the page as P_HOLE
* and calls remapf if it is a mapped file.
*/
start_zero_fsb = imap.br_startoff + imap.br_blockcount; start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
continue; continue;
...@@ -545,11 +522,7 @@ xfs_zero_eof( ...@@ -545,11 +522,7 @@ xfs_zero_eof(
/* /*
* There are blocks we need to zero. * There are blocks we need to zero.
* Drop the inode lock while we're doing the I/O.
* We'll still have the iolock to protect us.
*/ */
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
...@@ -557,22 +530,14 @@ xfs_zero_eof( ...@@ -557,22 +530,14 @@ xfs_zero_eof(
zero_len = offset - zero_off; zero_len = offset - zero_off;
error = xfs_iozero(ip, zero_off, zero_len); error = xfs_iozero(ip, zero_off, zero_len);
if (error) { if (error)
goto out_lock; return error;
}
start_zero_fsb = imap.br_startoff + imap.br_blockcount; start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
xfs_ilock(ip, XFS_ILOCK_EXCL);
} }
return 0; return 0;
out_lock:
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
} }
/* /*
...@@ -612,9 +577,7 @@ xfs_file_aio_write_checks( ...@@ -612,9 +577,7 @@ xfs_file_aio_write_checks(
xfs_rw_ilock(ip, *iolock); xfs_rw_ilock(ip, *iolock);
goto restart; goto restart;
} }
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
return error; return error;
} }
......
...@@ -764,9 +764,7 @@ xfs_setattr_size( ...@@ -764,9 +764,7 @@ xfs_setattr_size(
* before the inode is joined to the transaction to modify * before the inode is joined to the transaction to modify
* i_size. * i_size.
*/ */
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_zero_eof(ip, newsize, oldsize); error = xfs_zero_eof(ip, newsize, oldsize);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
goto out_unlock; goto out_unlock;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment