Commit 5d888b48 authored by Darrick J. Wong

xfs: fix reflink source file racing with directio writes

While trawling through the dedupe file comparison code trying to fix
page deadlocking problems, Dave Chinner noticed that the reflink code
only takes shared IOLOCK/MMAPLOCKs on the source file.  Because
page_mkwrite and directio writes do not take the EXCL versions of those
locks, this means that reflink can race with writer processes.

For pure remapping this can lead to undefined behavior and file
corruption; for dedupe this means that we cannot be sure that the
contents are identical when we decide to go ahead with the remapping.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent edc58dd0
...@@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks( ...@@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks(
} }
/* /*
* Grab the exclusive iolock for a data copy from src to dest, making * Grab the exclusive iolock for a data copy from src to dest, making sure to
* sure to abide vfs locking order (lowest pointer value goes first) and * abide vfs locking order (lowest pointer value goes first) and breaking the
* breaking the pnfs layout leases on dest before proceeding. The loop * layout leases before proceeding. The loop is needed because we cannot call
* is needed because we cannot call the blocking break_layout() with the * the blocking break_layout() with the iolocks held, and therefore have to
* src iolock held, and therefore have to back out both locks. * back out both locks.
*/ */
static int static int
xfs_iolock_two_inodes_and_break_layout( xfs_iolock_two_inodes_and_break_layout(
...@@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout( ...@@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout(
{ {
int error; int error;
retry: if (src > dest)
if (src < dest) { swap(src, dest);
inode_lock_shared(src);
inode_lock_nested(dest, I_MUTEX_NONDIR2);
} else {
/* src >= dest */
inode_lock(dest);
}
error = break_layout(dest, false); retry:
if (error == -EWOULDBLOCK) { /* Wait to break both inodes' layouts before we start locking. */
inode_unlock(dest); error = break_layout(src, true);
if (src < dest) if (error)
inode_unlock_shared(src); return error;
if (src != dest) {
error = break_layout(dest, true); error = break_layout(dest, true);
if (error) if (error)
return error; return error;
goto retry;
} }
/* Lock one inode and make sure nobody got in and leased it. */
inode_lock(src);
error = break_layout(src, false);
if (error) { if (error) {
inode_unlock(src);
if (error == -EWOULDBLOCK)
goto retry;
return error;
}
if (src == dest)
return 0;
/* Lock the other inode and make sure nobody got in and leased it. */
inode_lock_nested(dest, I_MUTEX_NONDIR2);
error = break_layout(dest, false);
if (error) {
inode_unlock(src);
inode_unlock(dest); inode_unlock(dest);
if (src < dest) if (error == -EWOULDBLOCK)
inode_unlock_shared(src); goto retry;
return error; return error;
} }
if (src > dest)
inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
return 0; return 0;
} }
...@@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock( ...@@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock(
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode) if (!same_inode)
xfs_iunlock(src, XFS_MMAPLOCK_SHARED); xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
inode_unlock(inode_out); inode_unlock(inode_out);
if (!same_inode) if (!same_inode)
inode_unlock_shared(inode_in); inode_unlock(inode_in);
} }
/* /*
...@@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep( ...@@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep(
if (same_inode) if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL); xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else else
xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest,
XFS_MMAPLOCK_EXCL); XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */ /* Check file eligibility and prepare for block sharing. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment