Commit ee70daab authored by Eryu Guan, committed by Darrick J. Wong

xfs: update i_size after unwritten conversion in dio completion

Since commit d531d91d ("xfs: always use unwritten extents for
direct I/O writes"), we allocate unwritten extents for all
direct writes to allow appending aio in XFS.

But for dio writes that could extend file size we update the in-core
inode size first, then convert the unwritten extents to real
allocations at dio completion time in xfs_dio_write_end_io(). Thus a
racing direct read could see the new i_size and find the unwritten
extents first and read zeros instead of actual data, if the direct
writer also takes a shared iolock.

Fix it by updating the in-core inode size after the unwritten extent
conversion. To do this, introduce a new boolean argument to
xfs_iomap_write_unwritten() to tell if we want to update in-core
i_size or not.

Suggested-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 546e7be8
...@@ -343,7 +343,8 @@ xfs_end_io( ...@@ -343,7 +343,8 @@ xfs_end_io(
error = xfs_reflink_end_cow(ip, offset, size); error = xfs_reflink_end_cow(ip, offset, size);
break; break;
case XFS_IO_UNWRITTEN: case XFS_IO_UNWRITTEN:
error = xfs_iomap_write_unwritten(ip, offset, size); /* writeback should never update isize */
error = xfs_iomap_write_unwritten(ip, offset, size, false);
break; break;
default: default:
ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
......
...@@ -434,7 +434,6 @@ xfs_dio_write_end_io( ...@@ -434,7 +434,6 @@ xfs_dio_write_end_io(
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos; loff_t offset = iocb->ki_pos;
bool update_size = false;
int error = 0; int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size); trace_xfs_end_io_direct_write(ip, offset, size);
...@@ -445,6 +444,21 @@ xfs_dio_write_end_io( ...@@ -445,6 +444,21 @@ xfs_dio_write_end_io(
if (size <= 0) if (size <= 0)
return size; return size;
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
return error;
}
/*
* Unwritten conversion updates the in-core isize after extent
* conversion but before updating the on-disk size. Updating isize any
* earlier allows a racing dio read to find unwritten extents before
* they are converted.
*/
if (flags & IOMAP_DIO_UNWRITTEN)
return xfs_iomap_write_unwritten(ip, offset, size, true);
/* /*
* We need to update the in-core inode size here so that we don't end up * We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We * with the on-disk inode size being outside the in-core inode size. We
...@@ -459,20 +473,11 @@ xfs_dio_write_end_io( ...@@ -459,20 +473,11 @@ xfs_dio_write_end_io(
spin_lock(&ip->i_flags_lock); spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode)) { if (offset + size > i_size_read(inode)) {
i_size_write(inode, offset + size); i_size_write(inode, offset + size);
update_size = true; spin_unlock(&ip->i_flags_lock);
}
spin_unlock(&ip->i_flags_lock);
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
return error;
}
if (flags & IOMAP_DIO_UNWRITTEN)
error = xfs_iomap_write_unwritten(ip, offset, size);
else if (update_size)
error = xfs_setfilesize(ip, offset, size); error = xfs_setfilesize(ip, offset, size);
} else {
spin_unlock(&ip->i_flags_lock);
}
return error; return error;
} }
......
...@@ -829,7 +829,8 @@ int ...@@ -829,7 +829,8 @@ int
xfs_iomap_write_unwritten( xfs_iomap_write_unwritten(
xfs_inode_t *ip, xfs_inode_t *ip,
xfs_off_t offset, xfs_off_t offset,
xfs_off_t count) xfs_off_t count,
bool update_isize)
{ {
xfs_mount_t *mp = ip->i_mount; xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb; xfs_fileoff_t offset_fsb;
...@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten( ...@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
xfs_trans_t *tp; xfs_trans_t *tp;
xfs_bmbt_irec_t imap; xfs_bmbt_irec_t imap;
struct xfs_defer_ops dfops; struct xfs_defer_ops dfops;
struct inode *inode = VFS_I(ip);
xfs_fsize_t i_size; xfs_fsize_t i_size;
uint resblks; uint resblks;
int error; int error;
...@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten( ...@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
if (i_size > offset + count) if (i_size > offset + count)
i_size = offset + count; i_size = offset + count;
if (update_isize && i_size > i_size_read(inode))
i_size_write(inode, i_size);
i_size = xfs_new_eof(ip, i_size); i_size = xfs_new_eof(ip, i_size);
if (i_size) { if (i_size) {
ip->i_d.di_size = i_size; ip->i_d.di_size = i_size;
......
...@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, ...@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *, int); struct xfs_bmbt_irec *, int);
int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
struct xfs_bmbt_irec *); struct xfs_bmbt_irec *);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *); struct xfs_bmbt_irec *);
......
...@@ -274,7 +274,7 @@ xfs_fs_commit_blocks( ...@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
(end - 1) >> PAGE_SHIFT); (end - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(error); WARN_ON_ONCE(error);
error = xfs_iomap_write_unwritten(ip, start, length); error = xfs_iomap_write_unwritten(ip, start, length, false);
if (error) if (error)
goto out_drop_iolock; goto out_drop_iolock;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment