Commit ed1128c2 authored by Dave Chinner's avatar Dave Chinner Committed by Darrick J. Wong

xfs: reduce exclusive locking on unaligned dio

Attempt shared locking for unaligned DIO, but only if the the
underlying extent is already allocated and in written state. On
failure, retry with the existing exclusive locking.

Test case is fio randrw of 512 byte IOs using AIO and an iodepth of
32 IOs.

Vanilla:

  READ: bw=4560KiB/s (4670kB/s), 4560KiB/s-4560KiB/s (4670kB/s-4670kB/s), io=134MiB (140MB), run=30001-30001msec
  WRITE: bw=4567KiB/s (4676kB/s), 4567KiB/s-4567KiB/s (4676kB/s-4676kB/s), io=134MiB (140MB), run=30001-30001msec

Patched:
   READ: bw=37.6MiB/s (39.4MB/s), 37.6MiB/s-37.6MiB/s (39.4MB/s-39.4MB/s), io=1127MiB (1182MB), run=30002-30002msec
  WRITE: bw=37.6MiB/s (39.4MB/s), 37.6MiB/s-37.6MiB/s (39.4MB/s-39.4MB/s), io=1128MiB (1183MB), run=30002-30002msec

That's an improvement from ~18k IOPS to a ~150k IOPS, which is
about the IOPS limit of the VM block device setup I'm testing on.

4kB block IO comparison:

   READ: bw=296MiB/s (310MB/s), 296MiB/s-296MiB/s (310MB/s-310MB/s), io=8868MiB (9299MB), run=30002-30002msec
  WRITE: bw=296MiB/s (310MB/s), 296MiB/s-296MiB/s (310MB/s-310MB/s), io=8878MiB (9309MB), run=30002-30002msec

Which is ~150k IOPS, same as what the test gets for sub-block
AIO+DIO writes with this patch.
Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
[hch: rebased, split unaligned from nowait]
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarBrian Foster <bfoster@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
parent caa89dbc
...@@ -544,10 +544,13 @@ xfs_file_dio_write_aligned( ...@@ -544,10 +544,13 @@ xfs_file_dio_write_aligned(
* to do sub-block zeroing and that requires serialisation against other direct * to do sub-block zeroing and that requires serialisation against other direct
* I/O to the same block. In this case we need to serialise the submission of * I/O to the same block. In this case we need to serialise the submission of
* the unaligned I/O so that we don't get racing block zeroing in the dio layer. * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
* In the case where sub-block zeroing is not required, we can do concurrent
* sub-block dios to the same block successfully.
* *
* This means that unaligned dio writes always block. There is no "nowait" fast * Optimistically submit the I/O using the shared lock first, but use the
* path in this code - if IOCB_NOWAIT is set we simply return -EAGAIN up front * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
* and we don't have to worry about that anymore. * if block allocation or partial block zeroing would be required. In that case
* we try again with the exclusive lock.
*/ */
static noinline ssize_t static noinline ssize_t
xfs_file_dio_write_unaligned( xfs_file_dio_write_unaligned(
...@@ -555,13 +558,28 @@ xfs_file_dio_write_unaligned( ...@@ -555,13 +558,28 @@ xfs_file_dio_write_unaligned(
struct kiocb *iocb, struct kiocb *iocb,
struct iov_iter *from) struct iov_iter *from)
{ {
int iolock = XFS_IOLOCK_EXCL; size_t isize = i_size_read(VFS_I(ip));
size_t count = iov_iter_count(from);
int iolock = XFS_IOLOCK_SHARED;
unsigned int flags = IOMAP_DIO_OVERWRITE_ONLY;
ssize_t ret; ssize_t ret;
/* unaligned dio always waits, bail */ /*
* Extending writes need exclusivity because of the sub-block zeroing
* that the DIO code always does for partial tail blocks beyond EOF, so
* don't even bother trying the fast path in this case.
*/
if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
retry_exclusive:
if (iocb->ki_flags & IOCB_NOWAIT) if (iocb->ki_flags & IOCB_NOWAIT)
return -EAGAIN; return -EAGAIN;
xfs_ilock(ip, iolock); iolock = XFS_IOLOCK_EXCL;
flags = IOMAP_DIO_FORCE_WAIT;
}
ret = xfs_ilock_iocb(iocb, iolock);
if (ret)
return ret;
/* /*
* We can't properly handle unaligned direct I/O to reflink files yet, * We can't properly handle unaligned direct I/O to reflink files yet,
...@@ -578,15 +596,29 @@ xfs_file_dio_write_unaligned( ...@@ -578,15 +596,29 @@ xfs_file_dio_write_unaligned(
goto out_unlock; goto out_unlock;
/* /*
* If we are doing unaligned I/O, this must be the only I/O in-flight. * If we are doing exclusive unaligned I/O, this must be the only I/O
* Otherwise we risk data corruption due to unwritten extent conversions * in-flight. Otherwise we risk data corruption due to unwritten extent
* from the AIO end_io handler. Wait for all other I/O to drain first. * conversions from the AIO end_io handler. Wait for all other I/O to
* drain first.
*/ */
if (flags & IOMAP_DIO_FORCE_WAIT)
inode_dio_wait(VFS_I(ip)); inode_dio_wait(VFS_I(ip));
trace_xfs_file_direct_write(iocb, from); trace_xfs_file_direct_write(iocb, from);
ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
&xfs_dio_write_ops, IOMAP_DIO_FORCE_WAIT); &xfs_dio_write_ops, flags);
/*
* Retry unaligned I/O with exclusive blocking semantics if the DIO
* layer rejected it for mapping or locking reasons. If we are doing
* nonblocking user I/O, propagate the error.
*/
if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
xfs_iunlock(ip, iolock);
goto retry_exclusive;
}
out_unlock: out_unlock:
if (iolock) if (iolock)
xfs_iunlock(ip, iolock); xfs_iunlock(ip, iolock);
......
...@@ -784,14 +784,27 @@ xfs_direct_write_iomap_begin( ...@@ -784,14 +784,27 @@ xfs_direct_write_iomap_begin(
goto allocate_blocks; goto allocate_blocks;
/* /*
* NOWAIT IO needs to span the entire requested IO with a single map so * NOWAIT and OVERWRITE I/O needs to span the entire requested I/O with
* that we avoid partial IO failures due to the rest of the IO range not * a single map so that we avoid partial IO failures due to the rest of
* covered by this map triggering an EAGAIN condition when it is * the I/O range not covered by this map triggering an EAGAIN condition
* subsequently mapped and aborting the IO. * when it is subsequently mapped and aborting the I/O.
*/ */
if ((flags & IOMAP_NOWAIT) && if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) {
!imap_spans_range(&imap, offset_fsb, end_fsb)) {
error = -EAGAIN; error = -EAGAIN;
if (!imap_spans_range(&imap, offset_fsb, end_fsb))
goto out_unlock;
}
/*
* For overwrite only I/O, we cannot convert unwritten extents without
* requiring sub-block zeroing. This can only be done under an
* exclusive IOLOCK, hence return -EAGAIN if this is not a written
* extent to tell the caller to try again.
*/
if (flags & IOMAP_OVERWRITE_ONLY) {
error = -EAGAIN;
if (imap.br_state != XFS_EXT_NORM &&
((offset | length) & mp->m_blockmask))
goto out_unlock; goto out_unlock;
} }
...@@ -801,7 +814,7 @@ xfs_direct_write_iomap_begin( ...@@ -801,7 +814,7 @@ xfs_direct_write_iomap_begin(
allocate_blocks: allocate_blocks:
error = -EAGAIN; error = -EAGAIN;
if (flags & IOMAP_NOWAIT) if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
goto out_unlock; goto out_unlock;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment