Commit ebf00be3, authored by Andreas Gruenbacher, committed by Darrick J. Wong

iomap: complete partial direct I/O writes synchronously

According to xfstest generic/240, applications seem to expect direct I/O
writes to either complete as a whole or to fail; short direct I/O writes
are apparently not appreciated.  This means that when only part of an
asynchronous direct I/O write succeeds, we can either fail the entire
write, or we can wait for the partial write to complete and retry the
remaining write as buffered I/O.  The old __blockdev_direct_IO helper
has code for waiting for partial writes to complete; the new
iomap_dio_rw iomap helper does not.

The above-mentioned fallback mode is needed for gfs2, which doesn't
allow block allocations under direct I/O to avoid taking cluster-wide
exclusive locks.  As a consequence, an asynchronous direct I/O write to
a file range that contains a hole will result in a short write.  In that
case, wait for the short write to complete to allow gfs2 to recover.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 3d7b6b21
...@@ -811,6 +811,7 @@ struct iomap_dio { ...@@ -811,6 +811,7 @@ struct iomap_dio {
atomic_t ref; atomic_t ref;
unsigned flags; unsigned flags;
int error; int error;
bool wait_for_completion;
union { union {
/* used during submission and for synchronous completion: */ /* used during submission and for synchronous completion: */
...@@ -914,9 +915,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) ...@@ -914,9 +915,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) { if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) { if (dio->wait_for_completion) {
struct task_struct *waiter = dio->submit.waiter; struct task_struct *waiter = dio->submit.waiter;
WRITE_ONCE(dio->submit.waiter, NULL); WRITE_ONCE(dio->submit.waiter, NULL);
wake_up_process(waiter); wake_up_process(waiter);
} else if (dio->flags & IOMAP_DIO_WRITE) { } else if (dio->flags & IOMAP_DIO_WRITE) {
...@@ -1131,13 +1131,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -1131,13 +1131,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->end_io = end_io; dio->end_io = end_io;
dio->error = 0; dio->error = 0;
dio->flags = 0; dio->flags = 0;
dio->wait_for_completion = is_sync_kiocb(iocb);
dio->submit.iter = iter; dio->submit.iter = iter;
if (is_sync_kiocb(iocb)) { dio->submit.waiter = current;
dio->submit.waiter = current; dio->submit.cookie = BLK_QC_T_NONE;
dio->submit.cookie = BLK_QC_T_NONE; dio->submit.last_queue = NULL;
dio->submit.last_queue = NULL;
}
if (iov_iter_rw(iter) == READ) { if (iov_iter_rw(iter) == READ) {
if (pos >= dio->i_size) if (pos >= dio->i_size)
...@@ -1187,7 +1186,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -1187,7 +1186,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio_warn_stale_pagecache(iocb->ki_filp); dio_warn_stale_pagecache(iocb->ki_filp);
ret = 0; ret = 0;
if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
!inode->i_sb->s_dio_done_wq) { !inode->i_sb->s_dio_done_wq) {
ret = sb_init_dio_done_wq(inode->i_sb); ret = sb_init_dio_done_wq(inode->i_sb);
if (ret < 0) if (ret < 0)
...@@ -1202,8 +1201,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -1202,8 +1201,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
iomap_dio_actor); iomap_dio_actor);
if (ret <= 0) { if (ret <= 0) {
/* magic error code to fall back to buffered I/O */ /* magic error code to fall back to buffered I/O */
if (ret == -ENOTBLK) if (ret == -ENOTBLK) {
dio->wait_for_completion = true;
ret = 0; ret = 0;
}
break; break;
} }
pos += ret; pos += ret;
...@@ -1224,7 +1225,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -1224,7 +1225,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->flags &= ~IOMAP_DIO_NEED_SYNC; dio->flags &= ~IOMAP_DIO_NEED_SYNC;
if (!atomic_dec_and_test(&dio->ref)) { if (!atomic_dec_and_test(&dio->ref)) {
if (!is_sync_kiocb(iocb)) if (!dio->wait_for_completion)
return -EIOCBQUEUED; return -EIOCBQUEUED;
for (;;) { for (;;) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment