Commit 9650b453, authored by Ming Lei, committed by Jens Axboe

block: ignore RWF_HIPRI hint for sync dio

So far, a bio is marked as REQ_POLLED if RWF_HIPRI/IOCB_HIPRI is passed
from the userspace sync I/O interface, and the block layer then tries to
poll until the bio is completed. But the current implementation calls
blk_io_schedule() if bio_poll() returns 0, which easily causes an I/O hang
or timeout.

But it looks like no one has reported this kind of issue, even though it
should have been triggered by a normal I/O poll sanity test or by blktests
block/007, as observed by Changhui. That means it is very likely that no
one uses it or no one cares about it.

Also, since io_uring was introduced, I/O polling for sync dio has become a
legacy interface.

So ignore RWF_HIPRI hint for sync dio.

CC: linux-mm@kvack.org
Cc: linux-xfs@vger.kernel.org
Reported-by: Changhui Zhong <czhong@redhat.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Changhui Zhong <czhong@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220420143110.2679002-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e233fe1a
...@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) ...@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
#define DIO_INLINE_BIO_VECS 4 #define DIO_INLINE_BIO_VECS 4
static void blkdev_bio_end_io_simple(struct bio *bio)
{
struct task_struct *waiter = bio->bi_private;
WRITE_ONCE(bio->bi_private, NULL);
blk_wake_io_task(waiter);
}
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages) struct iov_iter *iter, unsigned int nr_pages)
{ {
...@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, ...@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
} }
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
bio.bi_ioprio = iocb->ki_ioprio; bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter); ret = bio_iov_iter_get_pages(&bio, iter);
...@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, ...@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT) if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT; bio.bi_opf |= REQ_NOWAIT;
if (iocb->ki_flags & IOCB_HIPRI)
bio_set_polled(&bio, iocb);
submit_bio(&bio); submit_bio_wait(&bio);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(bio.bi_private))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
bio_release_pages(&bio, should_dirty); bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status)) if (unlikely(bio.bi_status))
......
...@@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, ...@@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
{ {
atomic_inc(&dio->ref); atomic_inc(&dio->ref);
if (dio->iocb->ki_flags & IOCB_HIPRI) { /* Sync dio can't be polled reliably */
if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
bio_set_polled(bio, dio->iocb); bio_set_polled(bio, dio->iocb);
dio->submit.poll_bio = bio; dio->submit.poll_bio = bio;
} }
...@@ -653,8 +654,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -653,8 +654,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!READ_ONCE(dio->submit.waiter)) if (!READ_ONCE(dio->submit.waiter))
break; break;
if (!dio->submit.poll_bio ||
!bio_poll(dio->submit.poll_bio, NULL, 0))
blk_io_schedule(); blk_io_schedule();
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
......
...@@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous) ...@@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous)
* attempt to access it in the page fault retry time check. * attempt to access it in the page fault retry time check.
*/ */
if (synchronous) { if (synchronous) {
bio->bi_opf |= REQ_POLLED;
get_task_struct(current); get_task_struct(current);
bio->bi_private = current; bio->bi_private = current;
} }
...@@ -372,7 +371,6 @@ int swap_readpage(struct page *page, bool synchronous) ...@@ -372,7 +371,6 @@ int swap_readpage(struct page *page, bool synchronous)
if (!READ_ONCE(bio->bi_private)) if (!READ_ONCE(bio->bi_private))
break; break;
if (!bio_poll(bio, NULL, 0))
blk_io_schedule(); blk_io_schedule();
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment