Commit 895aa7b1 authored by Jens Axboe

Merge branch 'async-buffered.8' into for-5.9/io_uring

Pull in async buffered reads branch.

* async-buffered.8:
  io_uring: support true async buffered reads, if file provides it
  mm: add kiocb_wait_page_queue_init() helper
  btrfs: flag files as supporting buffered async reads
  xfs: flag files as supporting buffered async reads
  block: flag block devices as supporting IOCB_WAITQ
  fs: add FMODE_BUF_RASYNC
  mm: support async buffered reads in generic_file_buffered_read()
  mm: add support for async page locking
  mm: abstract out wake_page_match() from wake_page_function()
  mm: allow read-ahead with IOCB_NOWAIT set
  io_uring: re-issue block requests that failed because of resources
  io_uring: catch -EIO from buffered issue request failure
  io_uring: always plug for any number of IOs
  block: provide plug based way of signaling forced no-wait semantics
parents 2e0464d4 bcf5a063
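
For context, the user-visible capability this series adds is a buffered (page-cache) read that io_uring can complete without punting to a worker thread, on files that set FMODE_BUF_RASYNC. The user-space sketch below is illustrative and not part of this merge; it assumes liburing is installed and omits error handling.

#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        char buf[4096];
        int fd = open("testfile", O_RDONLY);    /* buffered: no O_DIRECT */

        io_uring_queue_init(8, &ring, 0);
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
        io_uring_submit(&ring);

        /* With FMODE_BUF_RASYNC the kernel can run this read truly async
         * instead of handing it off to an io-wq worker thread. */
        io_uring_wait_cqe(&ring, &cqe);
        printf("read returned %d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}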
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -958,6 +958,7 @@ generic_make_request_checks(struct bio *bio)
 	struct request_queue *q;
 	int nr_sectors = bio_sectors(bio);
 	blk_status_t status = BLK_STS_IOERR;
+	struct blk_plug *plug;
 	char b[BDEVNAME_SIZE];
 
 	might_sleep();
@@ -971,6 +972,10 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	plug = blk_mq_plug(q, bio);
+	if (plug && plug->nowait)
+		bio->bi_opf |= REQ_NOWAIT;
+
 	/*
 	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 	 * if queue is not a request based queue.
@@ -1800,6 +1805,7 @@ void blk_start_plug(struct blk_plug *plug)
 	INIT_LIST_HEAD(&plug->cb_list);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
+	plug->nowait = false;
 
 	/*
 	 * Store ordering should not be needed here, since a potential
......
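
The plug-based signal works as follows: a submitter starts a plug, sets plug.nowait, and every bio that passes through generic_make_request_checks() while the plug is active picks up REQ_NOWAIT. A minimal in-kernel sketch, assuming the caller owns the submission context (io_uring is the consumer in this series):

	struct blk_plug plug;

	blk_start_plug(&plug);
	plug.nowait = true;	/* bios submitted below are upgraded to REQ_NOWAIT */
	/* ... submit_bio() calls here fail with BLK_STS_AGAIN rather than block ... */
	blk_finish_plug(&plug);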
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1851,7 +1851,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	 */
 	filp->f_flags |= O_LARGEFILE;
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 
 	if (filp->f_flags & O_NDELAY)
 		filp->f_mode |= FMODE_NDELAY;
......
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3472,7 +3472,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 static int btrfs_file_open(struct inode *inode, struct file *filp)
 {
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return generic_file_open(inode, filp);
 }
......
[This diff is collapsed and not shown: fs/io_uring.c]
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1080,7 +1080,7 @@ xfs_file_open(
 		return -EFBIG;
 	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
 		return -EIO;
-	file->f_mode |= FMODE_NOWAIT;
+	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return 0;
 }
......
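
Any filesystem whose ->read_iter path can tolerate IOCB_WAITQ opts in the same way, from its ->open handler. A hypothetical filesystem (names illustrative) would mirror the btrfs and xfs hunks above:

	static int myfs_file_open(struct inode *inode, struct file *filp)
	{
		/* myfs is hypothetical; the flag OR mirrors btrfs_file_open() */
		filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
		return generic_file_open(inode, filp);
	}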
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1189,6 +1189,7 @@ struct blk_plug {
 	struct list_head cb_list; /* md requires an unplug callback */
 	unsigned short rq_count;
 	bool multiple_queues;
+	bool nowait;
 };
 #define BLK_MAX_REQUEST_COUNT 16
 #define BLK_PLUG_FLUSH_SIZE (128 * 1024)
......
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File does not contribute to nr_files count */
 #define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)
 
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -315,6 +318,8 @@ enum rw_hint {
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
 #define IOCB_NOWAIT		(1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ		(1 << 8)
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -328,7 +333,10 @@ struct kiocb {
 	int			ki_flags;
 	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	unsigned int		ki_cookie; /* for ->iopoll */
+	union {
+		unsigned int		ki_cookie; /* for ->iopoll */
+		struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+	};
 	randomized_struct_fields_end
 };
......
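
Note on the kiocb union above: sharing storage between ki_cookie and ki_waitq is safe because polled IO (IOCB_HIPRI) and async buffered wakeups (IOCB_WAITQ) are mutually exclusive; kiocb_wait_page_queue_init() in the pagemap.h hunk below returns -EINVAL for IOCB_HIPRI before ki_waitq is ever set.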
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -496,8 +496,67 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 	return pgoff;
 }
 
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+	struct page *page;
+	int bit_nr;
+	int page_match;
+};
+
+struct wait_page_queue {
+	struct page *page;
+	int bit_nr;
+	wait_queue_entry_t wait;
+};
+
+static inline int wake_page_match(struct wait_page_queue *wait_page,
+				  struct wait_page_key *key)
+{
+	if (wait_page->page != key->page)
+		return 0;
+	key->page_match = 1;
+
+	if (wait_page->bit_nr != key->bit_nr)
+		return 0;
+
+	/*
+	 * Stop walking if it's locked.
+	 * Is this safe if put_and_wait_on_page_locked() is in use?
+	 * Yes: the waker must hold a reference to this page, and if PG_locked
+	 * has now already been set by another task, that task must also hold
+	 * a reference to the *same usage* of this page; so there is no need
+	 * to walk on to wake even the put_and_wait_on_page_locked() callers.
+	 */
+	if (test_bit(key->bit_nr, &key->page->flags))
+		return -1;
+
+	return 1;
+}
+
+static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
+					     struct wait_page_queue *wait,
+					     wait_queue_func_t func,
+					     void *data)
+{
+	/* Can't support async wakeup with polled IO */
+	if (kiocb->ki_flags & IOCB_HIPRI)
+		return -EINVAL;
+	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
+		wait->wait.func = func;
+		wait->wait.private = data;
+		wait->wait.flags = 0;
+		INIT_LIST_HEAD(&wait->wait.entry);
+		kiocb->ki_flags |= IOCB_WAITQ;
+		kiocb->ki_waitq = wait;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
 extern void unlock_page(struct page *page);
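
wake_page_match() gives wake functions a tri-state answer: 0 means keep walking the waitqueue (different page or bit), -1 means stop walking because the page is already locked again, and 1 means this entry matched and should be woken. A hedged sketch of how a consumer wires this up through kiocb_wait_page_queue_init(); the callback and request names are hypothetical (the real consumer in this series is io_uring, whose diff is collapsed above):

	static int my_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
				     int sync, void *arg)
	{
		struct wait_page_key *key = arg;
		struct wait_page_queue *wpq = container_of(wait,
						struct wait_page_queue, wait);
		int ret;

		ret = wake_page_match(wpq, key);
		if (ret != 1)
			return ret;

		/* Matched: detach and schedule a retry of the request stored
		 * in wait->private by the init call below. */
		list_del_init(&wait->entry);
		return 1;
	}

	/* In the submission path, before calling ->read_iter: */
	ret = kiocb_wait_page_queue_init(kiocb, &wpq, my_async_buf_func, req);
	if (ret)
		return ret;	/* -EOPNOTSUPP/-EINVAL: fall back to blocking or punted IO */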
@@ -534,6 +593,22 @@ static inline int lock_page_killable(struct page *page)
 	return 0;
 }
 
+/*
+ * lock_page_async - Lock the page, unless this would block. If the page
+ * is already locked, then queue a callback when the page becomes unlocked.
+ * This callback can then retry the operation.
+ *
+ * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
+ * was already locked and the callback defined in 'wait' was queued.
+ */
+static inline int lock_page_async(struct page *page,
+				  struct wait_page_queue *wait)
+{
+	if (!trylock_page(page))
+		return __lock_page_async(page, wait);
+	return 0;
+}
+
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
......
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -987,43 +987,16 @@ void __init pagecache_init(void)
 	page_writeback_init();
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
-	struct page *page;
-	int bit_nr;
-	int page_match;
-};
-
-struct wait_page_queue {
-	struct page *page;
-	int bit_nr;
-	wait_queue_entry_t wait;
-};
-
 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
 	struct wait_page_key *key = arg;
 	struct wait_page_queue *wait_page
 		= container_of(wait, struct wait_page_queue, wait);
+	int ret;
 
-	if (wait_page->page != key->page)
-		return 0;
-	key->page_match = 1;
-
-	if (wait_page->bit_nr != key->bit_nr)
-		return 0;
-
-	/*
-	 * Stop walking if it's locked.
-	 * Is this safe if put_and_wait_on_page_locked() is in use?
-	 * Yes: the waker must hold a reference to this page, and if PG_locked
-	 * has now already been set by another task, that task must also hold
-	 * a reference to the *same usage* of this page; so there is no need
-	 * to walk on to wake even the put_and_wait_on_page_locked() callers.
-	 */
-	if (test_bit(key->bit_nr, &key->page->flags))
-		return -1;
+	ret = wake_page_match(wait_page, key);
+	if (ret != 1)
+		return ret;
 
 	return autoremove_wake_function(wait, mode, sync, key);
 }
@@ -1207,6 +1180,44 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
+static int __wait_on_page_locked_async(struct page *page,
+				       struct wait_page_queue *wait, bool set)
+{
+	struct wait_queue_head *q = page_waitqueue(page);
+	int ret = 0;
+
+	wait->page = page;
+	wait->bit_nr = PG_locked;
+
+	spin_lock_irq(&q->lock);
+	__add_wait_queue_entry_tail(q, &wait->wait);
+	SetPageWaiters(page);
+	if (set)
+		ret = !trylock_page(page);
+	else
+		ret = PageLocked(page);
+	/*
+	 * If we were successful now, we know we're still on the
+	 * waitqueue as we're still under the lock. This means it's
+	 * safe to remove and return success, we know the callback
+	 * isn't going to trigger.
+	 */
+	if (!ret)
+		__remove_wait_queue(q, &wait->wait);
+	else
+		ret = -EIOCBQUEUED;
+	spin_unlock_irq(&q->lock);
+	return ret;
+}
+
+static int wait_on_page_locked_async(struct page *page,
+				     struct wait_page_queue *wait)
+{
+	if (!PageLocked(page))
+		return 0;
+	return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
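
The 'set' argument distinguishes the two callers: __lock_page_async() (below) passes true, so the helper attempts trylock_page() and only stays queued if the lock attempt fails; wait_on_page_locked_async() passes false, staying queued whenever the page is still locked without trying to take the lock. In both cases -EIOCBQUEUED tells the caller that its callback is armed and will fire when the page is unlocked.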
@@ -1369,6 +1380,11 @@ int __lock_page_killable(struct page *__page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+{
+	return __wait_on_page_locked_async(page, wait, true);
+}
+
 /*
  * Return values:
  * 1 - page is locked; mmap_lock is still held.
@@ -2028,8 +2044,6 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 		page = find_get_page(mapping, index);
 		if (!page) {
-			if (iocb->ki_flags & IOCB_NOWAIT)
-				goto would_block;
 			page_cache_sync_readahead(mapping,
 					ra, filp,
 					index, last_index - index);
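
The early IOCB_NOWAIT bail-out is removed because read-ahead may now be issued under IOCB_NOWAIT (the "mm: allow read-ahead with IOCB_NOWAIT set" commit in this merge); the non-blocking guarantee is instead enforced at the readpage step further down.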
@@ -2043,17 +2057,25 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_NOWAIT) {
-				put_page(page);
-				goto would_block;
-			}
-
 			/*
 			 * See comment in do_read_cache_page on why
 			 * wait_on_page_locked is used to avoid unnecessarily
 			 * serialisations and why it's safe.
 			 */
-			error = wait_on_page_locked_killable(page);
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
+				error = wait_on_page_locked_async(page,
+								iocb->ki_waitq);
+			} else {
+				if (iocb->ki_flags & IOCB_NOWAIT) {
+					put_page(page);
+					goto would_block;
+				}
+				error = wait_on_page_locked_killable(page);
+			}
 			if (unlikely(error))
 				goto readpage_error;
 			if (PageUptodate(page))
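
The "if (written) goto out" case returns a short read once any bytes have been copied: a request cannot both report partial progress and return -EIOCBQUEUED for an async retry, so completing with what has already been copied is the safe choice.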
@@ -2141,6 +2163,9 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		error = lock_page_killable(page);
+		if (iocb->ki_flags & IOCB_WAITQ)
+			error = lock_page_async(page, iocb->ki_waitq);
+		else
+			error = lock_page_killable(page);
 		if (unlikely(error))
 			goto readpage_error;
@@ -2160,6 +2185,11 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		}
 
 readpage:
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			unlock_page(page);
+			put_page(page);
+			goto would_block;
+		}
 		/*
 		 * A previous I/O error may have been due to temporary
 		 * failures, eg. multipath errors.
......
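
Putting the pieces together, the end-to-end flow this merge enables looks roughly like this (the io_uring-side steps are illustrative, as that diff is collapsed above):

/*
 * submit:   io_uring read on a FMODE_BUF_RASYNC file
 *             -> kiocb_wait_page_queue_init() sets IOCB_WAITQ and ki_waitq
 *             -> generic_file_buffered_read()
 *                  page locked? lock_page_async()/wait_on_page_locked_async()
 *                  -> -EIOCBQUEUED: callback queued, submitter returns
 *                     without blocking
 *
 * complete: read-ahead IO finishes and unlocks the page
 *             -> waitqueue walk -> wake_page_match() == 1
 *             -> callback re-queues the request; the retried read finds the
 *                page uptodate, copies it to userspace, and posts a CQE
 */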