Commit acf7aa2c authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Use a sync iocb for generic_file_read

This adds support for synchronous iocbs and converts generic_file_read
to use a sync iocb to call into generic_file_aio_read.

The tests I've run with lmbench on a piii-866 showed no difference in
file re-read speed when forced to use a completion path via aio_complete
and an -EIOCBQUEUED return from generic_file_aio_read -- people with
slower machines might want to test this to see if we can tune it any
better.  Also included is a bug fix correcting a missing call into the
aio code from the fork code.  This patch sets things up for making
generic_file_aio_read actually asynchronous.
parent a83638a4
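The shape of the conversion is small: the caller stack-allocates a kiocb, initializes it as a sync iocb, calls the aio entry point, and sleeps only if the request was queued. This is exactly the pattern the mm/filemap.c hunks below add to generic_file_read():

	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	return ret;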
fs/aio.c
@@ -30,10 +30,11 @@
 #include <linux/compiler.h>
 #include <linux/brlock.h>
 #include <linux/module.h>
-#include <linux/tqueue.h>
+#include <linux/highmem.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
-#include <linux/highmem.h>
 
 #if DEBUG > 1
 #define dprintk		printk
@@ -304,10 +305,25 @@ void wait_for_all_aios(struct kioctx *ctx)
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	}
-	set_task_state(tsk, TASK_RUNNING);
+	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
 }
 
+/* wait_on_sync_kiocb:
+ *	Waits on the given sync kiocb to complete.
+ */
+ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
+{
+	while (iocb->ki_users) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!iocb->ki_users)
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+	return iocb->ki_user_data;
+}
+
 /* exit_aio: called when the last user of mm goes away.  At this point,
  * there is no way for any new requests to be submited or any of the
  * io_* syscalls to be called on the context.  However, there may be
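The loop in wait_on_sync_kiocb() follows the standard kernel idiom for sleeping on a condition without losing a wakeup: publish TASK_UNINTERRUPTIBLE first, re-check the condition, and only then schedule(), so a completion that fires between the check and the sleep still finds a runnable-again task when the waker runs. A generic sketch of the idiom (condition_is_met() is illustrative, not from this patch):

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);	/* publish intent to sleep */
		if (condition_is_met())				/* re-check after the state change */
			break;
		schedule();					/* sleep until a waker runs wake_up_process() */
	}
	__set_current_state(TASK_RUNNING);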
@@ -516,13 +532,36 @@ static inline struct kioctx *lookup_ioctx(unsigned long ctx_id)
 int aio_complete(struct kiocb *iocb, long res, long res2)
 {
 	struct kioctx	*ctx = iocb->ki_ctx;
-	struct aio_ring_info	*info = &ctx->ring_info;
+	struct aio_ring_info	*info;
 	struct aio_ring	*ring;
 	struct io_event	*event;
 	unsigned long	flags;
 	unsigned long	tail;
 	int		ret;
 
+	/* Special case handling for sync iocbs: events go directly
+	 * into the iocb for fast handling.  Note that this will not
+	 * work if we allow sync kiocbs to be cancelled. in which
+	 * case the usage count checks will have to move under ctx_lock
+	 * for all cases.
+	 */
+	if (ctx == &ctx->mm->default_kioctx) {
+		int ret;
+
+		iocb->ki_user_data = res;
+		if (iocb->ki_users == 1) {
+			iocb->ki_users = 0;
+			return 1;
+		}
+		spin_lock_irq(&ctx->ctx_lock);
+		iocb->ki_users--;
+		ret = (0 == iocb->ki_users);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return 0;
+	}
+
+	info = &ctx->ring_info;
+
 	/* add a completion event to the ring buffer.
 	 * must be done holding ctx->ctx_lock to prevent
 	 * other code from messing with the tail
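For a sync iocb no event is queued in the ring: the result lands in ki_user_data, and the ki_users count is the handshake that wait_on_sync_kiocb() polls; once it drops to zero the waiter returns ki_user_data. Note that init_sync_kiocb() below stores the submitting task in ki_user_obj, which is the hook the completion side needs to wake the sleeper; later kernels make this explicit by ending this branch with a wake_up_process() on that task. An editorial sketch of the intended completion side (not patch text; the wake call is not in this hunk):

	iocb->ki_user_data = res;		/* result read back by wait_on_sync_kiocb() */
	iocb->ki_users--;			/* drops to 0: the waiter's loop condition */
	wake_up_process(iocb->ki_user_obj);	/* make the sleeping submitter runnable */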
include/linux/aio.h
 #ifndef __LINUX__AIO_H
 #define __LINUX__AIO_H
 
-#include <linux/tqueue.h>
 #include <linux/list.h>
 #include <asm/atomic.h>
 
@@ -21,10 +20,14 @@ struct kioctx;
 #define KIOCB_C_CANCELLED	0x01
 #define KIOCB_C_COMPLETE	0x02
 
+#define KIOCB_SYNC_KEY		(~0U)
+
 #define KIOCB_PRIVATE_SIZE	(16 * sizeof(long))
 
 struct kiocb {
 	int		ki_users;
+	unsigned	ki_key;		/* id of this request */
 	struct file	*ki_filp;
 	struct kioctx	*ki_ctx;	/* may be NULL for sync ops */
 	int		(*ki_cancel)(struct kiocb *, struct io_event *);
 
@@ -34,17 +37,19 @@ struct kiocb {
 	void		*ki_data;	/* for use by the the file */
 	void		*ki_user_obj;	/* pointer to userland's iocb */
 	__u64		ki_user_data;	/* user's data for completion */
-	unsigned	ki_key;		/* id of this request */
 
 	long		private[KIOCB_PRIVATE_SIZE/sizeof(long)];
 };
 
 #define init_sync_kiocb(x, filp)			\
 	do {						\
+		struct task_struct *tsk = current;	\
 		(x)->ki_users = 1;			\
+		(x)->ki_key = KIOCB_SYNC_KEY;		\
 		(x)->ki_filp = (filp);			\
-		(x)->ki_ctx = 0;			\
+		(x)->ki_ctx = &tsk->active_mm->default_kioctx; \
 		(x)->ki_cancel = NULL;			\
+		(x)->ki_user_obj = tsk;			\
 	} while (0)
 
 #define AIO_RING_MAGIC			0xa10a10a1
 
@@ -105,6 +110,7 @@ struct kioctx {
 /* prototypes */
 extern unsigned aio_max_size;
 
+extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
 extern int FASTCALL(aio_put_req(struct kiocb *iocb));
 extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
 extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
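Two markers now identify a sync iocb: ki_key is KIOCB_SYNC_KEY, and ki_ctx points at the owning mm's default_kioctx. The patch tests the latter directly in aio_complete(); a helper along these lines (hypothetical here, though later kernels added an equivalent is_sync_kiocb()) captures the same test:

	/* Hypothetical helper, not part of this patch; later kernels grew an
	 * equivalent is_sync_kiocb().  True for iocbs set up by init_sync_kiocb(). */
	static inline int is_sync_kiocb(struct kiocb *kiocb)
	{
		return kiocb->ki_key == KIOCB_SYNC_KEY;
	}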
include/linux/init_task.h
@@ -18,15 +18,29 @@
 	.fd_array	= { NULL, }		\
 }
 
+#define INIT_KIOCTX(name, which_mm) \
+{							\
+	.users		= ATOMIC_INIT(1),		\
+	.dead		= 0,				\
+	.mm		= &which_mm,			\
+	.user_id	= 0,				\
+	.next		= NULL,				\
+	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
+	.ctx_lock	= SPIN_LOCK_UNLOCKED,		\
+	.reqs_active	= 0U,				\
+	.max_reqs	= ~0U,				\
+}
+
 #define INIT_MM(name) \
 {							\
 	.mm_rb		= RB_ROOT,			\
 	.pgd		= swapper_pg_dir,		\
 	.mm_users	= ATOMIC_INIT(2),		\
 	.mm_count	= ATOMIC_INIT(1),		\
 	.mmap_sem	= __RWSEM_INITIALIZER(name.mmap_sem), \
 	.page_table_lock = SPIN_LOCK_UNLOCKED,		\
 	.mmlist		= LIST_HEAD_INIT(name.mmlist),	\
+	.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
 }
 
 #define INIT_SIGNALS(sig) {	\
include/linux/sched.h
@@ -169,7 +169,8 @@ struct namespace;
 /* Maximum number of active map areas.. This is a random (large) number */
 #define MAX_MAP_COUNT	(65536)
 
-struct kioctx;
+#include <linux/aio.h>
 
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	rb_root_t mm_rb;
 
@@ -202,6 +203,8 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+
+	struct kioctx		default_kioctx;
 };
 
 extern int mmlist_nr;
kernel/fork.c
@@ -296,12 +296,16 @@ int mmlist_nr;
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
+#include <linux/init_task.h>
+
 static struct mm_struct * mm_init(struct mm_struct * mm)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	mm->page_table_lock = SPIN_LOCK_UNLOCKED;
+	mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
+	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 	mm->pgd = pgd_alloc(mm);
 	if (mm->pgd)
 		return mm;
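This hunk is the fork-path bug fix the commit message mentions: mm_init() previously never initialized the mm's aio fields, so a fork-created mm carried an uninitialized ioctx_list_lock and, with this patch, would also have lacked a usable default_kioctx. After the fix every mm, whether the static init_mm via INIT_MM or one built by mm_init(), upholds the invariant the sync-iocb paths rely on; an illustrative assertion (not patch code):

	/* Illustrative only: holds for any mm once INIT_MM or mm_init() has run. */
	struct kioctx *ctx = &current->mm->default_kioctx;
	BUG_ON(ctx->mm != current->mm);	/* INIT_KIOCTX wires ctx->mm back to its mm */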
mm/filemap.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/aio.h>
 #include <linux/kernel_stat.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 
@@ -1123,9 +1124,10 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
  * that can use the page cache directly.
  */
 static ssize_t
-__generic_file_read(struct file *filp, const struct iovec *iov,
+__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long nr_segs, loff_t *ppos)
 {
+	struct file *filp = iocb->ki_filp;
 	ssize_t retval;
 	unsigned long seg;
 	size_t count = iov_length(iov, nr_segs);
 
@@ -1188,12 +1190,26 @@ __generic_file_read(struct file *filp, const struct iovec *iov,
 	return retval;
 }
 
+ssize_t
+generic_file_aio_read(struct kiocb *iocb, char *buf, size_t count, loff_t *ppos)
+{
+	struct iovec local_iov = { .iov_base = buf, .iov_len = count };
+
+	return __generic_file_aio_read(iocb, &local_iov, 1, ppos);
+}
+
 ssize_t
 generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
 {
 	struct iovec local_iov = { .iov_base = buf, .iov_len = count };
+	struct kiocb kiocb;
+	ssize_t ret;
 
-	return __generic_file_read(filp, &local_iov, 1, ppos);
+	init_sync_kiocb(&kiocb, filp);
+	ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	return ret;
 }
 
 static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 
@@ -2210,7 +2226,14 @@ ssize_t generic_file_write(struct file *file, const char *buf,
 ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
 			   unsigned long nr_segs, loff_t *ppos)
 {
-	return __generic_file_read(filp, iov, nr_segs, ppos);
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	return ret;
 }
 
 ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
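The contract this establishes for generic_file_aio_read(), and for any aio-capable read method modeled on it: either complete synchronously and return a byte count or error, or return -EIOCBQUEUED and deliver the real result later through aio_complete(). A minimal sketch of a method honoring that contract; every example_* helper named here is hypothetical:

	static ssize_t example_aio_read(struct kiocb *iocb, char *buf, size_t count,
					loff_t *ppos)
	{
		/* Hypothetical fast path: data already at hand, complete in-line. */
		if (example_data_ready(iocb->ki_filp))
			return example_copy_out(iocb->ki_filp, buf, count, ppos);

		/* Otherwise kick off the I/O; the completion handler must call
		 * aio_complete(iocb, bytes_done, 0) when it finishes.  For a
		 * sync caller, that aio_complete() is what ends the wait in
		 * wait_on_sync_kiocb(). */
		example_start_io(iocb, buf, count, *ppos);
		return -EIOCBQUEUED;
	}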