Commit acf7aa2c authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Use a sync iocb for generic_file_read

This adds support for synchronous iocbs and converts generic_file_read
to use a sync iocb to call into generic_file_aio_read.

The tests I've run with lmbench on a piii-866 showed no difference in
file re-read speed when forced to use a completion path via aio_complete
and an -EIOCBQUEUED return from generic_file_aio_read -- people with
slower machines might want to test this to see if we can tune it any
better.  Also included is a fix for a missing call into the aio code
from the fork code.  This patch sets things up for making
generic_file_aio_read actually asynchronous.
parent a83638a4
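Before the diff, here is a minimal sketch of the synchronous-iocb pattern this change introduces. It uses only names that appear in the patch below (generic_file_read itself is converted to exactly this shape); the wrapper function name is illustrative, not part of the patch.

	/* Illustrative only -- assumes <linux/fs.h> and <linux/aio.h>.
	 * A synchronous caller drives the aio entry point like this. */
	static ssize_t sync_read_example(struct file *filp, char *buf,
					 size_t count, loff_t *ppos)
	{
		struct kiocb kiocb;
		ssize_t ret;

		init_sync_kiocb(&kiocb, filp);	/* stack iocb, ki_users == 1 */
		ret = generic_file_aio_read(&kiocb, buf, count, ppos);
		if (-EIOCBQUEUED == ret)
			/* the read went asynchronous; sleep until
			 * aio_complete() reports the result via the kiocb */
			ret = wait_on_sync_kiocb(&kiocb);
		return ret;
	}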
@@ -30,10 +30,11 @@
#include <linux/compiler.h>
#include <linux/brlock.h>
#include <linux/module.h>
#include <linux/tqueue.h>
#include <linux/highmem.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
#include <linux/highmem.h>
#if DEBUG > 1
#define dprintk printk
@@ -304,10 +305,25 @@ void wait_for_all_aios(struct kioctx *ctx)
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
set_task_state(tsk, TASK_RUNNING);
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(&ctx->wait, &wait);
}
/* wait_on_sync_kiocb:
* Waits on the given sync kiocb to complete.
*/
ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
{
while (iocb->ki_users) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!iocb->ki_users)
break;
schedule();
}
__set_current_state(TASK_RUNNING);
return iocb->ki_user_data;
}
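The contract of the loop above is simple: whoever finishes the I/O stores the result in ki_user_data and drops ki_users to zero, and the waiter then returns that value. A stripped-down sketch of the completion side follows; the real code is the sync-iocb branch added to aio_complete() further down, which also handles the multi-reference case under ctx_lock, and this sketch ignores how the sleeping task is woken. The function name here is illustrative, not part of the patch.

	/* Sketch of the completion side's obligations for a sync kiocb. */
	static void complete_sync_kiocb_sketch(struct kiocb *iocb, long res)
	{
		iocb->ki_user_data = res;	/* value returned by wait_on_sync_kiocb() */
		iocb->ki_users = 0;		/* lets the waiter's loop exit */
	}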
/* exit_aio: called when the last user of mm goes away. At this point,
* there is no way for any new requests to be submitted or any of the
* io_* syscalls to be called on the context. However, there may be
@@ -516,13 +532,36 @@ static inline struct kioctx *lookup_ioctx(unsigned long ctx_id)
int aio_complete(struct kiocb *iocb, long res, long res2)
{
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring_info *info = &ctx->ring_info;
struct aio_ring_info *info;
struct aio_ring *ring;
struct io_event *event;
unsigned long flags;
unsigned long tail;
int ret;
/* Special case handling for sync iocbs: events go directly
* into the iocb for fast handling. Note that this will not
* work if we allow sync kiocbs to be cancelled, in which
* case the usage count checks will have to move under ctx_lock
* for all cases.
*/
if (ctx == &ctx->mm->default_kioctx) {
int ret;
iocb->ki_user_data = res;
if (iocb->ki_users == 1) {
iocb->ki_users = 0;
return 1;
}
spin_lock_irq(&ctx->ctx_lock);
iocb->ki_users--;
ret = (0 == iocb->ki_users);
spin_unlock_irq(&ctx->ctx_lock);
return 0;
}
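To connect the two halves: a read implementation that goes asynchronous returns -EIOCBQUEUED to its caller and later reports the result through aio_complete(); when the kiocb is a sync one, aio_complete() takes the branch above instead of writing an event into the ring. A hypothetical producer is sketched below; the function and the queue_hardware_read() helper are assumptions for illustration, not code from this patch, and the signature simply mirrors generic_file_aio_read().

	/* Hypothetical async read path: queue the I/O, return -EIOCBQUEUED,
	 * and report the result later from the completion handler. */
	static ssize_t example_aio_read(struct kiocb *iocb, char *buf,
					size_t count, loff_t *ppos)
	{
		queue_hardware_read(iocb, buf, count, ppos);	/* assumed helper */
		return -EIOCBQUEUED;
	}

	/* ...later, from the I/O completion handler:
	 *	aio_complete(iocb, bytes_transferred, 0);
	 */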
info = &ctx->ring_info;
/* add a completion event to the ring buffer.
* must be done holding ctx->ctx_lock to prevent
* other code from messing with the tail
......
#ifndef __LINUX__AIO_H
#define __LINUX__AIO_H
#include <linux/tqueue.h>
#include <linux/list.h>
#include <asm/atomic.h>
@@ -21,10 +20,14 @@ struct kioctx;
#define KIOCB_C_CANCELLED 0x01
#define KIOCB_C_COMPLETE 0x02
#define KIOCB_SYNC_KEY (~0U)
#define KIOCB_PRIVATE_SIZE (16 * sizeof(long))
struct kiocb {
int ki_users;
unsigned ki_key; /* id of this request */
struct file *ki_filp;
struct kioctx *ki_ctx; /* may be NULL for sync ops */
int (*ki_cancel)(struct kiocb *, struct io_event *);
@@ -34,17 +37,19 @@ struct kiocb {
void *ki_data; /* for use by the file */
void *ki_user_obj; /* pointer to userland's iocb */
__u64 ki_user_data; /* user's data for completion */
unsigned ki_key; /* id of this request */
long private[KIOCB_PRIVATE_SIZE/sizeof(long)];
};
#define init_sync_kiocb(x, filp) \
do { \
(x)->ki_users = 1; \
(x)->ki_filp = (filp); \
(x)->ki_ctx = 0; \
(x)->ki_cancel = NULL; \
#define init_sync_kiocb(x, filp) \
do { \
struct task_struct *tsk = current; \
(x)->ki_users = 1; \
(x)->ki_key = KIOCB_SYNC_KEY; \
(x)->ki_filp = (filp); \
(x)->ki_ctx = &tsk->active_mm->default_kioctx; \
(x)->ki_cancel = NULL; \
(x)->ki_user_obj = tsk; \
} while (0)
#define AIO_RING_MAGIC 0xa10a10a1
@@ -105,6 +110,7 @@ struct kioctx {
/* prototypes */
extern unsigned aio_max_size;
extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
extern int FASTCALL(aio_put_req(struct kiocb *iocb));
extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
......
@@ -18,15 +18,29 @@
.fd_array = { NULL, } \
}
#define INIT_KIOCTX(name, which_mm) \
{ \
.users = ATOMIC_INIT(1), \
.dead = 0, \
.mm = &which_mm, \
.user_id = 0, \
.next = NULL, \
.wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
.ctx_lock = SPIN_LOCK_UNLOCKED, \
.reqs_active = 0U, \
.max_reqs = ~0U, \
}
#define INIT_MM(name) \
{ \
.mm_rb = RB_ROOT, \
.pgd = swapper_pg_dir, \
.mm_users = ATOMIC_INIT(2), \
.mm_count = ATOMIC_INIT(1), \
.mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
.page_table_lock = SPIN_LOCK_UNLOCKED, \
.mmlist = LIST_HEAD_INIT(name.mmlist), \
{ \
.mm_rb = RB_ROOT, \
.pgd = swapper_pg_dir, \
.mm_users = ATOMIC_INIT(2), \
.mm_count = ATOMIC_INIT(1), \
.mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
.page_table_lock = SPIN_LOCK_UNLOCKED, \
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
}
#define INIT_SIGNALS(sig) { \
......
@@ -169,7 +169,8 @@ struct namespace;
/* Maximum number of active map areas.. This is a random (large) number */
#define MAX_MAP_COUNT (65536)
struct kioctx;
#include <linux/aio.h>
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
rb_root_t mm_rb;
@@ -202,6 +203,8 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
struct kioctx default_kioctx;
};
extern int mmlist_nr;
......
@@ -296,12 +296,16 @@ int mmlist_nr;
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
#include <linux/init_task.h>
static struct mm_struct * mm_init(struct mm_struct * mm)
{
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
mm->pgd = pgd_alloc(mm);
if (mm->pgd)
return mm;
......
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/aio.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -1123,9 +1124,10 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
* that can use the page cache directly.
*/
static ssize_t
__generic_file_read(struct file *filp, const struct iovec *iov,
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
size_t count = iov_length(iov, nr_segs);
@@ -1188,12 +1190,26 @@ __generic_file_read(struct file *filp, const struct iovec *iov,
return retval;
}
ssize_t
generic_file_aio_read(struct kiocb *iocb, char *buf, size_t count, loff_t *ppos)
{
struct iovec local_iov = { .iov_base = buf, .iov_len = count };
return __generic_file_aio_read(iocb, &local_iov, 1, ppos);
}
ssize_t
generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
struct iovec local_iov = { .iov_base = buf, .iov_len = count };
struct kiocb kiocb;
ssize_t ret;
return __generic_file_read(filp, &local_iov, 1, ppos);
init_sync_kiocb(&kiocb, filp);
ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -2210,7 +2226,14 @@ ssize_t generic_file_write(struct file *file, const char *buf,
ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
return __generic_file_read(filp, iov, nr_segs, ppos);
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
......