Commit 9de05205 authored by David Howells, committed by Linus Torvalds

[PATCH] do_generic_file_read / readahead adjustments

This does the following three things:

 (1) Makes the functions in mm/readahead.c use struct file* only for passing to
     readpage(). address_space* and file_ra_state* are used instead to keep
     track of readahead state.

 (2) Adds a new function do_generic_mapping_read() that is similar to
     do_generic_file_read(), except that it uses a mapping pointer and a
     readahead state pointer to access a file. The file* is only used to pass
     to readpage().

 (3) Turns do_generic_file_read() into an inline function in linux/fs.h that
     simply wraps do_generic_mapping_read().

This should mean that it is no longer necessary to have a struct file to
access a file in this manner. Just an inode or address space should be
sufficient.

It also means alternate read-ahead structures can be maintained.
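
For example, page_cache_readahead() and handle_ra_miss() now take the mapping
and the file_ra_state explicitly (see the prototypes in the diff below), so a
caller can keep a readahead cursor that is entirely its own. A minimal sketch,
with made-up names and an arbitrary window size derived from VM_MAX_READAHEAD:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/string.h>

/* A readahead cursor that lives outside any struct file (names made up). */
struct scan_cursor {
        struct file_ra_state ra;        /* private readahead state */
        unsigned long index;            /* next page index to visit */
};

static void scan_cursor_init(struct scan_cursor *sc)
{
        memset(sc, 0, sizeof(*sc));
        /* ra_pages is the field get_max_readahead() now consults */
        sc->ra.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
}

static void scan_cursor_readahead(struct scan_cursor *sc,
                                  struct address_space *mapping,
                                  struct file *filp)
{
        /* the file pointer is only handed through to ->readpage() */
        page_cache_readahead(mapping, &sc->ra, filp, sc->index);
}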

The reason I want this is that I'm writing a general cache manager for
filesystems such as AFS, NFSv4, and Lustre. Block devices are made available
to the "cache manager" by means of a filesystem that can be mounted. I'm
storing metadata in an inode in the cache, but to scan this at the moment I
need to obtain a "struct file" to use with do_generic_file_read().

This involves either creating a dummy dentry and struct file (which will cause
Al Viro to come looking for me with a shotgun) or using an extra auxiliary
filesystem mounted with do_kern_mount(), neither of which is particularly
appealing.

This patch is the alternative: it provides a function that I can pass an
address_space to directly. It also lets me reuse the existing readahead
semantics rather than reinventing them for myself.
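
To make the intended use concrete, here is a rough sketch of what the cache
manager's metadata scan could look like. The names are invented, and it
assumes the filesystem's readpage() never looks at its struct file argument
(which is the only thing the file pointer is forwarded to), so NULL can be
passed in its place:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/string.h>

/* read actor: inspect each page of metadata as it arrives */
static int cachefs_scan_actor(read_descriptor_t *desc, struct page *page,
                              unsigned long offset, unsigned long size)
{
        /* ... kmap(page) and walk the records in [offset, offset+size) ... */
        desc->count -= size;
        desc->written += size;
        return size;
}

/* scan the first len bytes of the metadata inode through the page cache -
 * no dummy dentry, no dummy struct file */
static void cachefs_scan_metadata(struct inode *inode, size_t len)
{
        struct file_ra_state ra;
        read_descriptor_t desc;
        loff_t pos = 0;

        /* private readahead state standing in for file->f_ra;
         * window limit derived arbitrarily from VM_MAX_READAHEAD */
        memset(&ra, 0, sizeof(ra));
        ra.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;

        desc.written = 0;
        desc.count = len;
        desc.buf = NULL;        /* this actor doesn't copy to userspace */
        desc.error = 0;

        /* NULL file: only ever forwarded to ->readpage() */
        do_generic_mapping_read(inode->i_mapping, &ra, NULL,
                                &pos, &desc, cachefs_scan_actor);
}

If a particular readpage() implementation does want a file, a single
long-lived struct file could still be supplied here; the point is that the
page cache and readahead code themselves no longer force one into existence.
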
parent 5a7728c6
@@ -1251,7 +1251,8 @@ extern ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, lof
ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t);
extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
extern void do_generic_mapping_read(struct address_space *, struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec
@@ -1268,6 +1269,18 @@ extern int generic_file_open(struct inode * inode, struct file * filp);
extern int generic_vm_writeback(struct page *page,
struct writeback_control *wbc);
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{
do_generic_mapping_read(filp->f_dentry->d_inode->i_mapping,
&filp->f_ra,
filp,
ppos,
desc,
actor);
}
extern struct file_operations generic_ro_fops;
extern int vfs_readlink(struct dentry *, char *, int, const char *);
@@ -513,11 +513,18 @@ int write_one_page(struct page *page, int wait);
/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */
int do_page_cache_readahead(struct file *file,
int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
unsigned long offset, unsigned long nr_to_read);
void page_cache_readahead(struct file *file, unsigned long offset);
void page_cache_readaround(struct file *file, unsigned long offset);
void handle_ra_miss(struct file *file);
void page_cache_readahead(struct address_space *mapping,
struct file_ra_state *ra,
struct file *filp,
unsigned long offset);
void page_cache_readaround(struct address_space *mapping,
struct file_ra_state *ra,
struct file *filp,
unsigned long offset);
void handle_ra_miss(struct address_space *mapping,
struct file_ra_state *ra);
/* Do stack extension */
extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
@@ -229,7 +229,7 @@ EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(generic_file_sendfile);
EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(do_generic_mapping_read);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_file_write_nolock);
EXPORT_SYMBOL(generic_file_mmap);
@@ -570,10 +570,15 @@ void mark_page_accessed(struct page *page)
*
* This is really ugly. But the goto's actually try to clarify some
* of the logic when it comes to error handling etc.
* - note the struct file * is only passed for the use of readpage
*/
void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *ra,
struct file * filp,
loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{
struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host;
unsigned long index, offset;
struct page *cached_page;
@@ -598,7 +603,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
break;
}
page_cache_readahead(filp, index);
page_cache_readahead(mapping, ra, filp, index);
nr = nr - offset;
@@ -610,7 +615,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
page = radix_tree_lookup(&mapping->page_tree, index);
if (!page) {
read_unlock(&mapping->page_lock);
handle_ra_miss(filp);
handle_ra_miss(mapping,ra);
goto no_cached_page;
}
page_cache_get(page);
@@ -946,9 +951,9 @@ ssize_t generic_file_sendfile(struct file *out_file, struct file *in_file,
}
static ssize_t
do_readahead(struct file *file, unsigned long index, unsigned long nr)
do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
{
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long max;
unsigned long active;
unsigned long inactive;
@@ -962,7 +967,7 @@ do_readahead(struct file *file, unsigned long index, unsigned long nr)
if (nr > max)
nr = max;
do_page_cache_readahead(file, index, nr);
do_page_cache_readahead(mapping, filp, index, nr);
return 0;
}
@@ -975,10 +980,11 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
file = fget(fd);
if (file) {
if (file->f_mode & FMODE_READ) {
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long start = offset >> PAGE_CACHE_SHIFT;
unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
unsigned long len = end - start + 1;
ret = do_readahead(file, start, len);
ret = do_readahead(mapping, file, start, len);
}
fput(file);
}
@@ -999,6 +1005,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
int error;
struct file *file = area->vm_file;
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
unsigned long size, pgoff, endoff;
@@ -1031,7 +1038,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
*/
if (VM_SequentialReadHint(area)) {
did_readahead = 1;
page_cache_readahead(area->vm_file, pgoff);
page_cache_readahead(mapping, ra, file, pgoff);
}
/*
@@ -1040,7 +1047,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
*/
if ((pgoff < size) && !VM_RandomReadHint(area)) {
did_readahead = 1;
page_cache_readaround(file, pgoff);
page_cache_readaround(mapping, ra, file, pgoff);
}
/*
@@ -1050,7 +1057,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
page = find_get_page(mapping, pgoff);
if (!page) {
if (did_readahead) {
handle_ra_miss(file);
handle_ra_miss(mapping,ra);
did_readahead = 0;
}
goto no_cached_page;
@@ -80,7 +80,7 @@ static long madvise_willneed(struct vm_area_struct * vma,
if ((vma->vm_mm->rss + (end - start)) > rlim_rss)
return error;
do_page_cache_readahead(file, start, end - start);
do_page_cache_readahead(file->f_dentry->d_inode->i_mapping, file, start, end - start);
return 0;
}
@@ -22,18 +22,18 @@ struct backing_dev_info default_backing_dev_info = {
/*
* Return max readahead size for this inode in number-of-pages.
*/
static inline unsigned long get_max_readahead(struct file *file)
static inline unsigned long get_max_readahead(struct file_ra_state *ra)
{
return file->f_ra.ra_pages;
return ra->ra_pages;
}
static inline unsigned long get_min_readahead(struct file *file)
static inline unsigned long get_min_readahead(struct file_ra_state *ra)
{
return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
}
static int
read_pages(struct file *file, struct address_space *mapping,
read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
{
unsigned page_idx;
@@ -48,7 +48,7 @@ read_pages(struct file *file, struct address_space *mapping,
struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) {
mapping->a_ops->readpage(file, page);
mapping->a_ops->readpage(filp, page);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
} else {
@@ -134,10 +134,11 @@ read_pages(struct file *file, struct address_space *mapping,
*
* Returns the number of pages which actually had IO started against them.
*/
int do_page_cache_readahead(struct file *file,
unsigned long offset, unsigned long nr_to_read)
int do_page_cache_readahead(struct address_space *mapping,
struct file *filp,
unsigned long offset,
unsigned long nr_to_read)
{
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host;
struct page *page;
unsigned long end_index; /* The last page we want to read */
@@ -181,7 +182,7 @@ int do_page_cache_readahead(struct file *file,
* will then handle the error.
*/
if (ret) {
read_pages(file, mapping, &page_pool, ret);
read_pages(mapping, filp, &page_pool, ret);
blk_run_queues();
}
BUG_ON(!list_empty(&page_pool));
@@ -216,9 +217,9 @@ check_ra_success(struct file_ra_state *ra, pgoff_t attempt,
* page_cache_readahead is the main function. If performs the adaptive
* readahead window size management and submits the readahead I/O.
*/
void page_cache_readahead(struct file *file, unsigned long offset)
void page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
struct file *filp, unsigned long offset)
{
struct file_ra_state *ra = &file->f_ra;
unsigned max;
unsigned min;
unsigned orig_next_size;
@@ -239,11 +240,11 @@ void page_cache_readahead(struct file *file, unsigned long offset)
if (ra->next_size == -1UL)
goto out; /* Maximally shrunk */
max = get_max_readahead(file);
max = get_max_readahead(ra);
if (max == 0)
goto out; /* No readahead */
min = get_min_readahead(file);
min = get_min_readahead(ra);
orig_next_size = ra->next_size;
if (ra->next_size == 0 && offset == 0) {
@@ -316,7 +317,8 @@ void page_cache_readahead(struct file *file, unsigned long offset)
ra->ahead_start = 0; /* Invalidate these */
ra->ahead_size = 0;
actual = do_page_cache_readahead(file, offset, ra->size);
actual = do_page_cache_readahead(mapping, filp, offset,
ra->size);
check_ra_success(ra, ra->size, actual, orig_next_size);
} else {
/*
@@ -327,7 +329,7 @@ void page_cache_readahead(struct file *file, unsigned long offset)
if (ra->ahead_start == 0) {
ra->ahead_start = ra->start + ra->size;
ra->ahead_size = ra->next_size;
actual = do_page_cache_readahead(file,
actual = do_page_cache_readahead(mapping, filp,
ra->ahead_start, ra->ahead_size);
check_ra_success(ra, ra->ahead_size,
actual, orig_next_size);
@@ -342,12 +344,11 @@ void page_cache_readahead(struct file *file, unsigned long offset)
* but somewhat ascending. So readaround favours pages beyond the target one.
* We also boost the window size, as it can easily shrink due to misses.
*/
void page_cache_readaround(struct file *file, unsigned long offset)
void page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
struct file *filp, unsigned long offset)
{
struct file_ra_state *ra = &file->f_ra;
if (ra->next_size != -1UL) {
const unsigned long min = get_min_readahead(file) * 2;
const unsigned long min = get_min_readahead(ra) * 2;
unsigned long target;
unsigned long backward;
@@ -365,7 +366,7 @@ void page_cache_readaround(struct file *file, unsigned long offset)
target = 0;
else
target -= backward;
page_cache_readahead(file, target);
page_cache_readahead(mapping, ra, filp, target);
}
}
@@ -383,10 +384,9 @@ void page_cache_readaround(struct file *file, unsigned long offset)
* that the readahead window size will stabilise around the maximum level at
* which there is no thrashing.
*/
void handle_ra_miss(struct file *file)
void handle_ra_miss(struct address_space *mapping, struct file_ra_state *ra)
{
struct file_ra_state *ra = &file->f_ra;
const unsigned long min = get_min_readahead(file);
const unsigned long min = get_min_readahead(ra);
if (ra->next_size == -1UL) {
ra->next_size = min;