Commit 9de05205 authored by David Howells, committed by Linus Torvalds

[PATCH] do_generic_file_read / readahead adjustments

This does the following three things:

 (1) Makes the functions in mm/readahead.c only use struct file* to pass to
     readpage(). struct address_space* and struct file_ra_state* are used
     instead to keep track of the readahead state.

 (2) Adds a new function do_generic_mapping_read() that is similar to
     do_generic_file_read(), except that it uses a mapping pointer and a
     readahead state pointer to access a file. The file* is only used to pass
     to readpage().

 (3) Turns do_generic_file_read() into an inline function in linux/fs.h that
     simply wraps do_generic_mapping_read().

This should mean that it is no longer necessary to have a struct file to
access a file in this manner. Just an inode or address space should be
sufficient.

It also means alternate read-ahead structures can be maintained.

The reason I want this is that I'm writing a general cache manager for
filesystems such as AFS, NFSv4, and Lustre. Block devices are made available
to the "cache manager" by means of a filesystem that can be mounted. I'm
storing meta data in an inode in the cache, but to scan this at the moment I
need to gain a "struct file" to use with do_generic_file_read().

This involves either creating a dummy dentry and struct file (which will cause
Al Viro to come looking for me with a shotgun), or using an extra auxiliary
filesystem mounted with do_kern_mount(), neither of which is particularly
appealing.

This patch is the alternative... it provides a function that I can pass an
address_space to. This also allows me to make use of readahead semantics
without having to reinvent them for myself.
parent 5a7728c6
...@@ -1251,7 +1251,8 @@ extern ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, lof ...@@ -1251,7 +1251,8 @@ extern ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, lof
ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos); unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t); extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t);
extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); extern void do_generic_mapping_read(struct address_space *, struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_direct_IO(int rw, struct file *file, extern ssize_t generic_file_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, unsigned long nr_segs); const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec
...@@ -1268,6 +1269,18 @@ extern int generic_file_open(struct inode * inode, struct file * filp); ...@@ -1268,6 +1269,18 @@ extern int generic_file_open(struct inode * inode, struct file * filp);
extern int generic_vm_writeback(struct page *page, extern int generic_vm_writeback(struct page *page,
struct writeback_control *wbc); struct writeback_control *wbc);
/*
 * do_generic_file_read - struct file based wrapper around
 * do_generic_mapping_read().
 *
 * Forwards the read to do_generic_mapping_read(), supplying:
 *   - the address space of the file's inode
 *     (filp->f_dentry->d_inode->i_mapping),
 *   - the readahead state embedded in the file (&filp->f_ra),
 *   - filp itself, plus the caller's ppos, desc and actor unchanged.
 *
 * Returns nothing; any result is reported by do_generic_mapping_read()
 * through the arguments it is given.
 */
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{
do_generic_mapping_read(filp->f_dentry->d_inode->i_mapping,
&filp->f_ra,
filp,
ppos,
desc,
actor);
}
extern struct file_operations generic_ro_fops; extern struct file_operations generic_ro_fops;
extern int vfs_readlink(struct dentry *, char *, int, const char *); extern int vfs_readlink(struct dentry *, char *, int, const char *);
......
...@@ -513,11 +513,18 @@ int write_one_page(struct page *page, int wait); ...@@ -513,11 +513,18 @@ int write_one_page(struct page *page, int wait);
/* readahead.c */ /* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */ #define VM_MAX_READAHEAD 128 /* kbytes */
#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */
int do_page_cache_readahead(struct file *file, int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
unsigned long offset, unsigned long nr_to_read); unsigned long offset, unsigned long nr_to_read);
void page_cache_readahead(struct file *file, unsigned long offset); void page_cache_readahead(struct address_space *mapping,
void page_cache_readaround(struct file *file, unsigned long offset); struct file_ra_state *ra,
void handle_ra_miss(struct file *file); struct file *filp,
unsigned long offset);
void page_cache_readaround(struct address_space *mapping,
struct file_ra_state *ra,
struct file *filp,
unsigned long offset);
void handle_ra_miss(struct address_space *mapping,
struct file_ra_state *ra);
/* Do stack extension */ /* Do stack extension */
extern int expand_stack(struct vm_area_struct * vma, unsigned long address); extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
......
...@@ -229,7 +229,7 @@ EXPORT_SYMBOL(block_truncate_page); ...@@ -229,7 +229,7 @@ EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(generic_file_sendfile); EXPORT_SYMBOL(generic_file_sendfile);
EXPORT_SYMBOL(do_generic_file_read); EXPORT_SYMBOL(do_generic_mapping_read);
EXPORT_SYMBOL(generic_file_write); EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_file_write_nolock); EXPORT_SYMBOL(generic_file_write_nolock);
EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_mmap);
......
...@@ -570,10 +570,15 @@ void mark_page_accessed(struct page *page) ...@@ -570,10 +570,15 @@ void mark_page_accessed(struct page *page)
* *
* This is really ugly. But the goto's actually try to clarify some * This is really ugly. But the goto's actually try to clarify some
* of the logic when it comes to error handling etc. * of the logic when it comes to error handling etc.
* - note the struct file * is only passed for the use of readpage
*/ */
void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor) void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *ra,
struct file * filp,
loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{ {
struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned long index, offset; unsigned long index, offset;
struct page *cached_page; struct page *cached_page;
...@@ -598,7 +603,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * ...@@ -598,7 +603,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
break; break;
} }
page_cache_readahead(filp, index); page_cache_readahead(mapping, ra, filp, index);
nr = nr - offset; nr = nr - offset;
...@@ -610,7 +615,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * ...@@ -610,7 +615,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
page = radix_tree_lookup(&mapping->page_tree, index); page = radix_tree_lookup(&mapping->page_tree, index);
if (!page) { if (!page) {
read_unlock(&mapping->page_lock); read_unlock(&mapping->page_lock);
handle_ra_miss(filp); handle_ra_miss(mapping,ra);
goto no_cached_page; goto no_cached_page;
} }
page_cache_get(page); page_cache_get(page);
...@@ -946,9 +951,9 @@ ssize_t generic_file_sendfile(struct file *out_file, struct file *in_file, ...@@ -946,9 +951,9 @@ ssize_t generic_file_sendfile(struct file *out_file, struct file *in_file,
} }
static ssize_t static ssize_t
do_readahead(struct file *file, unsigned long index, unsigned long nr) do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
{ {
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long max; unsigned long max;
unsigned long active; unsigned long active;
unsigned long inactive; unsigned long inactive;
...@@ -962,7 +967,7 @@ do_readahead(struct file *file, unsigned long index, unsigned long nr) ...@@ -962,7 +967,7 @@ do_readahead(struct file *file, unsigned long index, unsigned long nr)
if (nr > max) if (nr > max)
nr = max; nr = max;
do_page_cache_readahead(file, index, nr); do_page_cache_readahead(mapping, filp, index, nr);
return 0; return 0;
} }
...@@ -975,10 +980,11 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) ...@@ -975,10 +980,11 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
file = fget(fd); file = fget(fd);
if (file) { if (file) {
if (file->f_mode & FMODE_READ) { if (file->f_mode & FMODE_READ) {
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long start = offset >> PAGE_CACHE_SHIFT; unsigned long start = offset >> PAGE_CACHE_SHIFT;
unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT; unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
unsigned long len = end - start + 1; unsigned long len = end - start + 1;
ret = do_readahead(file, start, len); ret = do_readahead(mapping, file, start, len);
} }
fput(file); fput(file);
} }
...@@ -999,6 +1005,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address ...@@ -999,6 +1005,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
int error; int error;
struct file *file = area->vm_file; struct file *file = area->vm_file;
struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct page *page; struct page *page;
unsigned long size, pgoff, endoff; unsigned long size, pgoff, endoff;
...@@ -1031,7 +1038,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address ...@@ -1031,7 +1038,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
*/ */
if (VM_SequentialReadHint(area)) { if (VM_SequentialReadHint(area)) {
did_readahead = 1; did_readahead = 1;
page_cache_readahead(area->vm_file, pgoff); page_cache_readahead(mapping, ra, file, pgoff);
} }
/* /*
...@@ -1040,7 +1047,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address ...@@ -1040,7 +1047,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
*/ */
if ((pgoff < size) && !VM_RandomReadHint(area)) { if ((pgoff < size) && !VM_RandomReadHint(area)) {
did_readahead = 1; did_readahead = 1;
page_cache_readaround(file, pgoff); page_cache_readaround(mapping, ra, file, pgoff);
} }
/* /*
...@@ -1050,7 +1057,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address ...@@ -1050,7 +1057,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
page = find_get_page(mapping, pgoff); page = find_get_page(mapping, pgoff);
if (!page) { if (!page) {
if (did_readahead) { if (did_readahead) {
handle_ra_miss(file); handle_ra_miss(mapping,ra);
did_readahead = 0; did_readahead = 0;
} }
goto no_cached_page; goto no_cached_page;
......
...@@ -80,7 +80,7 @@ static long madvise_willneed(struct vm_area_struct * vma, ...@@ -80,7 +80,7 @@ static long madvise_willneed(struct vm_area_struct * vma,
if ((vma->vm_mm->rss + (end - start)) > rlim_rss) if ((vma->vm_mm->rss + (end - start)) > rlim_rss)
return error; return error;
do_page_cache_readahead(file, start, end - start); do_page_cache_readahead(file->f_dentry->d_inode->i_mapping, file, start, end - start);
return 0; return 0;
} }
......
...@@ -22,18 +22,18 @@ struct backing_dev_info default_backing_dev_info = { ...@@ -22,18 +22,18 @@ struct backing_dev_info default_backing_dev_info = {
/* /*
* Return max readahead size for this inode in number-of-pages. * Return max readahead size for this inode in number-of-pages.
*/ */
static inline unsigned long get_max_readahead(struct file *file) static inline unsigned long get_max_readahead(struct file_ra_state *ra)
{ {
return file->f_ra.ra_pages; return ra->ra_pages;
} }
static inline unsigned long get_min_readahead(struct file *file) static inline unsigned long get_min_readahead(struct file_ra_state *ra)
{ {
return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
} }
static int static int
read_pages(struct file *file, struct address_space *mapping, read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages) struct list_head *pages, unsigned nr_pages)
{ {
unsigned page_idx; unsigned page_idx;
...@@ -48,7 +48,7 @@ read_pages(struct file *file, struct address_space *mapping, ...@@ -48,7 +48,7 @@ read_pages(struct file *file, struct address_space *mapping,
struct page *page = list_entry(pages->prev, struct page, list); struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list); list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) { if (!add_to_page_cache(page, mapping, page->index)) {
mapping->a_ops->readpage(file, page); mapping->a_ops->readpage(filp, page);
if (!pagevec_add(&lru_pvec, page)) if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec); __pagevec_lru_add(&lru_pvec);
} else { } else {
...@@ -134,10 +134,11 @@ read_pages(struct file *file, struct address_space *mapping, ...@@ -134,10 +134,11 @@ read_pages(struct file *file, struct address_space *mapping,
* *
* Returns the number of pages which actually had IO started against them. * Returns the number of pages which actually had IO started against them.
*/ */
int do_page_cache_readahead(struct file *file, int do_page_cache_readahead(struct address_space *mapping,
unsigned long offset, unsigned long nr_to_read) struct file *filp,
unsigned long offset,
unsigned long nr_to_read)
{ {
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct page *page; struct page *page;
unsigned long end_index; /* The last page we want to read */ unsigned long end_index; /* The last page we want to read */
...@@ -181,7 +182,7 @@ int do_page_cache_readahead(struct file *file, ...@@ -181,7 +182,7 @@ int do_page_cache_readahead(struct file *file,
* will then handle the error. * will then handle the error.
*/ */
if (ret) { if (ret) {
read_pages(file, mapping, &page_pool, ret); read_pages(mapping, filp, &page_pool, ret);
blk_run_queues(); blk_run_queues();
} }
BUG_ON(!list_empty(&page_pool)); BUG_ON(!list_empty(&page_pool));
...@@ -216,9 +217,9 @@ check_ra_success(struct file_ra_state *ra, pgoff_t attempt, ...@@ -216,9 +217,9 @@ check_ra_success(struct file_ra_state *ra, pgoff_t attempt,
* page_cache_readahead is the main function. If performs the adaptive * page_cache_readahead is the main function. If performs the adaptive
* readahead window size management and submits the readahead I/O. * readahead window size management and submits the readahead I/O.
*/ */
void page_cache_readahead(struct file *file, unsigned long offset) void page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
struct file *filp, unsigned long offset)
{ {
struct file_ra_state *ra = &file->f_ra;
unsigned max; unsigned max;
unsigned min; unsigned min;
unsigned orig_next_size; unsigned orig_next_size;
...@@ -239,11 +240,11 @@ void page_cache_readahead(struct file *file, unsigned long offset) ...@@ -239,11 +240,11 @@ void page_cache_readahead(struct file *file, unsigned long offset)
if (ra->next_size == -1UL) if (ra->next_size == -1UL)
goto out; /* Maximally shrunk */ goto out; /* Maximally shrunk */
max = get_max_readahead(file); max = get_max_readahead(ra);
if (max == 0) if (max == 0)
goto out; /* No readahead */ goto out; /* No readahead */
min = get_min_readahead(file); min = get_min_readahead(ra);
orig_next_size = ra->next_size; orig_next_size = ra->next_size;
if (ra->next_size == 0 && offset == 0) { if (ra->next_size == 0 && offset == 0) {
...@@ -316,7 +317,8 @@ void page_cache_readahead(struct file *file, unsigned long offset) ...@@ -316,7 +317,8 @@ void page_cache_readahead(struct file *file, unsigned long offset)
ra->ahead_start = 0; /* Invalidate these */ ra->ahead_start = 0; /* Invalidate these */
ra->ahead_size = 0; ra->ahead_size = 0;
actual = do_page_cache_readahead(file, offset, ra->size); actual = do_page_cache_readahead(mapping, filp, offset,
ra->size);
check_ra_success(ra, ra->size, actual, orig_next_size); check_ra_success(ra, ra->size, actual, orig_next_size);
} else { } else {
/* /*
...@@ -327,7 +329,7 @@ void page_cache_readahead(struct file *file, unsigned long offset) ...@@ -327,7 +329,7 @@ void page_cache_readahead(struct file *file, unsigned long offset)
if (ra->ahead_start == 0) { if (ra->ahead_start == 0) {
ra->ahead_start = ra->start + ra->size; ra->ahead_start = ra->start + ra->size;
ra->ahead_size = ra->next_size; ra->ahead_size = ra->next_size;
actual = do_page_cache_readahead(file, actual = do_page_cache_readahead(mapping, filp,
ra->ahead_start, ra->ahead_size); ra->ahead_start, ra->ahead_size);
check_ra_success(ra, ra->ahead_size, check_ra_success(ra, ra->ahead_size,
actual, orig_next_size); actual, orig_next_size);
...@@ -342,12 +344,11 @@ void page_cache_readahead(struct file *file, unsigned long offset) ...@@ -342,12 +344,11 @@ void page_cache_readahead(struct file *file, unsigned long offset)
* but somewhat ascending. So readaround favours pages beyond the target one. * but somewhat ascending. So readaround favours pages beyond the target one.
* We also boost the window size, as it can easily shrink due to misses. * We also boost the window size, as it can easily shrink due to misses.
*/ */
void page_cache_readaround(struct file *file, unsigned long offset) void page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
struct file *filp, unsigned long offset)
{ {
struct file_ra_state *ra = &file->f_ra;
if (ra->next_size != -1UL) { if (ra->next_size != -1UL) {
const unsigned long min = get_min_readahead(file) * 2; const unsigned long min = get_min_readahead(ra) * 2;
unsigned long target; unsigned long target;
unsigned long backward; unsigned long backward;
...@@ -365,7 +366,7 @@ void page_cache_readaround(struct file *file, unsigned long offset) ...@@ -365,7 +366,7 @@ void page_cache_readaround(struct file *file, unsigned long offset)
target = 0; target = 0;
else else
target -= backward; target -= backward;
page_cache_readahead(file, target); page_cache_readahead(mapping, ra, filp, target);
} }
} }
...@@ -383,10 +384,9 @@ void page_cache_readaround(struct file *file, unsigned long offset) ...@@ -383,10 +384,9 @@ void page_cache_readaround(struct file *file, unsigned long offset)
* that the readahead window size will stabilise around the maximum level at * that the readahead window size will stabilise around the maximum level at
* which there is no thrashing. * which there is no thrashing.
*/ */
void handle_ra_miss(struct file *file) void handle_ra_miss(struct address_space *mapping, struct file_ra_state *ra)
{ {
struct file_ra_state *ra = &file->f_ra; const unsigned long min = get_min_readahead(ra);
const unsigned long min = get_min_readahead(file);
if (ra->next_size == -1UL) { if (ra->next_size == -1UL) {
ra->next_size = min; ra->next_size = min;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment