Commit 92216226 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] permit zero-length readahead, and tidy up readahead

- Initialise the per-request_queue readahead parameter properly,
  rather than the dopey "if it's zero you get the default"
  approach.

- Permit zero-length readahead.

- 80-columnify mm/readahead.c
parent 49d90743
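
The per-queue value is kept in 512-byte sectors and converted at each boundary: kbytes to sectors when the queue is set up, then sectors back to kbytes and on to pages in get_max_readahead(). A minimal standalone sketch of that arithmetic, assuming 4 KiB pages (PAGE_CACHE_SHIFT == 12); an illustration only, not kernel code:

#include <stdio.h>

#define VM_MAX_READAHEAD	128	/* kbytes */
#define PAGE_CACHE_SHIFT	12	/* assumed: 4 KiB page-cache pages */

int main(void)
{
	/* kbytes -> 512-byte sectors: one kbyte is two sectors */
	unsigned long ra_sectors = VM_MAX_READAHEAD << (10 - 9);

	/* sectors -> kbytes: halve, as get_max_readahead() does */
	unsigned long ra_kbytes = ra_sectors / 2;

	/* kbytes -> page-cache pages */
	unsigned long ra_pages = ra_kbytes >> (PAGE_CACHE_SHIFT - 10);

	/* prints: 256 sectors, 128 kbytes, 32 pages */
	printf("%lu sectors, %lu kbytes, %lu pages\n",
		ra_sectors, ra_kbytes, ra_pages);
	return 0;
}

So the 128 kbyte default becomes 256 sectors in the queue, and a reader of that queue sees a 32-page maximum window.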
@@ -187,6 +187,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	q->max_phys_segments = MAX_PHYS_SEGMENTS;
 	q->max_hw_segments = MAX_HW_SEGMENTS;
 	q->make_request_fn = mfn;
+	q->ra_sectors = VM_MAX_READAHEAD << (10 - 9);	/* kbytes->sectors */
 	blk_queue_max_sectors(q, MAX_SECTORS);
 	blk_queue_hardsect_size(q, 512);
@@ -854,7 +855,6 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
 	q->plug_tq.data = q;
 	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
 	q->queue_lock = lock;
-	q->ra_sectors = 0;	/* Use VM default */
 	blk_queue_segment_boundary(q, 0xffffffff);
...
@@ -539,6 +539,8 @@ extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned
 extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);

 /* readahead.c */
+#define VM_MAX_READAHEAD	128	/* kbytes */
+#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 void do_page_cache_readahead(struct file *file,
 		unsigned long offset, unsigned long nr_to_read);
 void page_cache_readahead(struct file *file, unsigned long offset);
...
@@ -25,9 +25,6 @@
  * has a zero value of ra_sectors.
  */

-#define VM_MAX_READAHEAD	128	/* kbytes */
-#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
-
 /*
  * Return max readahead size for this inode in number-of-pages.
  */
@@ -37,8 +34,6 @@ static int get_max_readahead(struct inode *inode)

 	if (inode->i_sb->s_bdev) {
 		blk_ra_kbytes = blk_get_readahead(inode->i_sb->s_bdev) / 2;
-		if (blk_ra_kbytes < VM_MIN_READAHEAD)
-			blk_ra_kbytes = VM_MAX_READAHEAD;
 	}
 	return blk_ra_kbytes >> (PAGE_CACHE_SHIFT - 10);
 }
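
The two lines removed here are what previously made zero-length readahead impossible: any per-queue value below VM_MIN_READAHEAD, including zero, was bounced up to VM_MAX_READAHEAD. A hypothetical before/after model of just that clamp (plain C, not the kernel function):

#define VM_MAX_READAHEAD	128	/* kbytes */
#define VM_MIN_READAHEAD	16	/* kbytes */

/* Old behaviour: a sub-minimum setting, even 0, became the maximum. */
static unsigned long old_ra_kbytes(unsigned long kb)
{
	return kb < VM_MIN_READAHEAD ? VM_MAX_READAHEAD : kb;
}

/* New behaviour: the queue's setting is used as-is, so a zero can
 * propagate out of get_max_readahead() and mean "readahead disabled". */
static unsigned long new_ra_kbytes(unsigned long kb)
{
	return kb;
}

old_ra_kbytes(0) is 128, so readahead could never be switched off; new_ra_kbytes(0) is 0, which page_cache_readahead() now tests for explicitly (see the max == 0 check below).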
@@ -62,11 +57,12 @@ static int get_min_readahead(struct inode *inode)
  * size:	Number of pages in that read
  *		Together, these form the "current window".
  *		Together, start and size represent the `readahead window'.
- * next_size:	The number of pages to read when we get the next readahead miss.
+ * next_size:	The number of pages to read on the next readahead miss.
  * prev_page:	The page which the readahead algorithm most-recently inspected.
- *		prev_page is mainly an optimisation: if page_cache_readahead sees
- *		that it is again being called for a page which it just looked at,
- *		it can return immediately without making any state changes.
+ *		prev_page is mainly an optimisation: if page_cache_readahead
+ *		sees that it is again being called for a page which it just
+ *		looked at, it can return immediately without making any state
+ *		changes.
  * ahead_start,
  * ahead_size:	Together, these form the "ahead window".
  *
@@ -88,38 +84,39 @@ static int get_min_readahead(struct inode *inode)
  * ahead window.
  *
  * A `readahead hit' occurs when a read request is made against a page which is
- * inside the current window. Hits are good, and the window size (next_size) is
- * grown aggressively when hits occur. Two pages are added to the next window
- * size on each hit, which will end up doubling the next window size by the time
- * I/O is submitted for it.
+ * inside the current window. Hits are good, and the window size (next_size)
+ * is grown aggressively when hits occur. Two pages are added to the next
+ * window size on each hit, which will end up doubling the next window size by
+ * the time I/O is submitted for it.
  *
- * If readahead hits are more sparse (say, the application is only reading every
- * second page) then the window will build more slowly.
+ * If readahead hits are more sparse (say, the application is only reading
+ * every second page) then the window will build more slowly.
  *
- * On a readahead miss (the application seeked away) the readahead window is shrunk
- * by 25%. We don't want to drop it too aggressively, because it's a good assumption
- * that an application which has built a good readahead window will continue to
- * perform linear reads. Either at the new file position, or at the old one after
- * another seek.
+ * On a readahead miss (the application seeked away) the readahead window is
+ * shrunk by 25%. We don't want to drop it too aggressively, because it is a
+ * good assumption that an application which has built a good readahead window
+ * will continue to perform linear reads. Either at the new file position, or
+ * at the old one after another seek.
  *
- * There is a special-case: if the first page which the application tries to read
- * happens to be the first page of the file, it is assumed that a linear read is
- * about to happen and the window is immediately set to half of the device maximum.
+ * There is a special-case: if the first page which the application tries to
+ * read happens to be the first page of the file, it is assumed that a linear
+ * read is about to happen and the window is immediately set to half of the
+ * device maximum.
  *
  * A page request at (start + size) is not a miss at all - it's just a part of
  * sequential file reading.
  *
  * This function is to be called for every page which is read, rather than when
- * it is time to perform readahead. This is so the readahead algorithm can centrally
- * work out the access patterns. This could be costly with many tiny read()s, so
- * we specifically optimise for that case with prev_page.
+ * it is time to perform readahead. This is so the readahead algorithm can
+ * centrally work out the access patterns. This could be costly with many tiny
+ * read()s, so we specifically optimise for that case with prev_page.
  */

 /*
- * do_page_cache_readahead actually reads a chunk of disk. It allocates all the
- * pages first, then submits them all for I/O. This avoids the very bad behaviour
- * which would occur if page allocations are causing VM writeback. We really don't
- * want to intermingle reads and writes like that.
+ * do_page_cache_readahead actually reads a chunk of disk. It allocates all
+ * the pages first, then submits them all for I/O. This avoids the very bad
+ * behaviour which would occur if page allocations are causing VM writeback.
+ * We really don't want to intermingle reads and writes like that.
  */
 void do_page_cache_readahead(struct file *file,
 		unsigned long offset, unsigned long nr_to_read)
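
To see how the grow-on-hit, shrink-on-miss policy behaves, here is a user-space simulation of the next_size bookkeeping the comment describes; the min/max of 4 and 32 pages are the patch defaults for 4 KiB pages, and the access pattern is invented for illustration:

#include <stdio.h>

#define RA_MIN_PAGES	4	/* 16 kbytes at 4 KiB/page */
#define RA_MAX_PAGES	32	/* 128 kbytes at 4 KiB/page */

int main(void)
{
	unsigned long next_size = RA_MIN_PAGES;
	int i;

	/* Sequential hits: +2 pages each, doubling the window by the
	 * time its I/O is submitted, capped at the device maximum. */
	for (i = 0; i < 14; i++) {
		next_size += 2;
		if (next_size > RA_MAX_PAGES)
			next_size = RA_MAX_PAGES;
	}
	printf("after 14 hits: %lu pages\n", next_size);	/* 32 */

	/* A seek (miss): shrink by 25%, never below the minimum. */
	next_size -= next_size / 4;
	if (next_size < RA_MIN_PAGES)
		next_size = RA_MIN_PAGES;
	printf("after a miss: %lu pages\n", next_size);		/* 24 */

	return 0;
}

The window therefore recovers quickly after a seek, matching the stated assumption that an application with a good readahead window will keep reading linearly.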
@@ -209,8 +206,10 @@ void page_cache_readahead(struct file *file, unsigned long offset)
 		goto out;
 	}

-	min = get_min_readahead(inode);
 	max = get_max_readahead(inode);
+	if (max == 0)
+		goto out;	/* No readahead */
+	min = get_min_readahead(inode);

 	if (ra->next_size == 0 && offset == 0) {
 		/*
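
Because get_max_readahead() is now consulted first and a zero answer short-circuits everything, a driver can switch readahead off for its queue by overriding the default after queue setup. A hypothetical fragment (my_make_request_fn is an invented name; ra_sectors is the field added by this patch):

	blk_queue_make_request(q, my_make_request_fn);
	q->ra_sectors = 0;	/* zero-length readahead is now legal */

blk_queue_make_request() installs the VM_MAX_READAHEAD default, so the override has to come after it.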
@@ -232,7 +231,8 @@ void page_cache_readahead(struct file *file, unsigned long offset)
 		ra->next_size += 2;
 	} else {
 		/*
-		 * A miss - lseek, pread, etc. Shrink the readahead window by 25%.
+		 * A miss - lseek, pread, etc. Shrink the readahead
+		 * window by 25%.
 		 */
 		ra->next_size -= ra->next_size / 4;
 		if (ra->next_size < min)
@@ -332,8 +332,9 @@ void page_cache_readaround(struct file *file, unsigned long offset)
  * the VM.
  *
  * We shrink the readahead window by three pages. This is because we grow it
- * by two pages on a readahead hit. Theory being that the readahead window size
- * will stabilise around the maximum level at which there isn't any thrashing.
+ * by two pages on a readahead hit. Theory being that the readahead window
+ * size will stabilise around the maximum level at which there isn't any
+ * thrashing.
  */
 void handle_ra_thrashing(struct file *file)
 {
...
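
The grow-by-2/shrink-by-3 pairing means a full hit-then-thrash round trip nets minus one page, so under sustained thrashing the window drifts down until it fits in memory. A toy model of that fixed point (the 20-page threshold is invented for illustration):

#include <stdio.h>

int main(void)
{
	unsigned long window = 32;		/* start at device maximum */
	const unsigned long thrash_limit = 20;	/* assumed: largest window
						 * that doesn't thrash */

	while (window > thrash_limit) {
		window += 2;	/* a readahead hit grows the window... */
		window -= 3;	/* ...then thrashing shrinks it by 3 */
	}
	printf("window stabilises at %lu pages\n", window);	/* 20 */
	return 0;
}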