Commit a41cd6e4 authored by Linus Torvalds

v2.4.10 -> v2.4.10.0.1

  - me/al/andrea: buffers-in-pagecache coherency, buffer.c cleanups
parent 8c7cba55
......@@ -22,63 +22,85 @@
#include <asm/uaccess.h>
static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
{
int err;
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
err = -EIO;
if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
goto out;
/*
 * Convert a block size in bytes to its shift count.
 *
 * For a power-of-two size of 512 or more this is log2(size)
 * (512 -> 9, 1024 -> 10, 4096 -> 12).  The result never drops below 9,
 * so any size under 512 also yields 9.
 */
static inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits;

	/* The first halving is unconditional; keep halving while above 256. */
	for (bits = 9, size >>= 1; size > 256; size >>= 1)
		bits++;
	return bits;
}
bh_result->b_blocknr = iblock;
bh_result->b_state |= 1UL << BH_Mapped;
err = 0;
/*
 * Return the block size configured for device 'dev'.
 *
 * The value is blksize_size[major][minor] when the per-major table
 * exists and the entry is non-zero; otherwise it is BLOCK_SIZE.
 */
static inline unsigned int block_size(kdev_t dev)
{
int retval = BLOCK_SIZE;
int major = MAJOR(dev);
/*
 * NOTE(review): the next two lines ("out:" / "return err;") look like
 * removed-side diff lines from the old blkdev_get_block() interleaved
 * by the page rendering, not part of this function - confirm.
 */
out:
return err;
if (blksize_size[major]) {
int minor = MINOR(dev);
/* A zero entry leaves the BLOCK_SIZE default in place. */
if (blksize_size[major][minor])
retval = blksize_size[major][minor];
}
return retval;
}
static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
/*
 * Return the size of device 'dev' counted in units of block_size(dev).
 *
 * ~0U is returned when the size is unknown, i.e. when the major has no
 * blk_size table or the device's entry is zero.  The arithmetic below
 * treats blk_size entries as counts of (1 << BLOCK_SIZE_BITS)-byte units
 * and rescales them to the device block size.
 *
 * NOTE(review): lines that use 'inode', 'iobuf' and BUFFERED_BLOCKSIZE
 * inside this body look like removed-side diff lines from
 * blkdev_direct_IO() interleaved by the page rendering - confirm.
 */
static unsigned int max_block(kdev_t dev)
{
int i, nr_blocks, retval, dev = inode->i_rdev;
unsigned long * blocks = iobuf->blocks;
unsigned int retval = ~0U;
int major = MAJOR(dev);
if (blocksize != BUFFERED_BLOCKSIZE)
BUG();
if (blk_size[major]) {
int minor = MINOR(dev);
unsigned int blocks = blk_size[major][minor];
if (blocks) {
unsigned int size = block_size(dev);
unsigned int sizebits = blksize_bits(size);
/* Adds (size-1) >> BLOCK_SIZE_BITS before rescaling. */
blocks += (size-1) >> BLOCK_SIZE_BITS;
/*
 * NOTE(review): when sizebits > BLOCK_SIZE_BITS this shift count
 * wraps to a huge unsigned value; the result is overwritten just
 * below, but an out-of-range shift is undefined behaviour in C.
 * An if/else would avoid evaluating it.
 */
retval = blocks << (BLOCK_SIZE_BITS - sizebits);
if (sizebits > BLOCK_SIZE_BITS)
retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
}
}
return retval;
}
nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
/* build the blocklist */
for (i = 0; i < nr_blocks; i++, blocknr++) {
struct buffer_head bh;
retval = blkdev_get_block(inode, blocknr, &bh);
if (retval)
goto out;
/*
 * Map logical block 'iblock' of a block-device inode onto itself.
 *
 * On success bh_result->b_blocknr is set to iblock, BH_Mapped is set in
 * bh_result->b_state and 0 is returned.  -EIO is returned when iblock is
 * at or beyond max_block(inode->i_rdev).
 *
 * NOTE(review): the lines that use 'blocks', 'bh', 'retval' and
 * brw_kiovec() inside this body look like removed-side diff lines from
 * blkdev_direct_IO() interleaved by the page rendering - confirm.
 */
static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
{
int err;
blocks[i] = bh.b_blocknr;
}
err = -EIO;
/*
 * NOTE(review): iblock is a signed long compared against an unsigned
 * int; how a negative iblock compares depends on the width of long -
 * confirm callers never pass one.
 */
if (iblock >= max_block(inode->i_rdev))
goto out;
retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
bh_result->b_blocknr = iblock;
bh_result->b_state |= 1UL << BH_Mapped;
err = 0;
out:
return retval;
return err;
}
static int blkdev_writepage(struct page * page)
{
int err, i;
unsigned int blocksize;
unsigned long block;
struct buffer_head *bh, *head;
struct inode *inode = page->mapping->host;
if (!PageLocked(page))
BUG();
blocksize = block_size(inode->i_rdev);
if (!page->buffers)
create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, inode->i_rdev, blocksize);
head = page->buffers;
block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
block = page->index << (PAGE_CACHE_SHIFT - blksize_bits(blocksize));
bh = head;
i = 0;
......@@ -132,19 +154,21 @@ static int blkdev_readpage(struct file * file, struct page * page)
struct inode *inode = page->mapping->host;
kdev_t dev = inode->i_rdev;
unsigned long iblock, lblock;
struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
unsigned int blocks;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocks, blocksize, blocksize_bits;
int nr, i;
if (!PageLocked(page))
PAGE_BUG(page);
blocksize = block_size(dev);
blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, dev, blocksize);
head = page->buffers;
blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
blocks = PAGE_CACHE_SIZE >> blocksize_bits;
iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
lblock = max_block(dev);
bh = head;
nr = 0;
i = 0;
......@@ -159,7 +183,7 @@ static int blkdev_readpage(struct file * file, struct page * page)
continue;
}
if (!buffer_mapped(bh)) {
memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
memset(kmap(page) + i * blocksize, 0, blocksize);
flush_dcache_page(page);
kunmap(page);
set_bit(BH_Uptodate, &bh->b_state);
......@@ -206,19 +230,21 @@ static int __blkdev_prepare_write(struct inode *inode, struct page *page,
unsigned long block;
int err = 0;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
kmap(page);
unsigned int blocksize, blocksize_bits;
blocksize = block_size(dev);
blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, dev, blocksize);
head = page->buffers;
block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
if (!bh)
BUG();
block_end = block_start + BUFFERED_BLOCKSIZE;
block_end = block_start + blocksize;
if (block_end <= from)
continue;
if (block_start >= to)
......@@ -258,7 +284,6 @@ static int blkdev_prepare_write(struct file *file, struct page *page, unsigned f
int err = __blkdev_prepare_write(inode, page, from, to);
if (err) {
ClearPageUptodate(page);
kunmap(page);
}
return err;
}
......@@ -269,11 +294,13 @@ static int __blkdev_commit_write(struct inode *inode, struct page *page,
unsigned block_start, block_end;
int partial = 0, need_balance_dirty = 0;
struct buffer_head *bh, *head;
unsigned int blocksize;
blocksize = block_size(inode->i_rdev);
for(bh = head = page->buffers, block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + BUFFERED_BLOCKSIZE;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
......@@ -305,7 +332,6 @@ static int blkdev_commit_write(struct file *file, struct page *page,
{
struct inode *inode = page->mapping->host;
__blkdev_commit_write(inode,page,from,to);
kunmap(page);
return 0;
}
......@@ -797,8 +823,6 @@ int blkdev_put(struct block_device *bdev, int kind)
invalidate_buffers(bd_inode->i_rdev);
}
lock_super(sb);
if (sb->s_flags & MS_RDONLY)
update_buffers(bd_inode->i_rdev);
unlock_super(sb);
drop_super(sb);
}
......@@ -837,7 +861,6 @@ struct address_space_operations def_blk_aops = {
sync_page: block_sync_page,
prepare_write: blkdev_prepare_write,
commit_write: blkdev_commit_write,
direct_IO: blkdev_direct_IO,
};
struct file_operations def_blk_fops = {
......
This diff is collapsed.
......@@ -586,10 +586,6 @@ static int ext2_bmap(struct address_space *mapping, long block)
{
return generic_block_bmap(mapping,block,ext2_get_block);
}
/*
 * ext2 direct-I/O entry point: forwards the request unchanged to
 * generic_direct_IO(), supplying ext2_get_block as the block-mapping
 * callback, and returns whatever the generic routine returns.
 */
static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
{
	int ret;

	ret = generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
	return ret;
}
struct address_space_operations ext2_aops = {
readpage: ext2_readpage,
writepage: ext2_writepage,
......@@ -597,7 +593,6 @@ struct address_space_operations ext2_aops = {
prepare_write: ext2_prepare_write,
commit_write: generic_commit_write,
bmap: ext2_bmap,
direct_IO: ext2_direct_IO,
};
/*
......
......@@ -203,15 +203,4 @@ static inline int get_hardsect_size(kdev_t dev)
#define blk_finished_io(nsects) do { } while (0)
#define blk_started_io(nsects) do { } while (0)
/*
 * Size of device 'dev' as seen by the buffered block-device path.
 *
 * Returns the device's blk_size[] entry plus the rounding term
 * (BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS, or INT_MAX when the major
 * has no blk_size table.
 */
static inline int buffered_blk_size(kdev_t dev)
{
	const int major = MAJOR(dev);

	/* No size table for this major: report "unbounded". */
	if (!blk_size[major])
		return INT_MAX;

	return blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS);
}
#endif
......@@ -46,10 +46,6 @@ struct poll_table_struct;
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
/* buffer header fixed size for the blkdev I/O through pagecache */
#define BUFFERED_BLOCKSIZE_BITS 10
#define BUFFERED_BLOCKSIZE (1 << BUFFERED_BLOCKSIZE_BITS)
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
int nr_files; /* read only */
......@@ -1174,14 +1170,9 @@ extern int invalidate_device(kdev_t, int);
extern void invalidate_inode_pages(struct inode *);
extern void invalidate_inode_pages2(struct address_space *);
extern void invalidate_inode_buffers(struct inode *);
#define invalidate_buffers(dev) __invalidate_buffers((dev), 0, 0)
#define destroy_buffers(dev) __invalidate_buffers((dev), 1, 0)
#define update_buffers(dev) \
do { \
__invalidate_buffers((dev), 0, 1); \
__invalidate_buffers((dev), 0, 2); \
} while (0)
extern void __invalidate_buffers(kdev_t dev, int, int);
#define invalidate_buffers(dev) __invalidate_buffers((dev), 0)
#define destroy_buffers(dev) __invalidate_buffers((dev), 1)
extern void __invalidate_buffers(kdev_t dev, int);
extern void sync_inodes(kdev_t);
extern void sync_unlocked_inodes(void);
extern void write_inode_now(struct inode *, int);
......@@ -1367,7 +1358,6 @@ extern int block_sync_page(struct page *);
int generic_block_bmap(struct address_space *, long, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
extern int waitfor_one_page(struct page*);
......
......@@ -76,6 +76,9 @@ extern struct page * __find_get_page(struct address_space *mapping,
__find_get_page(mapping, index, page_hash(mapping, index))
extern struct page * __find_lock_page (struct address_space * mapping,
unsigned long index, struct page **hash);
extern struct page * find_or_create_page(struct address_space *mapping,
unsigned long index, unsigned int gfp_mask);
extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index))
......
......@@ -131,6 +131,7 @@ extern struct page * read_swap_cache_async(swp_entry_t);
extern void oom_kill(void);
/* linux/mm/swapfile.c */
extern int total_swap_pages;
extern unsigned int nr_swapfiles;
extern struct swap_info_struct swap_info[];
extern int is_swap_partition(kdev_t);
......
......@@ -210,7 +210,6 @@ EXPORT_SYMBOL(waitfor_one_page);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_direct_IO);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_ro_fops);
EXPORT_SYMBOL(generic_buffer_fdatasync);
......
......@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/iobuf.h>
#include <linux/compiler.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
......@@ -56,6 +57,7 @@ spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
static void add_page_to_hash_queue(struct page * page, struct page **p)
{
struct page *next = *p;
......@@ -792,11 +794,13 @@ struct page * __find_get_page(struct address_space *mapping,
}
/*
* Same as the above, but lock the page too, verifying that
* it's still valid once we own it.
* Must be called with the pagecache lock held,
* will return with it held (but it may be dropped
* during blocking operations).
*/
struct page * __find_lock_page (struct address_space *mapping,
unsigned long offset, struct page **hash)
static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
static struct page * __find_lock_page_helper(struct address_space *mapping,
unsigned long offset, struct page *hash)
{
struct page *page;
......@@ -805,27 +809,72 @@ struct page * __find_lock_page (struct address_space *mapping,
* the hash-list needs a held write-lock.
*/
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(mapping, offset, *hash);
page = __find_page_nolock(mapping, offset, hash);
if (page) {
page_cache_get(page);
spin_unlock(&pagecache_lock);
if (TryLockPage(page)) {
spin_unlock(&pagecache_lock);
lock_page(page);
spin_lock(&pagecache_lock);
lock_page(page);
/* Has the page been re-allocated while we slept? */
if (page->mapping != mapping || page->index != offset) {
UnlockPage(page);
page_cache_release(page);
goto repeat;
}
}
}
return page;
}
/* Is the page still hashed? Ok, good.. */
if (page->mapping == mapping && page->index == offset)
return page;
/*
* Same as the above, but lock the page too, verifying that
* it's still valid once we own it.
*/
struct page * __find_lock_page (struct address_space *mapping,
unsigned long offset, struct page **hash)
{
struct page *page;
/* Nope: we raced. Release and try again.. */
UnlockPage(page);
page_cache_release(page);
goto repeat;
}
spin_lock(&pagecache_lock);
page = __find_lock_page_helper(mapping, offset, *hash);
spin_unlock(&pagecache_lock);
return NULL;
return page;
}
/*
 * Look up the page at 'index' in 'mapping' via __find_lock_page_helper();
 * if none is cached, allocate one with 'gfp_mask' and add it to the page
 * cache.
 *
 * Returns the page that was found or inserted, or ERR_PTR(-ENOMEM) when
 * the allocation fails.
 */
struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
{
	struct page **hash = page_hash(mapping, index);
	struct page *found, *fresh;

	spin_lock(&pagecache_lock);
	found = __find_lock_page_helper(mapping, index, *hash);
	spin_unlock(&pagecache_lock);
	if (found)
		return found;

	/* Allocate outside the lock. */
	fresh = alloc_page(gfp_mask);
	if (!fresh)
		return ERR_PTR(-ENOMEM);

	/*
	 * Look again under the lock: the page may have been added by
	 * someone else while we were allocating.
	 */
	spin_lock(&pagecache_lock);
	found = __find_lock_page_helper(mapping, index, *hash);
	if (likely(!found)) {
		__add_to_page_cache(fresh, mapping, index, hash);
		found = fresh;
		fresh = NULL;
	}
	spin_unlock(&pagecache_lock);

	/* Lost the race: drop the page we allocated but did not insert. */
	if (unlikely(fresh != NULL))
		page_cache_release(fresh);
	return found;
}
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
......@@ -960,10 +1009,7 @@ static inline unsigned long calc_end_index(struct inode * inode)
{
unsigned long end_index;
if (!S_ISBLK(inode->i_mode))
end_index = inode->i_size >> PAGE_CACHE_SHIFT;
else
end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
end_index = inode->i_size >> PAGE_CACHE_SHIFT;
return end_index;
}
......@@ -972,10 +1018,7 @@ static inline loff_t calc_rsize(struct inode * inode)
{
loff_t rsize;
if (!S_ISBLK(inode->i_mode))
rsize = inode->i_size;
else
rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
rsize = inode->i_size;
return rsize;
}
......@@ -1316,92 +1359,6 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
UPDATE_ATIME(inode);
}
/*
 * Perform direct (O_DIRECT) I/O between the user buffer 'buf' and the
 * file 'filp', bypassing the page cache for the data transfer.
 *
 * rw:     direction, passed through to map_user_kiobuf() and to the
 *         mapping's direct_IO operation (READ additionally marks the
 *         kiobuf pages dirty).
 * count:  number of bytes to transfer; must be a multiple of the block size.
 * offset: file offset; must be a multiple of the block size.
 *
 * The block size is the superblock's for non-block-device inodes and
 * BUFFERED_BLOCKSIZE for block devices.
 *
 * Returns the number of bytes transferred if any progress was made.
 * Otherwise returns the last status: -EINVAL for a misaligned request or
 * a mapping without a direct_IO operation, or the error from
 * alloc_kiovec(), fsync_inode_data_buffers(), map_user_kiobuf() or
 * direct_IO.
 */
static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
{
ssize_t retval;
int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
struct kiobuf * iobuf;
struct inode * inode = filp->f_dentry->d_inode;
struct address_space * mapping = inode->i_mapping;
new_iobuf = 0;
iobuf = filp->f_iobuf;
/* Bit 0 of f_iobuf_lock guards the file's preallocated iobuf. */
if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
/*
* A parallel read/write is using the preallocated iobuf
* so just run slow and allocate a new one.
*/
retval = alloc_kiovec(1, &iobuf);
if (retval)
goto out;
new_iobuf = 1;
}
if (!S_ISBLK(inode->i_mode)) {
blocksize = inode->i_sb->s_blocksize;
blocksize_bits = inode->i_sb->s_blocksize_bits;
} else {
blocksize = BUFFERED_BLOCKSIZE;
blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
}
blocksize_mask = blocksize - 1;
/* Upper bound, in bytes, on what one loop iteration transfers. */
chunk_size = KIO_MAX_ATOMIC_IO << 10;
retval = -EINVAL;
/* Both the offset and the length must be block-aligned. */
if ((offset & blocksize_mask) || (count & blocksize_mask))
goto out_free;
if (!mapping->a_ops->direct_IO)
goto out_free;
/*
* Flush exclusively the _data_ to disk; metadata must remain
* completely asynchronous or performance will go to /dev/null.
*/
filemap_fdatasync(mapping);
retval = fsync_inode_data_buffers(inode);
filemap_fdatawait(mapping);
if (retval < 0)
goto out_free;
progress = retval = 0;
while (count > 0) {
iosize = count;
if (iosize > chunk_size)
iosize = chunk_size;
retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
if (retval)
break;
retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
if (rw == READ && retval > 0)
mark_dirty_kiobuf(iobuf, retval);
if (retval >= 0) {
count -= retval;
buf += retval;
progress += retval;
}
unmap_kiobuf(iobuf);
/* A short or failed transfer ends the loop. */
if (retval != iosize)
break;
}
/* Any progress at all takes precedence over a trailing error. */
if (progress)
retval = progress;
out_free:
/* Release whichever iobuf was used: the shared one or our private copy. */
if (!new_iobuf)
clear_bit(0, &filp->f_iobuf_lock);
else
free_kiovec(1, &iobuf);
out:
return retval;
}
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
char *kaddr;
......@@ -1435,9 +1392,6 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
if ((ssize_t) count < 0)
return -EINVAL;
if (filp->f_flags & O_DIRECT)
goto o_direct;
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
......@@ -1456,28 +1410,7 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
retval = desc.error;
}
}
out:
return retval;
o_direct:
{
loff_t pos = *ppos, size;
struct inode * inode = filp->f_dentry->d_inode;
retval = 0;
if (!count)
goto out; /* skip atime */
size = calc_rsize(inode);
if (pos < size) {
if (pos + count > size)
count = size - pos;
retval = generic_file_direct_IO(READ, filp, buf, count, pos);
if (retval > 0)
*ppos = pos + retval;
}
UPDATE_ATIME(filp->f_dentry->d_inode);
goto out;
}
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
......@@ -2778,9 +2711,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
if (file->f_flags & O_DIRECT)
goto o_direct;
do {
unsigned long index, offset;
long page_fault;
......@@ -2855,7 +2785,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
if ((status >= 0) && (file->f_flags & O_SYNC))
status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
out_status:
err = written ? written : status;
out:
......@@ -2864,25 +2793,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
fail_write:
status = -EFAULT;
goto unlock;
o_direct:
written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
if (written > 0) {
loff_t end = pos + written;
if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
inode->i_size = end;
mark_inode_dirty(inode);
}
*ppos = end;
invalidate_inode_pages2(mapping);
}
/*
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
*/
if (written >= 0 && file->f_flags & O_SYNC)
status = generic_osync_inode(inode, OSYNC_METADATA);
goto out_status;
}
void __init page_cache_init(unsigned long mempages)
......
......@@ -1101,6 +1101,10 @@ void swapin_readahead(swp_entry_t entry)
return;
}
/*
 * True once swapper_space holds more than 4/5 (80%) as many pages as
 * total_swap_pages; callers then release swap pages as they are paged in.
 */
#define vm_swap_full() (swapper_space.nrpages*5 > total_swap_pages*4)
/*
* We hold the mm semaphore and the page_table_lock on entry and exit.
*/
......@@ -1158,10 +1162,12 @@ static int do_swap_page(struct mm_struct * mm,
swap_free(entry);
mark_page_accessed(page);
if (exclusive_swap_page(page)) {
if (vma->vm_flags & VM_WRITE)
pte = pte_mkwrite(pte);
pte = pte_mkdirty(pte);
delete_from_swap_cache(page);
if (write_access || vm_swap_full()) {
pte = pte_mkdirty(pte);
if (vma->vm_flags & VM_WRITE)
pte = pte_mkwrite(pte);
delete_from_swap_cache(page);
}
}
UnlockPage(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment