Commit a41cd6e4 authored by Linus Torvalds

v2.4.10 -> v2.4.10.0.1

  - me/al/andrea: buffers-in-pagecache coherency, buffer.c cleanups
parent 8c7cba55
......@@ -22,63 +22,85 @@
#include <asm/uaccess.h>
static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
{
int err;
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
err = -EIO;
if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
goto out;
static inline unsigned int blksize_bits(unsigned int size)
{
unsigned int bits = 8;
do {
bits++;
size >>= 1;
} while (size > 256);
return bits;
}
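For reference, blksize_bits() computes log2 of the block size by shifting it down and counting from 9 bits (512 bytes). A stand-alone, user-space sanity check of the arithmetic (not part of the patch; main() exists only to make it runnable):

#include <assert.h>

static unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 8;
	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

int main(void)
{
	/* 512-byte blocks need 9 bits, 1K needs 10, 4K needs 12 */
	assert(blksize_bits(512) == 9);
	assert(blksize_bits(1024) == 10);
	assert(blksize_bits(4096) == 12);
	return 0;
}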
bh_result->b_blocknr = iblock;
bh_result->b_state |= 1UL << BH_Mapped;
err = 0;
static inline unsigned int block_size(kdev_t dev)
{
int retval = BLOCK_SIZE;
int major = MAJOR(dev);
out:
return err;
if (blksize_size[major]) {
int minor = MINOR(dev);
if (blksize_size[major][minor])
retval = blksize_size[major][minor];
}
return retval;
}
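In other words, block_size() falls back to the 1K BLOCK_SIZE default until blksize_size[major][minor] has been filled in by the driver or by set_blocksize() (see the buffer.c hunks below); after set_blocksize(dev, 4096), for example, it returns 4096.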
static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
static unsigned int max_block(kdev_t dev)
{
int i, nr_blocks, retval, dev = inode->i_rdev;
unsigned long * blocks = iobuf->blocks;
unsigned int retval = ~0U;
int major = MAJOR(dev);
if (blocksize != BUFFERED_BLOCKSIZE)
BUG();
if (blk_size[major]) {
int minor = MINOR(dev);
unsigned int blocks = blk_size[major][minor];
if (blocks) {
unsigned int size = block_size(dev);
unsigned int sizebits = blksize_bits(size);
blocks += (size-1) >> BLOCK_SIZE_BITS;
retval = blocks << (BLOCK_SIZE_BITS - sizebits);
if (sizebits > BLOCK_SIZE_BITS)
retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
}
}
return retval;
}
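Worked example (BLOCK_SIZE_BITS is 10, so blk_size[] counts 1K units): for a device reporting 1000 such blocks with a 4K soft blocksize, sizebits is 12, blocks is rounded up to 1003, and max_block() returns 1003 >> 2 = 250 four-kilobyte blocks; with a 512-byte blocksize it returns 1000 << 1 = 2000 sectors.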
nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
/* build the blocklist */
for (i = 0; i < nr_blocks; i++, blocknr++) {
struct buffer_head bh;
retval = blkdev_get_block(inode, blocknr, &bh);
if (retval)
goto out;
static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
{
int err;
blocks[i] = bh.b_blocknr;
}
err = -EIO;
if (iblock >= max_block(inode->i_rdev))
goto out;
retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
bh_result->b_blocknr = iblock;
bh_result->b_state |= 1UL << BH_Mapped;
err = 0;
out:
return retval;
return err;
}
static int blkdev_writepage(struct page * page)
{
int err, i;
unsigned int blocksize;
unsigned long block;
struct buffer_head *bh, *head;
struct inode *inode = page->mapping->host;
if (!PageLocked(page))
BUG();
blocksize = block_size(inode->i_rdev);
if (!page->buffers)
create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, inode->i_rdev, blocksize);
head = page->buffers;
block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
block = page->index << (PAGE_CACHE_SHIFT - blksize_bits(blocksize));
bh = head;
i = 0;
......@@ -132,19 +154,21 @@ static int blkdev_readpage(struct file * file, struct page * page)
struct inode *inode = page->mapping->host;
kdev_t dev = inode->i_rdev;
unsigned long iblock, lblock;
struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
unsigned int blocks;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocks, blocksize, blocksize_bits;
int nr, i;
if (!PageLocked(page))
PAGE_BUG(page);
blocksize = block_size(dev);
blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, dev, blocksize);
head = page->buffers;
blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
blocks = PAGE_CACHE_SIZE >> blocksize_bits;
iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
lblock = max_block(dev);
bh = head;
nr = 0;
i = 0;
......@@ -159,7 +183,7 @@ static int blkdev_readpage(struct file * file, struct page * page)
continue;
}
if (!buffer_mapped(bh)) {
memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
memset(kmap(page) + i * blocksize, 0, blocksize);
flush_dcache_page(page);
kunmap(page);
set_bit(BH_Uptodate, &bh->b_state);
......@@ -206,19 +230,21 @@ static int __blkdev_prepare_write(struct inode *inode, struct page *page,
unsigned long block;
int err = 0;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
kmap(page);
unsigned int blocksize, blocksize_bits;
blocksize = block_size(dev);
blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
create_empty_buffers(page, dev, blocksize);
head = page->buffers;
block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
if (!bh)
BUG();
block_end = block_start + BUFFERED_BLOCKSIZE;
block_end = block_start + blocksize;
if (block_end <= from)
continue;
if (block_start >= to)
......@@ -258,7 +284,6 @@ static int blkdev_prepare_write(struct file *file, struct page *page, unsigned f
int err = __blkdev_prepare_write(inode, page, from, to);
if (err) {
ClearPageUptodate(page);
kunmap(page);
}
return err;
}
......@@ -269,11 +294,13 @@ static int __blkdev_commit_write(struct inode *inode, struct page *page,
unsigned block_start, block_end;
int partial = 0, need_balance_dirty = 0;
struct buffer_head *bh, *head;
unsigned int blocksize;
blocksize = block_size(inode->i_rdev);
for(bh = head = page->buffers, block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + BUFFERED_BLOCKSIZE;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
......@@ -305,7 +332,6 @@ static int blkdev_commit_write(struct file *file, struct page *page,
{
struct inode *inode = page->mapping->host;
__blkdev_commit_write(inode,page,from,to);
kunmap(page);
return 0;
}
......@@ -797,8 +823,6 @@ int blkdev_put(struct block_device *bdev, int kind)
invalidate_buffers(bd_inode->i_rdev);
}
lock_super(sb);
if (sb->s_flags & MS_RDONLY)
update_buffers(bd_inode->i_rdev);
unlock_super(sb);
drop_super(sb);
}
......@@ -837,7 +861,6 @@ struct address_space_operations def_blk_aops = {
sync_page: block_sync_page,
prepare_write: blkdev_prepare_write,
commit_write: blkdev_commit_write,
direct_IO: blkdev_direct_IO,
};
struct file_operations def_blk_fops = {
......
......@@ -96,7 +96,8 @@ struct bh_free_head {
};
static struct bh_free_head free_list[NR_SIZES];
static int grow_buffers(int size);
static void truncate_buffers(kdev_t dev);
static int grow_buffers(kdev_t dev, int block, int size);
static void __refile_buffer(struct buffer_head *);
/* This is used by some architectures to estimate available memory. */
......@@ -559,59 +560,28 @@ static void __insert_into_queues(struct buffer_head *bh)
__insert_into_lru_list(bh, bh->b_list);
}
/* This function must only run if there are no other
* references _anywhere_ to this buffer head.
*/
static void put_last_free(struct buffer_head * bh)
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
struct buffer_head **bhp = &head->list;
bh->b_state = 0;
struct buffer_head *bh, **p = &hash(dev, block);
spin_lock(&head->lock);
bh->b_dev = B_FREE;
if(!*bhp) {
*bhp = bh;
bh->b_prev_free = bh;
}
bh->b_next_free = *bhp;
bh->b_prev_free = (*bhp)->b_prev_free;
(*bhp)->b_prev_free->b_next_free = bh;
(*bhp)->b_prev_free = bh;
spin_unlock(&head->lock);
}
/*
* Why like this, I hear you say... The reason is race-conditions.
* As we don't lock buffers (unless we are reading them, that is),
* something might happen to it while we sleep (ie a read-error
* will force it bad). This shouldn't really happen currently, but
* the code is ready.
*/
static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head *bh = hash(dev, block);
read_lock(&hash_table_lock);
for (; bh; bh = bh->b_next)
if (bh->b_blocknr == block &&
bh->b_size == size &&
bh->b_dev == dev)
for (;;) {
bh = *p;
if (!bh)
break;
if (bh)
p = &bh->b_next;
if (bh->b_blocknr != block)
continue;
if (bh->b_size != size)
continue;
if (bh->b_dev != dev)
continue;
get_bh(bh);
break;
}
return bh;
}
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head *bh;
read_lock(&hash_table_lock);
bh = __get_hash_table(dev, block, size);
read_unlock(&hash_table_lock);
return bh;
}
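A usage sketch for the reworked lookup (the caller below is illustrative, not from this commit): both get_hash_table() and getblk() return the buffer with b_count raised, so the caller drops the reference with brelse() when finished.

/* illustrative caller, not part of the commit */
static void touch_block(kdev_t dev, int block, int size)
{
	struct buffer_head *bh;

	bh = get_hash_table(dev, block, size);	/* NULL if not hashed yet */
	if (!bh)
		bh = getblk(dev, block, size);	/* creates and hashes it */

	/* ... read or modify bh->b_data here ... */

	brelse(bh);	/* drop the reference taken by the lookup */
}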
......@@ -688,7 +658,7 @@ int inode_has_buffers(struct inode *inode)
we think the disk contains more recent information than the buffercache.
The update == 1 pass marks the buffers we need to update, the update == 2
pass does the actual I/O. */
void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
{
int i, nlist, slept;
struct buffer_head * bh, * bh_next;
......@@ -722,33 +692,18 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
/* All buffers in the lru lists are mapped */
if (!buffer_mapped(bh))
BUG();
if (buffer_dirty(bh))
printk("invalidate: dirty buffer\n");
if (!atomic_read(&bh->b_count)) {
if (destroy_dirty_buffers || !buffer_dirty(bh)) {
remove_inode_queue(bh);
#if 0
__remove_from_queues(bh);
put_last_free(bh);
#endif
}
} else if (update) {
if ((update == 2) ^ buffer_uptodate(bh) &&
(update == 2) ^ buffer_req(bh)) {
write_unlock(&hash_table_lock);
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
if (update == 2) {
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
} else {
lock_buffer(bh);
clear_bit(BH_Uptodate, &bh->b_state);
clear_bit(BH_Req, &bh->b_state);
unlock_buffer(bh);
}
atomic_dec(&bh->b_count);
goto retry;
}
}
} else
printk("invalidate: busy buffer\n");
write_unlock(&hash_table_lock);
if (slept)
......@@ -759,13 +714,14 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
spin_unlock(&lru_list_lock);
if (slept)
goto retry;
/* Get rid of the page cache */
truncate_buffers(dev);
}
void set_blocksize(kdev_t dev, int size)
{
extern int *blksize_size[];
int i, nlist, slept;
struct buffer_head * bh, * bh_next;
if (!blksize_size[MAJOR(dev)])
return;
......@@ -780,60 +736,10 @@ void set_blocksize(kdev_t dev, int size)
}
if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
return;
sync_buffers(dev, 2);
blksize_size[MAJOR(dev)][MINOR(dev)] = size;
retry:
slept = 0;
spin_lock(&lru_list_lock);
for(nlist = 0; nlist < NR_LIST; nlist++) {
bh = lru_list[nlist];
if (!bh)
continue;
for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
bh_next = bh->b_next_free;
if (bh->b_dev != dev || bh->b_size == size)
continue;
/* Unhashed? */
if (!bh->b_pprev)
continue;
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&lru_list_lock);
wait_on_buffer(bh);
slept = 1;
spin_lock(&lru_list_lock);
put_bh(bh);
}
write_lock(&hash_table_lock);
if (!atomic_read(&bh->b_count)) {
if (buffer_dirty(bh))
printk(KERN_WARNING
"set_blocksize: dev %s buffer_dirty %lu size %hu\n",
kdevname(dev), bh->b_blocknr, bh->b_size);
remove_inode_queue(bh);
__remove_from_queues(bh);
put_last_free(bh);
} else {
if (atomic_set_buffer_clean(bh))
__refile_buffer(bh);
clear_bit(BH_Uptodate, &bh->b_state);
printk(KERN_WARNING
"set_blocksize: "
"b_count %d, dev %s, block %lu, from %p\n",
atomic_read(&bh->b_count), bdevname(bh->b_dev),
bh->b_blocknr, __builtin_return_address(0));
}
write_unlock(&hash_table_lock);
if (slept)
goto out;
}
}
out:
spin_unlock(&lru_list_lock);
if (slept)
goto retry;
invalidate_buffers(dev);
}
static void free_more_memory(void)
......@@ -1137,57 +1043,16 @@ void invalidate_inode_buffers(struct inode *inode)
*/
struct buffer_head * getblk(kdev_t dev, int block, int size)
{
for (;;) {
struct buffer_head * bh;
int isize;
repeat:
spin_lock(&lru_list_lock);
write_lock(&hash_table_lock);
bh = __get_hash_table(dev, block, size);
bh = get_hash_table(dev, block, size);
if (bh)
goto out;
isize = BUFSIZE_INDEX(size);
spin_lock(&free_list[isize].lock);
bh = free_list[isize].list;
if (bh) {
__remove_from_free_list(bh, isize);
atomic_set(&bh->b_count, 1);
}
spin_unlock(&free_list[isize].lock);
/*
* OK, FINALLY we know that this buffer is the only one of
* its kind, we hold a reference (b_count>0), it is unlocked,
* and it is clean.
*/
if (bh) {
init_buffer(bh, NULL, NULL);
bh->b_dev = dev;
bh->b_blocknr = block;
bh->b_state = 1 << BH_Mapped;
/* Insert the buffer into the regular lists */
__insert_into_queues(bh);
out:
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
touch_buffer(bh);
return bh;
}
/*
* If we block while refilling the free list, somebody may
* create the buffer first ... search the hashes again.
*/
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
if (!grow_buffers(size))
if (!grow_buffers(dev, block, size))
free_more_memory();
/* FIXME: getblk should fail if there's not enough memory */
goto repeat;
}
}
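Because the hunk interleaves the removed free-list handling with the new code, the post-patch getblk() is easier to read in one piece. A reconstruction sketch assembled from the added lines above (the FIXME comment is omitted):

struct buffer_head * getblk(kdev_t dev, int block, int size)
{
	for (;;) {
		struct buffer_head * bh;

		bh = get_hash_table(dev, block, size);
		if (bh) {
			touch_buffer(bh);
			return bh;
		}

		if (!grow_buffers(dev, block, size))
			free_more_memory();
	}
}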
/* -1 -> no need to flush
......@@ -1313,22 +1178,7 @@ void __brelse(struct buffer_head * buf)
*/
void __bforget(struct buffer_head * buf)
{
/* grab the lru lock here to block bdflush. */
spin_lock(&lru_list_lock);
write_lock(&hash_table_lock);
if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
goto in_use;
__hash_unlink(buf);
write_unlock(&hash_table_lock);
remove_inode_queue(buf);
__remove_from_lru_list(buf, buf->b_list);
spin_unlock(&lru_list_lock);
put_last_free(buf);
return;
in_use:
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
__brelse(buf);
}
/**
......@@ -1524,17 +1374,17 @@ static struct buffer_head * create_buffers(struct page * page, unsigned long siz
goto try_again;
}
static void unmap_buffer(struct buffer_head * bh)
/*
* Called when truncating a buffer on a page completely.
*
* We can avoid IO by marking it clean.
* FIXME!! FIXME!! FIXME!! We need to unmap it too,
* so that the filesystem won't write to it. There's
* some bug somewhere..
*/
static void discard_buffer(struct buffer_head * bh)
{
if (buffer_mapped(bh)) {
mark_buffer_clean(bh);
lock_buffer(bh);
clear_bit(BH_Uptodate, &bh->b_state);
clear_bit(BH_Mapped, &bh->b_state);
clear_bit(BH_Req, &bh->b_state);
clear_bit(BH_New, &bh->b_state);
unlock_buffer(bh);
}
}
/*
......@@ -1564,7 +1414,7 @@ int discard_bh_page(struct page *page, unsigned long offset, int drop_pagecache)
* is this block fully flushed?
*/
if (offset <= curr_off)
unmap_buffer(bh);
discard_buffer(bh);
curr_off = next_off;
bh = next;
} while (bh != head);
......@@ -2141,47 +1991,6 @@ int generic_block_bmap(struct address_space *mapping, long block, get_block_t *g
return tmp.b_blocknr;
}
int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
{
int i, nr_blocks, retval;
unsigned long * blocks = iobuf->blocks;
nr_blocks = iobuf->length / blocksize;
/* build the blocklist */
for (i = 0; i < nr_blocks; i++, blocknr++) {
struct buffer_head bh;
bh.b_state = 0;
bh.b_dev = inode->i_dev;
bh.b_size = blocksize;
retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
if (retval)
goto out;
if (rw == READ) {
if (buffer_new(&bh))
BUG();
if (!buffer_mapped(&bh)) {
/* there was a hole in the filesystem */
blocks[i] = -1UL;
continue;
}
} else {
if (buffer_new(&bh))
unmap_underlying_metadata(&bh);
if (!buffer_mapped(&bh))
BUG();
}
blocks[i] = bh.b_blocknr;
}
retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
out:
return retval;
}
/*
* IO completion routine for a buffer_head being used for kiobuf IO: we
* can't dispatch the kiobuf callback until io_count reaches 0.
......@@ -2447,67 +2256,125 @@ int block_symlink(struct inode *inode, const char *symname, int len)
return err;
}
/*
* Create the page-cache page that contains the requested block
*/
static struct page * grow_dev_page(struct block_device *bdev, unsigned long index, int size)
{
struct page * page;
page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS);
if (IS_ERR(page))
return NULL;
if (!PageLocked(page))
BUG();
if (!page->buffers) {
struct buffer_head *bh, *tail;
struct buffer_head *head = create_buffers(page, size, 0);
if (!head)
goto failed;
bh = head;
do {
tail = bh;
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
page->buffers = head;
page_cache_get(page);
atomic_inc(&buffermem_pages);
}
return page;
failed:
UnlockPage(page);
page_cache_release(page);
return NULL;
}
static void hash_page_buffers(struct page *page, kdev_t dev, int block, int size)
{
struct buffer_head *head = page->buffers;
struct buffer_head *bh = head;
unsigned int uptodate;
uptodate = 1 << BH_Mapped;
if (Page_Uptodate(page))
uptodate |= 1 << BH_Uptodate;
spin_lock(&lru_list_lock);
write_lock(&hash_table_lock);
do {
if (!(bh->b_state & (1 << BH_Mapped))) {
init_buffer(bh, NULL, NULL);
bh->b_dev = dev;
bh->b_blocknr = block;
bh->b_state = uptodate;
}
/* Insert the buffer into the regular lists */
if (!bh->b_pprev) {
__insert_into_queues(bh);
}
block++;
bh = bh->b_this_page;
} while (bh != head);
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
}
/*
* Try to increase the number of buffers available: the size argument
* is used to determine what kind of buffers we want.
*/
static int grow_buffers(int size)
static int grow_buffers(kdev_t dev, int block, int size)
{
struct page * page;
struct buffer_head *bh, *tmp;
struct buffer_head * insert_point;
int isize;
struct block_device *bdev;
unsigned long index;
int sizebits;
if ((size & 511) || (size > PAGE_SIZE)) {
printk(KERN_ERR "VFS: grow_buffers: size = %d\n",size);
return 0;
}
sizebits = -1;
do {
sizebits++;
} while ((size << sizebits) < PAGE_SIZE);
page = alloc_page(GFP_NOFS);
if (!page)
goto out;
LockPage(page);
bh = create_buffers(page, size, 0);
if (!bh)
goto no_buffer_head;
isize = BUFSIZE_INDEX(size);
index = block >> sizebits;
block = index << sizebits;
spin_lock(&free_list[isize].lock);
insert_point = free_list[isize].list;
tmp = bh;
while (1) {
if (insert_point) {
tmp->b_next_free = insert_point->b_next_free;
tmp->b_prev_free = insert_point;
insert_point->b_next_free->b_prev_free = tmp;
insert_point->b_next_free = tmp;
} else {
tmp->b_prev_free = tmp;
tmp->b_next_free = tmp;
}
insert_point = tmp;
if (tmp->b_this_page)
tmp = tmp->b_this_page;
else
break;
bdev = bdget(kdev_t_to_nr(dev));
if (!bdev) {
printk("No block device for %s\n", kdevname(dev));
BUG();
}
tmp->b_this_page = bh;
free_list[isize].list = bh;
spin_unlock(&free_list[isize].lock);
page->buffers = bh;
page->flags &= ~(1 << PG_referenced);
lru_cache_add(page);
UnlockPage(page);
atomic_inc(&buffermem_pages);
return 1;
/* Create a page with the proper size buffers.. */
page = grow_dev_page(bdev, index, size);
/* This is "wrong" - talk to Al Viro */
atomic_dec(&bdev->bd_count);
if (!page)
return 0;
no_buffer_head:
/* Hash in the buffers on the hash list */
hash_page_buffers(page, dev, block, size);
UnlockPage(page);
page_cache_release(page);
out:
return 0;
return 1;
}
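Worked example for the index arithmetic: with 4K pages and 512-byte buffers, sizebits comes out as 3, so a request for block 1234 maps to page index 1234 >> 3 = 154 and block is rounded down to 154 << 3 = 1232; the page created by grow_dev_page() then covers blocks 1232-1239, and hash_page_buffers() hashes all eight of them in one pass.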
static void truncate_buffers(kdev_t dev)
{
struct block_device *bdev = bdget(kdev_t_to_nr(dev));
truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
atomic_dec(&bdev->bd_count);
}
static int sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
......
......@@ -586,10 +586,6 @@ static int ext2_bmap(struct address_space *mapping, long block)
{
return generic_block_bmap(mapping,block,ext2_get_block);
}
static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
{
return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
}
struct address_space_operations ext2_aops = {
readpage: ext2_readpage,
writepage: ext2_writepage,
......@@ -597,7 +593,6 @@ struct address_space_operations ext2_aops = {
prepare_write: ext2_prepare_write,
commit_write: generic_commit_write,
bmap: ext2_bmap,
direct_IO: ext2_direct_IO,
};
/*
......
......@@ -203,15 +203,4 @@ static inline int get_hardsect_size(kdev_t dev)
#define blk_finished_io(nsects) do { } while (0)
#define blk_started_io(nsects) do { } while (0)
static inline int buffered_blk_size(kdev_t dev)
{
int ret = INT_MAX;
int major = MAJOR(dev);
if (blk_size[major])
ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS);
return ret;
}
#endif
......@@ -46,10 +46,6 @@ struct poll_table_struct;
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
/* buffer header fixed size for the blkdev I/O through pagecache */
#define BUFFERED_BLOCKSIZE_BITS 10
#define BUFFERED_BLOCKSIZE (1 << BUFFERED_BLOCKSIZE_BITS)
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
int nr_files; /* read only */
......@@ -1174,14 +1170,9 @@ extern int invalidate_device(kdev_t, int);
extern void invalidate_inode_pages(struct inode *);
extern void invalidate_inode_pages2(struct address_space *);
extern void invalidate_inode_buffers(struct inode *);
#define invalidate_buffers(dev) __invalidate_buffers((dev), 0, 0)
#define destroy_buffers(dev) __invalidate_buffers((dev), 1, 0)
#define update_buffers(dev) \
do { \
__invalidate_buffers((dev), 0, 1); \
__invalidate_buffers((dev), 0, 2); \
} while (0)
extern void __invalidate_buffers(kdev_t dev, int, int);
#define invalidate_buffers(dev) __invalidate_buffers((dev), 0)
#define destroy_buffers(dev) __invalidate_buffers((dev), 1)
extern void __invalidate_buffers(kdev_t dev, int);
extern void sync_inodes(kdev_t);
extern void sync_unlocked_inodes(void);
extern void write_inode_now(struct inode *, int);
......@@ -1367,7 +1358,6 @@ extern int block_sync_page(struct page *);
int generic_block_bmap(struct address_space *, long, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
extern int waitfor_one_page(struct page*);
......
......@@ -76,6 +76,9 @@ extern struct page * __find_get_page(struct address_space *mapping,
__find_get_page(mapping, index, page_hash(mapping, index))
extern struct page * __find_lock_page (struct address_space * mapping,
unsigned long index, struct page **hash);
extern struct page * find_or_create_page(struct address_space *mapping,
unsigned long index, unsigned int gfp_mask);
extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index))
......
......@@ -131,6 +131,7 @@ extern struct page * read_swap_cache_async(swp_entry_t);
extern void oom_kill(void);
/* linux/mm/swapfile.c */
extern int total_swap_pages;
extern unsigned int nr_swapfiles;
extern struct swap_info_struct swap_info[];
extern int is_swap_partition(kdev_t);
......
......@@ -210,7 +210,6 @@ EXPORT_SYMBOL(waitfor_one_page);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_direct_IO);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_ro_fops);
EXPORT_SYMBOL(generic_buffer_fdatasync);
......
......@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/iobuf.h>
#include <linux/compiler.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
......@@ -56,6 +57,7 @@ spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
static void add_page_to_hash_queue(struct page * page, struct page **p)
{
struct page *next = *p;
......@@ -792,11 +794,13 @@ struct page * __find_get_page(struct address_space *mapping,
}
/*
* Same as the above, but lock the page too, verifying that
* it's still valid once we own it.
* Must be called with the pagecache lock held,
* will return with it held (but it may be dropped
* during blocking operations..
*/
struct page * __find_lock_page (struct address_space *mapping,
unsigned long offset, struct page **hash)
static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
static struct page * __find_lock_page_helper(struct address_space *mapping,
unsigned long offset, struct page *hash)
{
struct page *page;
......@@ -805,27 +809,72 @@ struct page * __find_lock_page (struct address_space *mapping,
* the hash-list needs a held write-lock.
*/
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(mapping, offset, *hash);
page = __find_page_nolock(mapping, offset, hash);
if (page) {
page_cache_get(page);
if (TryLockPage(page)) {
spin_unlock(&pagecache_lock);
lock_page(page);
spin_lock(&pagecache_lock);
/* Is the page still hashed? Ok, good.. */
if (page->mapping == mapping && page->index == offset)
return page;
/* Nope: we raced. Release and try again.. */
/* Has the page been re-allocated while we slept? */
if (page->mapping != mapping || page->index != offset) {
UnlockPage(page);
page_cache_release(page);
goto repeat;
}
}
}
return page;
}
/*
* Same as the above, but lock the page too, verifying that
* it's still valid once we own it.
*/
struct page * __find_lock_page (struct address_space *mapping,
unsigned long offset, struct page **hash)
{
struct page *page;
spin_lock(&pagecache_lock);
page = __find_lock_page_helper(mapping, offset, *hash);
spin_unlock(&pagecache_lock);
return page;
}
/*
* Same as above, but create the page if required..
*/
struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
{
struct page *page;
struct page **hash = page_hash(mapping, index);
spin_lock(&pagecache_lock);
page = __find_lock_page_helper(mapping, index, *hash);
spin_unlock(&pagecache_lock);
if (!page) {
struct page *newpage = alloc_page(gfp_mask);
page = ERR_PTR(-ENOMEM);
if (newpage) {
spin_lock(&pagecache_lock);
page = __find_lock_page_helper(mapping, index, *hash);
if (likely(!page)) {
page = newpage;
__add_to_page_cache(page, mapping, index, hash);
newpage = NULL;
}
spin_unlock(&pagecache_lock);
return NULL;
if (unlikely(newpage != NULL))
page_cache_release(newpage);
}
}
return page;
}
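The allocate-outside-the-lock-then-recheck pattern in find_or_create_page() is the part worth studying. A self-contained user-space analogue of the same idea (the entry type, lookup(), table and table_lock names are all made up for illustration; this is not kernel API):

#include <pthread.h>
#include <stdlib.h>

struct entry { unsigned long index; struct entry *next; };
static struct entry *table;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry *lookup(unsigned long index)
{
	struct entry *e;
	for (e = table; e; e = e->next)
		if (e->index == index)
			return e;
	return NULL;
}

struct entry *find_or_create(unsigned long index)
{
	struct entry *e, *new;

	pthread_mutex_lock(&table_lock);
	e = lookup(index);
	pthread_mutex_unlock(&table_lock);
	if (e)
		return e;

	new = malloc(sizeof(*new));		/* may block: done without the lock */
	if (!new)
		return NULL;
	new->index = index;

	pthread_mutex_lock(&table_lock);
	e = lookup(index);			/* somebody may have raced with us */
	if (!e) {
		new->next = table;
		table = new;
		e = new;
		new = NULL;
	}
	pthread_mutex_unlock(&table_lock);

	free(new);				/* free(NULL) is a no-op */
	return e;
}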
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
......@@ -960,10 +1009,7 @@ static inline unsigned long calc_end_index(struct inode * inode)
{
unsigned long end_index;
if (!S_ISBLK(inode->i_mode))
end_index = inode->i_size >> PAGE_CACHE_SHIFT;
else
end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
return end_index;
}
......@@ -972,10 +1018,7 @@ static inline loff_t calc_rsize(struct inode * inode)
{
loff_t rsize;
if (!S_ISBLK(inode->i_mode))
rsize = inode->i_size;
else
rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
return rsize;
}
......@@ -1316,92 +1359,6 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
UPDATE_ATIME(inode);
}
static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
{
ssize_t retval;
int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
struct kiobuf * iobuf;
struct inode * inode = filp->f_dentry->d_inode;
struct address_space * mapping = inode->i_mapping;
new_iobuf = 0;
iobuf = filp->f_iobuf;
if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
/*
* A parallel read/write is using the preallocated iobuf
* so just run slow and allocate a new one.
*/
retval = alloc_kiovec(1, &iobuf);
if (retval)
goto out;
new_iobuf = 1;
}
if (!S_ISBLK(inode->i_mode)) {
blocksize = inode->i_sb->s_blocksize;
blocksize_bits = inode->i_sb->s_blocksize_bits;
} else {
blocksize = BUFFERED_BLOCKSIZE;
blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
}
blocksize_mask = blocksize - 1;
chunk_size = KIO_MAX_ATOMIC_IO << 10;
retval = -EINVAL;
if ((offset & blocksize_mask) || (count & blocksize_mask))
goto out_free;
if (!mapping->a_ops->direct_IO)
goto out_free;
/*
* Flush to disk exclusively the _data_; metadata must remain
* completely asynchronous or performance will go to /dev/null.
*/
filemap_fdatasync(mapping);
retval = fsync_inode_data_buffers(inode);
filemap_fdatawait(mapping);
if (retval < 0)
goto out_free;
progress = retval = 0;
while (count > 0) {
iosize = count;
if (iosize > chunk_size)
iosize = chunk_size;
retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
if (retval)
break;
retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
if (rw == READ && retval > 0)
mark_dirty_kiobuf(iobuf, retval);
if (retval >= 0) {
count -= retval;
buf += retval;
progress += retval;
}
unmap_kiobuf(iobuf);
if (retval != iosize)
break;
}
if (progress)
retval = progress;
out_free:
if (!new_iobuf)
clear_bit(0, &filp->f_iobuf_lock);
else
free_kiovec(1, &iobuf);
out:
return retval;
}
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
char *kaddr;
......@@ -1435,9 +1392,6 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
if ((ssize_t) count < 0)
return -EINVAL;
if (filp->f_flags & O_DIRECT)
goto o_direct;
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
......@@ -1456,28 +1410,7 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
retval = desc.error;
}
}
out:
return retval;
o_direct:
{
loff_t pos = *ppos, size;
struct inode * inode = filp->f_dentry->d_inode;
retval = 0;
if (!count)
goto out; /* skip atime */
size = calc_rsize(inode);
if (pos < size) {
if (pos + count > size)
count = size - pos;
retval = generic_file_direct_IO(READ, filp, buf, count, pos);
if (retval > 0)
*ppos = pos + retval;
}
UPDATE_ATIME(filp->f_dentry->d_inode);
goto out;
}
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
......@@ -2778,9 +2711,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
if (file->f_flags & O_DIRECT)
goto o_direct;
do {
unsigned long index, offset;
long page_fault;
......@@ -2855,7 +2785,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
if ((status >= 0) && (file->f_flags & O_SYNC))
status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
out_status:
err = written ? written : status;
out:
......@@ -2864,25 +2793,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
fail_write:
status = -EFAULT;
goto unlock;
o_direct:
written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
if (written > 0) {
loff_t end = pos + written;
if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
inode->i_size = end;
mark_inode_dirty(inode);
}
*ppos = end;
invalidate_inode_pages2(mapping);
}
/*
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
*/
if (written >= 0 && file->f_flags & O_SYNC)
status = generic_osync_inode(inode, OSYNC_METADATA);
goto out_status;
}
void __init page_cache_init(unsigned long mempages)
......
......@@ -1101,6 +1101,10 @@ void swapin_readahead(swp_entry_t entry)
return;
}
/* Swap 80% full? Release the pages as they are paged in.. */
#define vm_swap_full() \
(swapper_space.nrpages*5 > total_swap_pages*4)
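The arithmetic: nrpages*5 > total_swap_pages*4 is the integer form of nrpages > 0.8 * total_swap_pages, so with 1000 swap pages in total the test fires once the swap cache holds 801 pages or more (801 * 5 = 4005 > 4000).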
/*
* We hold the mm semaphore and the page_table_lock on entry and exit.
*/
......@@ -1158,11 +1162,13 @@ static int do_swap_page(struct mm_struct * mm,
swap_free(entry);
mark_page_accessed(page);
if (exclusive_swap_page(page)) {
if (write_access || vm_swap_full()) {
pte = pte_mkdirty(pte);
if (vma->vm_flags & VM_WRITE)
pte = pte_mkwrite(pte);
pte = pte_mkdirty(pte);
delete_from_swap_cache(page);
}
}
UnlockPage(page);
flush_page_to_ram(page);
......