Commit e2f6721a authored by Linus Torvalds

v2.4.9.14 -> v2.4.9.15

  - Jan Harkes: make Coda work with arbitrary host filesystems, not
  just filesystems that use generic_file_read/write
  - Al Viro: block device cleanups
  - Hugh Dickins: swap device lock fixes - fix swap readahead race
  - me, Andrea: more reference bit cleanups
parent 269f8f70
VERSION = 2
PATCHLEVEL = 4
SUBLEVEL = 10
EXTRAVERSION =-pre14
EXTRAVERSION =-pre15
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
@@ -491,7 +491,6 @@ static void __exit rd_cleanup (void)
bdev->bd_cache_openers--;
truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
blkdev_put(bdev, BDEV_FILE);
bdput(bdev);
}
destroy_buffers(MKDEV(MAJOR_NR, i));
}
@@ -780,7 +779,7 @@ static void __init rd_load_image(kdev_t device, int offset, int unit)
if (i && (i % devblocks == 0)) {
printk("done disk #%d.\n", i/devblocks);
rotate = 0;
if (blkdev_close(inode, &infile) != 0) {
if (infile.f_op->release(inode, &infile) != 0) {
printk("Error closing the disk.\n");
goto noclose_input;
}
......
@@ -103,6 +103,7 @@ int raw_open(struct inode *inode, struct file *filp)
if (!bdev)
goto out;
atomic_inc(&bdev->bd_count);
rdev = to_kdev_t(bdev->bd_dev);
err = blkdev_get(bdev, filp->f_mode, 0, BDEV_RAW);
if (err)
......
@@ -270,6 +270,8 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
return 0;
}
EXPORT_SYMBOL(tty_register_ldisc);
/* Set the discipline of a tty line. */
static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
{
......
@@ -279,6 +279,7 @@ static void __init probedisk(int major, int minor,int device)
int i;
struct highpoint_raid_conf *prom;
static unsigned char block[4096];
struct block_device *bdev;
if (maxsectors(major,minor)==0)
return;
@@ -301,12 +302,12 @@ static void __init probedisk(int major, int minor,int device)
if (i>8)
return;
raid[device].disk[i].bdev = bdget(MKDEV(major,minor));
if (raid[device].disk[i].bdev != NULL) {
bdev = bdget(MKDEV(major,minor));
if (bdev && blkdev_get(bdev,FMODE_READ|FMODE_WRITE,0,BDEV_RAW) == 0) {
int j=0;
struct gendisk *gd;
/* This is supposed to prevent others from stealing our underlying disks */
blkdev_get(raid[device].disk[i].bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW);
raid[device].disk[i].bdev = bdev;
/* now blank the /proc/partitions table for the wrong partition table,
so that scripts don't accidentally mount it and crash the kernel */
/* XXX: the 0 is an utter hack --hch */
@@ -408,12 +409,12 @@ static void __exit hptraid_exit (void)
{
int i,device;
for (device = 0; device<16; device++) {
for (i=0;i<8;i++)
if (raid[device].disk[i].bdev) {
blkdev_put(raid[device].disk[i].bdev, BDEV_RAW);
bdput(raid[device].disk[i].bdev);
raid[device].disk[i].bdev = NULL;
}
for (i=0;i<8;i++) {
struct block_device *bdev = raid[device].disk[i].bdev;
raid[device].disk[i].bdev = NULL;
if (bdev)
blkdev_put(bdev, BDEV_RAW);
}
if (raid[device].sectors)
ataraid_release_device(device);
}
......
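The hptraid fix above (and the matching pdcraid fix below) changes the ordering in both directions: probe now opens the device fully before publishing the pointer into the raid[] array, and exit clears the pointer before calling blkdev_put(). A compressed user-space sketch of that discipline, with illustrative names only, not driver code:

#include <stdlib.h>

struct dev { int opened; };

static struct dev *get_dev(void)        /* bdget() + blkdev_get() stand-in */
{
        struct dev *d = calloc(1, sizeof(*d));
        if (d)
                d->opened = 1;
        return d;
}

static void put_dev(struct dev *d)      /* blkdev_put() stand-in */
{
        free(d);
}

struct slot { struct dev *dev; };       /* raid[device].disk[i] stand-in */

/* Probe: acquire fully first, publish the pointer only on success. */
static int probe(struct slot *s)
{
        struct dev *d = get_dev();
        if (!d)
                return -1;      /* the slot never saw a half-opened device */
        s->dev = d;
        return 0;
}

/* Exit: unpublish first, then release, so no one can use a dead pointer. */
static void release(struct slot *s)
{
        struct dev *d = s->dev;

        s->dev = NULL;
        if (d)
                put_dev(d);
}

int main(void)
{
        struct slot s = { NULL };

        if (probe(&s) == 0)
                release(&s);
        return 0;
}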
@@ -311,12 +311,12 @@ static void __init probedisk(int major, int minor,int device)
for (i=0;(i<prom->raid.total_disks)&&(i<8);i++) {
if ( (prom->raid.disk[i].channel== prom->raid.channel) &&
(prom->raid.disk[i].device == prom->raid.device) ) {
raid[device].disk[i].bdev = bdget(MKDEV(major,minor));
if (raid[device].disk[i].bdev != NULL) {
struct block_device *bdev = bdget(MKDEV(major,minor));
if (bdev && blkdev_get(bdev,FMODE_READ|FMODE_WRITE,0,BDEV_RAW) == 0) {
struct gendisk *gd;
int j;
/* This is supposed to prevent others from stealing our underlying disks */
blkdev_get(raid[device].disk[i].bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW);
raid[device].disk[i].bdev = bdev;
gd=get_gendisk(major);
if (gd!=NULL) {
for (j=1+(minor<<gd->minor_shift);j<((minor+1)<<gd->minor_shift);j++)
@@ -418,13 +418,12 @@ static void __exit pdcraid_exit (void)
{
int i,device;
for (device = 0; device<16; device++) {
for (i=0;i<8;i++)
if (raid[device].disk[i].bdev) {
blkdev_put(raid[device].disk[i].bdev, BDEV_RAW);
bdput(raid[device].disk[i].bdev);
raid[device].disk[i].bdev = NULL;
}
for (i=0;i<8;i++) {
struct block_device *bdev = raid[device].disk[i].bdev;
raid[device].disk[i].bdev = NULL;
if (bdev)
blkdev_put(bdev, BDEV_RAW);
}
if (raid[device].sectors)
ataraid_release_device(device);
}
......
@@ -649,11 +649,11 @@ static int lock_rdev (mdk_rdev_t *rdev)
static void unlock_rdev (mdk_rdev_t *rdev)
{
if (!rdev->bdev)
MD_BUG();
blkdev_put(rdev->bdev, BDEV_RAW);
bdput(rdev->bdev);
struct block_device *bdev = rdev->bdev;
rdev->bdev = NULL;
if (!bdev)
MD_BUG();
blkdev_put(bdev, BDEV_RAW);
}
void md_autodetect_dev (kdev_t dev);
......
@@ -404,7 +404,6 @@ static int get_inode(struct block_device *bdev)
if (!inode)
return -ENOMEM;
inode->i_rdev = to_kdev_t(bdev->bd_dev);
atomic_inc(&bdev->bd_count); /* will go away */
inode->i_bdev = bdev;
inode->i_data.a_ops = &def_blk_aops;
bdev->bd_inode = inode;
@@ -437,6 +436,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
memset(bdev, 0, sizeof(*bdev));
sema_init(&bdev->bd_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
}
}
@@ -522,17 +522,58 @@ struct block_device *bdget(dev_t dev)
void bdput(struct block_device *bdev)
{
if (atomic_dec_and_test(&bdev->bd_count)) {
if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
struct list_head *p;
if (bdev->bd_openers)
BUG();
if (bdev->bd_cache_openers)
BUG();
spin_lock(&bdev_lock);
list_del(&bdev->bd_hash);
while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
struct inode *inode;
inode = list_entry(p, struct inode, i_devices);
list_del_init(p);
inode->i_bdev = NULL;
}
spin_unlock(&bdev_lock);
destroy_bdev(bdev);
}
}
int bd_acquire(struct inode *inode)
{
struct block_device *bdev;
spin_lock(&bdev_lock);
if (inode->i_bdev) {
atomic_inc(&inode->i_bdev->bd_count);
spin_unlock(&bdev_lock);
return 0;
}
spin_unlock(&bdev_lock);
bdev = bdget(kdev_t_to_nr(inode->i_rdev));
if (!bdev)
return -ENOMEM;
spin_lock(&bdev_lock);
if (!inode->i_bdev) {
inode->i_bdev = bdev;
list_add(&inode->i_devices, &bdev->bd_inodes);
} else if (inode->i_bdev != bdev)
BUG();
spin_unlock(&bdev_lock);
return 0;
}
/* Call when you free inode */
void bd_forget(struct inode *inode)
{
spin_lock(&bdev_lock);
if (inode->i_bdev) {
list_del_init(&inode->i_devices);
inode->i_bdev = NULL;
}
spin_unlock(&bdev_lock);
}
static struct {
const char *name;
@@ -706,13 +747,15 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
}
unlock_kernel();
up(&bdev->bd_sem);
if (ret)
bdput(bdev);
return ret;
}
int blkdev_open(struct inode * inode, struct file * filp)
{
int ret = -ENXIO;
struct block_device *bdev = inode->i_bdev;
int ret;
struct block_device *bdev;
/*
* Preserve backwards compatibility and allow large file access
@@ -722,13 +765,15 @@ int blkdev_open(struct inode * inode, struct file * filp)
*/
filp->f_flags |= O_LARGEFILE;
bd_acquire(inode);
bdev = inode->i_bdev;
down(&bdev->bd_sem);
if (get_inode(bdev)) {
up(&bdev->bd_sem);
return -ENOMEM;
}
ret = get_inode(bdev);
if (ret)
goto out;
ret = -ENXIO;
lock_kernel();
if (!bdev->bd_op)
bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
@@ -749,7 +794,10 @@ int blkdev_open(struct inode * inode, struct file * filp)
}
}
unlock_kernel();
out:
up(&bdev->bd_sem);
if (ret)
bdput(bdev);
return ret;
}
@@ -777,6 +825,7 @@ int blkdev_put(struct block_device *bdev, int kind)
}
unlock_kernel();
up(&bdev->bd_sem);
bdput(bdev);
return ret;
}
@@ -841,6 +890,7 @@ int blkdev_close(struct inode * inode, struct file * filp)
}
unlock_kernel();
up(&bdev->bd_sem);
bdput(bdev);
return ret;
}
......
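The bdput() change above swaps a bare atomic_dec_and_test() for atomic_dec_and_lock(&bd_count, &bdev_lock): the final reference is dropped with bdev_lock already held, so unhashing the device and clearing the i_bdev back-pointers cannot race with a concurrent bdget(). A user-space sketch of what atomic_dec_and_lock() provides (illustrative names, not the kernel implementation):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
        atomic_int count;
        /* hash linkage and the bd_inodes list would live here */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns 1 with *lock held iff this call dropped the last reference,
 * mimicking atomic_dec_and_lock(): the lock is only touched when the
 * count is about to reach zero. */
static int dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
        int old = atomic_load(count);

        while (old > 1) {               /* fast path, lock-free */
                if (atomic_compare_exchange_weak(count, &old, old - 1))
                        return 0;
        }
        pthread_mutex_lock(lock);       /* slow path */
        if (atomic_fetch_sub(count, 1) == 1)
                return 1;               /* caller unlocks after teardown */
        pthread_mutex_unlock(lock);
        return 0;
}

static void put_obj(struct obj *o)
{
        if (dec_and_lock(&o->count, &table_lock)) {
                /* unhash, detach inodes, etc., all under table_lock */
                pthread_mutex_unlock(&table_lock);
                free(o);
        }
}

Note also that blkdev_get() and blkdev_open() now drop their reference themselves on failure, and blkdev_put()/blkdev_close() call bdput() on success, so callers no longer pair each put with a separate bdput().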
@@ -31,28 +31,65 @@
int use_coda_close;
static ssize_t
coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
coda_file_read(struct file *file, char *buf, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
struct coda_inode_info *cii = ITOC(inode);
struct file *cfile;
cfile = cii->c_container;
if (!cfile) BUG();
if (!cfile->f_op || !cfile->f_op->read)
return -EINVAL;
return cfile->f_op->read(cfile, buf, count, ppos);
}
static ssize_t
coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
{
struct inode *cinode, *inode = file->f_dentry->d_inode;
struct coda_inode_info *cii = ITOC(inode);
ssize_t n;
struct file *cfile;
ssize_t ret;
int flags;
cfile = cii->c_container;
if (!cfile) BUG();
if (!cfile->f_op || cfile->f_op->write != generic_file_write)
BUG();
if (!cfile->f_op || !cfile->f_op->write)
return -EINVAL;
cinode = cfile->f_dentry->d_inode;
down(&cinode->i_sem);
down(&inode->i_sem);
flags = cfile->f_flags;
cfile->f_flags |= file->f_flags & (O_APPEND | O_SYNC);
n = generic_file_write(file, buf, count, ppos);
ret = cfile->f_op->write(cfile, buf, count, ppos);
cfile->f_flags = flags;
inode->i_size = cinode->i_size;
up(&inode->i_sem);
return ret;
}
static int
coda_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_dentry->d_inode;
struct coda_inode_info *cii = ITOC(inode);
struct file *cfile;
cfile = cii->c_container;
if (!cfile) BUG();
up(&cinode->i_sem);
if (!cfile->f_op || !cfile->f_op->mmap)
return -ENODEV;
return n;
return cfile->f_op->mmap(cfile, vma);
}
int coda_open(struct inode *i, struct file *f)
@@ -237,9 +274,9 @@ int coda_fsync(struct file *file, struct dentry *dentry, int datasync)
struct file_operations coda_file_operations = {
llseek: generic_file_llseek,
read: generic_file_read,
read: coda_file_read,
write: coda_file_write,
mmap: generic_file_mmap,
mmap: coda_file_mmap,
open: coda_open,
flush: coda_flush,
release: coda_release,
......
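The Coda change above replaces hardwired generic_file_read/write/mmap calls with delegation through the container file's own f_op, which is what makes arbitrary host filesystems work. The shape of that delegation, as a self-contained user-space sketch (all names illustrative):

#include <stddef.h>
#include <sys/types.h>

struct file;

struct file_operations {
        ssize_t (*read)(struct file *, char *, size_t);
};

struct file {
        const struct file_operations *f_op;
};

/* Forward to whatever the container file implements instead of calling
 * one hard-coded implementation; mirrors coda_file_read()'s
 * cfile->f_op->read(cfile, buf, count, ppos) above. */
static ssize_t coda_style_read(struct file *cfile, char *buf, size_t count)
{
        if (!cfile->f_op || !cfile->f_op->read)
                return -1;              /* -EINVAL in the kernel version */
        return cfile->f_op->read(cfile, buf, count);
}

The write path in the diff additionally copies O_APPEND/O_SYNC from the Coda file into the container file's f_flags around the call and propagates i_size back to the Coda inode.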
@@ -414,7 +414,7 @@ MODULE_AUTHOR("Peter J. Braam <braam@cs.cmu.edu>");
static int __init init_coda(void)
{
int status;
printk(KERN_INFO "Coda Kernel/Venus communications, v5.3.14, coda@cs.cmu.edu\n");
printk(KERN_INFO "Coda Kernel/Venus communications, v5.3.15, coda@cs.cmu.edu\n");
status = init_coda_psdev();
if ( status ) {
......
@@ -2291,9 +2291,16 @@ static int devfs_statfs (struct super_block *sb, struct statfs *buf)
return 0;
} /* End Function devfs_statfs */
static void devfs_clear_inode(struct inode *inode)
{
if (S_ISBLK(inode->i_mode))
bdput(inode->i_bdev);
}
static struct super_operations devfs_sops =
{
put_inode: force_delete,
clear_inode: devfs_clear_inode,
statfs: devfs_statfs,
};
@@ -2351,9 +2358,7 @@ static struct inode *get_vfs_inode (struct super_block *sb,
{
inode->i_rdev = MKDEV (de->u.fcb.u.device.major,
de->u.fcb.u.device.minor);
inode->i_bdev = bdget ( kdev_t_to_nr (inode->i_rdev) );
inode->i_mapping->a_ops = &def_blk_aops;
if (inode->i_bdev)
if (bd_acquire(inode) == 0)
{
if (!inode->i_bdev->bd_op && de->u.fcb.ops)
inode->i_bdev->bd_op = de->u.fcb.ops;
......
@@ -207,7 +207,6 @@ void init_special_inode(struct inode *inode, umode_t mode, int rdev)
} else if (S_ISBLK(mode)) {
inode->i_fop = &def_blk_fops;
inode->i_rdev = to_kdev_t(rdev);
inode->i_bdev = bdget(rdev);
} else if (S_ISFIFO(mode))
inode->i_fop = &def_fifo_fops;
else if (S_ISSOCK(mode))
......
@@ -106,6 +106,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_dirty_buffers);
INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
INIT_LIST_HEAD(&inode->i_devices);
sema_init(&inode->i_sem, 1);
sema_init(&inode->i_zombie, 1);
spin_lock_init(&inode->i_data.i_shared_lock);
@@ -516,11 +517,9 @@ void clear_inode(struct inode *inode)
DQUOT_DROP(inode);
if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
inode->i_sb->s_op->clear_inode(inode);
if (inode->i_bdev) {
bdput(inode->i_bdev);
inode->i_bdev = NULL;
}
if (inode->i_cdev) {
if (inode->i_bdev)
bd_forget(inode);
else if (inode->i_cdev) {
cdput(inode->i_cdev);
inode->i_cdev = NULL;
}
......
@@ -47,9 +47,10 @@ get_drive_geometry(int kdev,struct hd_geometry *geo)
{
struct block_device *bdev = bdget(kdev_t_to_nr(kdev));
int rc = blkdev_get(bdev, 0, 1, BDEV_FILE);
if ( rc == 0 )
if ( rc == 0 ) {
rc = ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo);
blkdev_put(bdev,BDEV_FILE);
blkdev_put(bdev, BDEV_FILE);
}
return rc;
}
@@ -58,9 +59,10 @@ get_drive_info(int kdev,dasd_information_t *info)
{
struct block_device *bdev = bdget(kdev_t_to_nr(kdev));
int rc = blkdev_get(bdev, 0, 1, BDEV_FILE);
if ( rc == 0 )
if ( rc == 0 ) {
rc = ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)(info));
blkdev_put(bdev,BDEV_FILE);
blkdev_put(bdev, BDEV_FILE);
}
return rc;
}
......
@@ -925,6 +925,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
error = -EACCES;
if (nd.mnt->mnt_flags & MNT_NODEV)
goto out;
bd_acquire(inode);
bdev = inode->i_bdev;
bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
if (bdops) bdev->bd_op = bdops;
@@ -982,8 +983,6 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
if (!fs_type->read_super(s, data, 0))
goto out_fail;
unlock_super(s);
/* tell bdcache that we are going to keep this one */
atomic_inc(&bdev->bd_count);
get_filesystem(fs_type);
path_release(&nd);
return s;
@@ -1128,10 +1127,9 @@ static void kill_super(struct super_block *sb)
sb->s_type = NULL;
unlock_super(sb);
unlock_kernel();
if (bdev) {
if (bdev)
blkdev_put(bdev, BDEV_FS);
bdput(bdev);
} else
else
put_unnamed_dev(dev);
spin_lock(&sb_lock);
list_del(&sb->s_list);
@@ -1718,6 +1716,7 @@ void __init mount_root(void)
if (!ROOT_DEV)
panic("I have no root and I want to scream");
retry:
bdev = bdget(kdev_t_to_nr(ROOT_DEV));
if (!bdev)
panic(__FUNCTION__ ": unable to allocate root device");
@@ -1729,7 +1728,7 @@ void __init mount_root(void)
retval = blkdev_get(bdev, mode, 0, BDEV_FS);
if (retval == -EROFS) {
root_mountflags |= MS_RDONLY;
retval = blkdev_get(bdev, FMODE_READ, 0, BDEV_FS);
goto retry;
}
if (retval) {
/*
@@ -1977,6 +1976,7 @@ int __init change_root(kdev_t new_root_dev,const char *put_old)
int blivet;
struct block_device *ramdisk = old_rootmnt->mnt_sb->s_bdev;
atomic_inc(&ramdisk->bd_count);
blivet = blkdev_get(ramdisk, FMODE_READ, 0, BDEV_FS);
printk(KERN_NOTICE "Trying to unmount old root ... ");
if (!blivet) {
......
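blkdev_get() dropping its reference on failure (see the fs/block_dev.c hunks above) is what makes the mount_root() retry correct: after -EROFS the old reference is gone, so the code jumps back and runs bdget() again instead of reusing bdev. A toy model of this "reference consumed on failure" convention, illustrative names only:

#include <stdlib.h>

struct ref { int unused; };

static struct ref *ref_get(void)
{
        return calloc(1, sizeof(struct ref));
}

static void ref_put(struct ref *r)
{
        free(r);
}

/* Fails the first, read-write attempt; consumes the reference on
 * failure, like the new blkdev_get(). */
static int try_open(struct ref *r, int readonly)
{
        if (!readonly) {
                ref_put(r);             /* callee drops the ref on error */
                return -1;              /* stands in for -EROFS */
        }
        return 0;
}

int main(void)
{
        int readonly = 0;
        struct ref *r;
retry:
        r = ref_get();
        if (!r)
                return 1;
        if (try_open(r, readonly) != 0) {
                readonly = 1;           /* root_mountflags |= MS_RDONLY */
                goto retry;             /* must take a fresh reference */
        }
        ref_put(r);     /* demo cleanup; the kernel keeps the root open */
        return 0;
}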
@@ -415,6 +415,7 @@ struct block_device {
int bd_cache_openers;
const struct block_device_operations *bd_op;
struct semaphore bd_sem; /* open/close mutex */
struct list_head bd_inodes;
};
struct inode {
@@ -452,6 +453,7 @@ struct inode {
int i_mapping_overload;
struct dquot *i_dquot[MAXQUOTAS];
/* These three should probably be a union */
struct list_head i_devices;
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct char_device *i_cdev;
@@ -1046,6 +1048,8 @@ enum {BDEV_FILE, BDEV_SWAP, BDEV_FS, BDEV_RAW};
extern int register_blkdev(unsigned int, const char *, struct block_device_operations *);
extern int unregister_blkdev(unsigned int, const char *);
extern struct block_device *bdget(dev_t);
extern int bd_acquire(struct inode *inode);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
extern struct char_device *cdget(dev_t);
extern void cdput(struct char_device *);
......
@@ -274,7 +274,6 @@ typedef struct page {
#define PG_active 6
#define PG_inactive 7
#define PG_slab 8
#define PG_swap_cache 9
#define PG_skip 10
#define PG_highmem 11
#define PG_checked 12 /* kill me in 2.5.<early>. */
@@ -326,18 +325,9 @@ static inline void set_page_dirty(struct page * page)
#define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags)
#define PageTestandClearDecrAfter(page) test_and_clear_bit(PG_decr_after, &(page)->flags)
#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
#define PageSwapCache(page) test_bit(PG_swap_cache, &(page)->flags)
#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
#define PageSetSwapCache(page) set_bit(PG_swap_cache, &(page)->flags)
#define PageTestandSetSwapCache(page) test_and_set_bit(PG_swap_cache, &(page)->flags)
#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
#define PageClearSwapCache(page) clear_bit(PG_swap_cache, &(page)->flags)
#define PageTestandClearSwapCache(page) test_and_clear_bit(PG_swap_cache, &(page)->flags)
#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
#define PageActive(page) test_bit(PG_active, &(page)->flags)
#define SetPageActive(page) set_bit(PG_active, &(page)->flags)
@@ -465,6 +455,9 @@ extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void swapin_readahead(swp_entry_t);
extern struct address_space swapper_space;
#define PageSwapCache(page) ((page)->mapping == &swapper_space)
static inline int is_page_cache_freeable(struct page * page)
{
return page_count(page) - !!page->buffers == 1;
@@ -476,15 +469,13 @@ static inline int is_page_cache_freeable(struct page * page)
*/
static inline int exclusive_swap_page(struct page *page)
{
unsigned int count;
if (!PageLocked(page))
BUG();
if (!PageSwapCache(page))
return 0;
count = page_count(page) - !!page->buffers; /* 2: us + swap cache */
count += swap_count(page); /* +1: just swap cache */
return count == 3; /* =3: total */
if (page_count(page) - !!page->buffers != 2) /* 2: us + cache */
return 0;
return swap_count(page) == 1; /* 1: just cache */
}
extern void __free_pte(pte_t);
@@ -565,11 +556,10 @@ extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO \
| __GFP_FS | __GFP_HIGHMEM)
#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_KSWAPD ( __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
......
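PG_swap_cache and its eight flag macros go away because swap-cache membership is now implied by page->mapping pointing at swapper_space: one source of truth instead of a flag bit that must be kept in sync. The shape of the new test, as a standalone sketch with stand-in types:

#include <stdio.h>

struct address_space { int unused; };
static struct address_space swapper_space;      /* the swap cache's mapping */

struct page { struct address_space *mapping; };

/* One source of truth: no separate bit to set, clear, or leave stale. */
static int page_swap_cache(const struct page *page)
{
        return page->mapping == &swapper_space;
}

int main(void)
{
        struct page p = { &swapper_space };
        printf("%d\n", page_swap_cache(&p));    /* prints 1 */
        return 0;
}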
@@ -84,7 +84,6 @@ extern unsigned int nr_free_buffer_pages(void);
extern int nr_active_pages;
extern int nr_inactive_pages;
extern atomic_t nr_async_pages;
extern struct address_space swapper_space;
extern atomic_t page_cache_size;
extern atomic_t buffermem_pages;
extern spinlock_t pagecache_lock;
@@ -122,35 +121,27 @@ extern void rw_swap_page_nolock(int, swp_entry_t, char *);
/* linux/mm/swap_state.c */
extern void show_swap_cache_info(void);
extern void add_to_swap_cache(struct page *, swp_entry_t);
extern int swap_check_entry(unsigned long);
extern void __delete_from_swap_cache(struct page *page);
extern void delete_from_swap_cache(struct page *page);
extern void free_page_and_swap_cache(struct page *page);
extern struct page * lookup_swap_cache(swp_entry_t);
extern struct page * read_swap_cache_async(swp_entry_t);
/* linux/mm/oom_kill.c */
extern void oom_kill(void);
/*
* Make these inline later once they are working properly.
*/
extern void __delete_from_swap_cache(struct page *page);
extern void delete_from_swap_cache(struct page *page);
extern void delete_from_swap_cache_nolock(struct page *page);
extern void free_page_and_swap_cache(struct page *page);
/* linux/mm/swapfile.c */
extern unsigned int nr_swapfiles;
extern struct swap_info_struct swap_info[];
extern int is_swap_partition(kdev_t);
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t __get_swap_page(unsigned short);
extern swp_entry_t get_swap_page(void);
extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
struct inode **);
extern int swap_duplicate(swp_entry_t);
extern int swap_count(struct page *);
extern int valid_swaphandles(swp_entry_t, unsigned long *);
#define get_swap_page() __get_swap_page(1)
extern void __swap_free(swp_entry_t, unsigned short);
#define swap_free(entry) __swap_free((entry), 1)
extern void swap_free(swp_entry_t);
struct swap_list_t {
int head; /* head of priority-ordered swapfile list */
int next; /* swapfile to be used next */
......
@@ -1704,6 +1704,7 @@ struct page * filemap_nopage(struct vm_area_struct * area,
* and possibly copy it over to another page..
*/
old_page = page;
mark_page_accessed(page);
if (no_share) {
struct page *new_page = alloc_page(GFP_HIGHUSER);
@@ -2553,7 +2554,6 @@ struct page *__read_cache_page(struct address_space *mapping,
}
if (cached_page)
page_cache_release(cached_page);
mark_page_accessed(page);
return page;
}
@@ -2571,7 +2571,10 @@ struct page *read_cache_page(struct address_space *mapping,
retry:
page = __read_cache_page(mapping, index, filler, data);
if (IS_ERR(page) || Page_Uptodate(page))
if (IS_ERR(page))
goto out;
mark_page_accessed(page);
if (Page_Uptodate(page))
goto out;
lock_page(page);
@@ -2835,6 +2838,7 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
unlock:
kunmap(page);
/* Mark it unlocked again and drop the page.. */
SetPageReferenced(page);
UnlockPage(page);
page_cache_release(page);
......
@@ -85,8 +85,6 @@ void __free_pte(pte_t pte)
if (page->mapping) {
if (pte_dirty(pte))
set_page_dirty(page);
if (pte_young(pte))
mark_page_accessed(page);
}
free_page_and_swap_cache(page);
@@ -939,10 +937,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
break;
/* Recheck swapcachedness once the page is locked */
can_reuse = exclusive_swap_page(old_page);
#if 1
if (can_reuse)
delete_from_swap_cache_nolock(old_page);
#endif
delete_from_swap_cache(old_page);
UnlockPage(old_page);
if (!can_reuse)
break;
@@ -1088,23 +1084,19 @@ void swapin_readahead(swp_entry_t entry)
unsigned long offset;
/*
* Get the number of handles we should do readahead io to. Also,
* grab temporary references on them, releasing them as io completes.
* Get the number of handles we should do readahead io to.
*/
num = valid_swaphandles(entry, &offset);
for (i = 0; i < num; offset++, i++) {
/* Don't block on I/O for read-ahead */
if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster
* (1 << page_cluster)) {
while (i++ < num)
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
if (atomic_read(&nr_async_pages) >=
pager_daemon.swap_cluster << page_cluster)
break;
}
/* Ok, do the async read-ahead now */
new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
if (new_page != NULL)
page_cache_release(new_page);
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
if (!new_page)
break;
page_cache_release(new_page);
}
return;
}
@@ -1164,11 +1156,12 @@ static int do_swap_page(struct mm_struct * mm,
pte = mk_pte(page, vma->vm_page_prot);
swap_free(entry);
mark_page_accessed(page);
if (exclusive_swap_page(page)) {
if (vma->vm_flags & VM_WRITE)
pte = pte_mkwrite(pte);
pte = pte_mkdirty(pte);
delete_from_swap_cache_nolock(page);
delete_from_swap_cache(page);
}
UnlockPage(page);
......
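The swapin_readahead() rework above stops taking temporary swap references for the whole window (valid_swaphandles() no longer bumps swap_map counts, see mm/swapfile.c below), so the early exits need no compensating swap_free() loop. Roughly, in user-space form with made-up limits:

#include <stdio.h>

#define MAX_ASYNC 32

static int nr_async;                    /* nr_async_pages stand-in */
static int alloc_budget = 5;            /* makes the 6th allocation fail */

static int start_async_read(unsigned long slot)
{
        if (alloc_budget-- <= 0)
                return -1;              /* alloc_page() failed */
        nr_async++;
        printf("reading swap slot %lu\n", slot);
        return 0;
}

static void readahead(unsigned long start, int num)
{
        unsigned long slot = start;
        int i;

        for (i = 0; i < num; slot++, i++) {
                if (nr_async >= MAX_ASYNC)
                        break;          /* don't block on readahead I/O */
                if (start_async_read(slot) != 0)
                        break;          /* nothing was reserved up front,
                                         * so there is nothing to undo */
        }
}

int main(void)
{
        readahead(1024, 8);             /* prints slots 1024..1028 */
        return 0;
}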
@@ -234,45 +234,55 @@ static int shmem_writepage(struct page * page)
int error;
struct shmem_inode_info *info;
swp_entry_t *entry, swap;
struct address_space *mapping;
unsigned long index;
struct inode *inode;
if (!PageLocked(page))
BUG();
inode = page->mapping->host;
mapping = page->mapping;
index = page->index;
inode = mapping->host;
info = &inode->u.shmem_i;
swap = __get_swap_page(2);
error = -ENOMEM;
if (!swap.val) {
activate_page(page);
SetPageDirty(page);
goto out;
}
spin_lock(&info->lock);
entry = shmem_swp_entry(info, page->index);
if (IS_ERR(entry)) /* this had been allocted on page allocation */
entry = shmem_swp_entry(info, index);
if (IS_ERR(entry)) /* this had been allocated on page allocation */
BUG();
shmem_recalc_inode(page->mapping->host);
error = -EAGAIN;
shmem_recalc_inode(inode);
if (entry->val)
BUG();
*entry = swap;
error = 0;
/* Remove the from the page cache */
/* Remove it from the page cache */
lru_cache_del(page);
remove_inode_page(page);
swap_list_lock();
swap = get_swap_page();
if (!swap.val) {
swap_list_unlock();
/* Add it back to the page cache */
add_to_page_cache_locked(page, mapping, index);
activate_page(page);
SetPageDirty(page);
error = -ENOMEM;
goto out;
}
/* Add it to the swap cache */
add_to_swap_cache(page, swap);
page_cache_release(page);
info->swapped++;
swap_list_unlock();
spin_unlock(&info->lock);
set_page_dirty(page);
info->swapped++;
*entry = swap;
error = 0;
out:
spin_unlock(&info->lock);
UnlockPage(page);
page_cache_release(page);
return error;
}
@@ -356,7 +366,7 @@ static struct page * shmem_getpage_locked(struct inode * inode, unsigned long id
swap_free(*entry);
*entry = (swp_entry_t) {0};
delete_from_swap_cache_nolock(page);
delete_from_swap_cache(page);
flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1);
page->flags = flags | (1 << PG_dirty);
add_to_page_cache_locked(page, mapping, idx);
......
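shmem_writepage() now calls get_swap_page() under swap_list_lock, holds it across add_to_swap_cache(), and on failure re-inserts the page into the page cache, so the page is always findable in exactly one of the two caches. The visibility argument in miniature (user-space, illustrative):

#include <pthread.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int page_cache[16], swap_cache[16];

/* Move slot i from one table to the other in a single critical section,
 * so any reader that also takes list_lock always finds the value in
 * exactly one of them; on failure nothing moves at all. */
static int move_to_swap(int i, int have_swap_slot)
{
        int ret = 0;

        pthread_mutex_lock(&list_lock);
        if (!have_swap_slot) {          /* get_swap_page() failed */
                ret = -1;               /* page stays in page_cache */
        } else {
                swap_cache[i] = page_cache[i];
                page_cache[i] = 0;
        }
        pthread_mutex_unlock(&list_lock);
        return ret;
}

int main(void)
{
        page_cache[3] = 42;
        return move_to_swap(3, 1);      /* returns 0; slot moved */
}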
@@ -54,7 +54,6 @@ void deactivate_page_nolock(struct page * page)
del_page_from_active_list(page);
add_page_to_inactive_list(page);
}
ClearPageReferenced(page);
}
void deactivate_page(struct page * page)
@@ -73,7 +72,6 @@ void activate_page_nolock(struct page * page)
del_page_from_inactive_list(page);
add_page_to_active_list(page);
}
SetPageReferenced(page);
}
void activate_page(struct page * page)
......
@@ -23,17 +23,11 @@
*/
static int swap_writepage(struct page *page)
{
/* One for the page cache, one for this user, one for page->buffers */
if (page_count(page) > 2 + !!page->buffers)
goto in_use;
if (swap_count(page) > 1)
goto in_use;
delete_from_swap_cache_nolock(page);
UnlockPage(page);
return 0;
in_use:
if (exclusive_swap_page(page)) {
delete_from_swap_cache(page);
UnlockPage(page);
return 0;
}
rw_swap_page(WRITE, page);
return 0;
}
@@ -75,8 +69,6 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
#endif
if (!PageLocked(page))
BUG();
if (PageTestandSetSwapCache(page))
BUG();
if (page->mapping)
BUG();
@@ -92,51 +84,42 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
*/
void __delete_from_swap_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
swp_entry_t entry;
#ifdef SWAP_CACHE_INFO
swap_cache_del_total++;
#endif
if (mapping != &swapper_space)
if (!PageLocked(page))
BUG();
if (!PageSwapCache(page) || !PageLocked(page))
if (!PageSwapCache(page))
BUG();
entry.val = page->index;
PageClearSwapCache(page);
ClearPageDirty(page);
__remove_inode_page(page);
swap_free(entry);
}
/*
* This will never put the page into the free list, the caller has
* a reference on the page.
* This must be called only on pages that have
* been verified to be in the swap cache and locked.
* It will never put the page into the free list,
* the caller has a reference on the page.
*/
void delete_from_swap_cache_nolock(struct page *page)
void delete_from_swap_cache(struct page *page)
{
swp_entry_t entry;
if (!PageLocked(page))
BUG();
if (block_flushpage(page, 0))
lru_cache_del(page);
entry.val = page->index;
spin_lock(&pagecache_lock);
__delete_from_swap_cache(page);
spin_unlock(&pagecache_lock);
page_cache_release(page);
}
/*
* This must be called only on pages that have
* been verified to be in the swap cache and locked.
*/
void delete_from_swap_cache(struct page *page)
{
lock_page(page);
delete_from_swap_cache_nolock(page);
UnlockPage(page);
swap_free(entry);
page_cache_release(page);
}
/*
@@ -156,7 +139,7 @@ void free_page_and_swap_cache(struct page *page)
*/
if (PageSwapCache(page) && !TryLockPage(page)) {
if (exclusive_swap_page(page))
delete_from_swap_cache_nolock(page);
delete_from_swap_cache(page);
UnlockPage(page);
}
page_cache_release(page);
@@ -213,19 +196,24 @@ struct page * read_swap_cache_async(swp_entry_t entry)
new_page = alloc_page(GFP_HIGHUSER);
if (!new_page)
goto out; /* Out of memory */
if (TryLockPage(new_page))
BUG();
/*
* Check the swap cache again, in case we stalled above.
* The BKL is guarding against races between this check
* swap_list_lock is guarding against races between this check
* and where the new page is added to the swap cache below.
* It is also guarding against race where try_to_swap_out
* allocates entry with get_swap_page then adds to cache.
*/
swap_list_lock();
found_page = __find_get_page(&swapper_space, entry.val, hash);
if (found_page)
goto out_free_page;
/*
* Make sure the swap entry is still in use. It could have gone
* while caller waited for BKL, or while allocating page above,
* since caller dropped page_table_lock, while allocating page above,
* or while allocating page in prior call via swapin_readahead.
*/
if (!swap_duplicate(entry)) /* Account for the swap cache */
@@ -234,13 +222,15 @@ struct page * read_swap_cache_async(swp_entry_t entry)
/*
* Add it to the swap cache and read its contents.
*/
if (TryLockPage(new_page))
BUG();
add_to_swap_cache(new_page, entry);
swap_list_unlock();
rw_swap_page(READ, new_page);
return new_page;
out_free_page:
swap_list_unlock();
UnlockPage(new_page);
page_cache_release(new_page);
out:
return found_page;
......
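The read_swap_cache_async() race fix is the classic get-or-create pattern: allocate first (allocation may block), then re-check for a concurrent insertion and insert only if still absent, all under the lock the writers use (swap_list_lock here, replacing reliance on the BKL). Self-contained toy version with a one-entry cache, illustrative names:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

struct page { long key; };
static struct page *cached;             /* toy one-entry cache */

static struct page *get_or_create(long key)
{
        struct page *newp = malloc(sizeof(*newp));      /* may block */

        if (!newp)
                return NULL;
        pthread_mutex_lock(&cache_lock);
        if (cached && cached->key == key) {
                struct page *found = cached;    /* lost the race: reuse */
                pthread_mutex_unlock(&cache_lock);
                free(newp);
                return found;
        }
        newp->key = key;
        cached = newp;                  /* insert while still locked */
        pthread_mutex_unlock(&cache_lock);
        return newp;
}

int main(void)
{
        struct page *p = get_or_create(7);
        return (p && get_or_create(7) == p) ? 0 : 1;
}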
@@ -14,6 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/shm.h>
#include <linux/compiler.h>
#include <asm/pgtable.h>
@@ -33,7 +34,7 @@ struct swap_info_struct swap_info[MAX_SWAPFILES];
#define SWAPFILE_CLUSTER 256
static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
static inline int scan_swap_map(struct swap_info_struct *si)
{
unsigned long offset;
/*
@@ -86,7 +87,8 @@ static inline int scan_swap_map(struct swap_info_struct *si, unsigned short coun
si->lowest_bit = si->max;
si->highest_bit = 0;
}
si->swap_map[offset] = count;
/* Initial count 1 for user reference + 1 for swap cache */
si->swap_map[offset] = 2;
nr_swap_pages--;
si->cluster_next = offset+1;
return offset;
@@ -96,7 +98,12 @@ static inline int scan_swap_map(struct swap_info_struct *si, unsigned short coun
return 0;
}
swp_entry_t __get_swap_page(unsigned short count)
/*
* Callers of get_swap_page must hold swap_list_lock across the call,
* and across the following add_to_swap_cache, to guard against races
* with read_swap_cache_async.
*/
swp_entry_t get_swap_page(void)
{
struct swap_info_struct * p;
unsigned long offset;
@@ -104,20 +111,17 @@ swp_entry_t __get_swap_page(unsigned short count)
int type, wrapped = 0;
entry.val = 0; /* Out of memory */
if (count >= SWAP_MAP_MAX)
goto bad_count;
swap_list_lock();
type = swap_list.next;
if (type < 0)
goto out;
if (nr_swap_pages == 0)
if (nr_swap_pages <= 0)
goto out;
while (1) {
p = &swap_info[type];
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
swap_device_lock(p);
offset = scan_swap_map(p, count);
offset = scan_swap_map(p);
swap_device_unlock(p);
if (offset) {
entry = SWP_ENTRY(type,offset);
@@ -142,21 +146,14 @@ swp_entry_t __get_swap_page(unsigned short count)
goto out; /* out of swap space */
}
out:
swap_list_unlock();
return entry;
bad_count:
printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
count, __builtin_return_address(0));
return entry;
}
/*
* Caller has made sure that the swapdevice corresponding to entry
* is still around or has not been recycled.
*/
void __swap_free(swp_entry_t entry, unsigned short count)
void swap_free(swp_entry_t entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
@@ -180,9 +177,7 @@ void __swap_free(swp_entry_t entry, unsigned short count)
swap_list.next = type;
swap_device_lock(p);
if (p->swap_map[offset] < SWAP_MAP_MAX) {
if (p->swap_map[offset] < count)
goto bad_count;
if (!(p->swap_map[offset] -= count)) {
if (!--(p->swap_map[offset])) {
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit)
@@ -207,11 +202,6 @@ void __swap_free(swp_entry_t entry, unsigned short count)
bad_free:
printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
goto out;
bad_count:
swap_device_unlock(p);
swap_list_unlock();
printk(KERN_ERR "swap_free: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
goto out;
}
/*
......@@ -229,9 +219,9 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
{
pte_t pte = *dir;
if (pte_to_swp_entry(pte).val != entry.val)
if (likely(pte_to_swp_entry(pte).val != entry.val))
return;
if (pte_none(pte) || pte_present(pte))
if (unlikely(pte_none(pte) || pte_present(pte)))
return;
get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
@@ -458,7 +448,7 @@ static int try_to_unuse(unsigned int type)
*/
lock_page(page);
if (PageSwapCache(page))
delete_from_swap_cache_nolock(page);
delete_from_swap_cache(page);
SetPageDirty(page);
UnlockPage(page);
flush_page_to_ram(page);
@@ -567,14 +557,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
p = swap_info + type;
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
if (p->swap_file) {
if (p->swap_file == nd.dentry)
break;
} else {
if (S_ISBLK(nd.dentry->d_inode->i_mode)
&& (p->swap_device == nd.dentry->d_inode->i_rdev))
break;
}
if (p->swap_file == nd.dentry)
break;
}
prev = type;
}
@@ -616,19 +600,21 @@ asmlinkage long sys_swapoff(const char * specialfile)
goto out_dput;
}
if (p->swap_device)
blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
path_release(&nd);
swap_list_lock();
nd.dentry = p->swap_file;
p->swap_file = NULL;
swap_device_lock(p);
nd.mnt = p->swap_vfsmnt;
nd.dentry = p->swap_file;
p->swap_vfsmnt = NULL;
p->swap_file = NULL;
p->swap_device = 0;
p->max = 0;
swap_map = p->swap_map;
p->swap_map = NULL;
p->flags = 0;
swap_device_unlock(p);
swap_list_unlock();
vfree(swap_map);
err = 0;
@@ -711,6 +697,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
unsigned long maxpages = 1;
int swapfilesize;
struct block_device *bdev = NULL;
unsigned short *swap_map;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -760,6 +747,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
p->swap_device = dev;
set_blocksize(dev, PAGE_SIZE);
bd_acquire(swap_inode);
bdev = swap_inode->i_bdev;
bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
if (bdops) bdev->bd_op = bdops;
@@ -772,29 +760,24 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (!dev || (blk_size[MAJOR(dev)] &&
!blk_size[MAJOR(dev)][MINOR(dev)]))
goto bad_swap;
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
if (i == type)
continue;
if (dev == swap_info[i].swap_device)
goto bad_swap;
}
swapfilesize = 0;
if (blk_size[MAJOR(dev)])
swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
>> (PAGE_SHIFT - 10);
} else if (S_ISREG(swap_inode->i_mode)) {
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
if (i == type || !swap_info[i].swap_file)
continue;
if (swap_inode == swap_info[i].swap_file->d_inode)
goto bad_swap;
}
} else if (S_ISREG(swap_inode->i_mode))
swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
} else
else
goto bad_swap;
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
struct swap_info_struct *q = &swap_info[i];
if (i == type || !q->swap_file)
continue;
if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
goto bad_swap;
}
swap_header = (void *) __get_free_page(GFP_USER);
if (!swap_header) {
printk("Unable to start swapping: out of memory :-)\n");
@@ -900,6 +883,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
}
p->swap_map[0] = SWAP_MAP_BAD;
swap_list_lock();
swap_device_lock(p);
p->max = maxpages;
p->flags = SWP_WRITEOK;
p->pages = nr_good_pages;
@@ -922,6 +906,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
} else {
swap_info[prev].next = p - swap_info;
}
swap_device_unlock(p);
swap_list_unlock();
error = 0;
goto out;
@@ -929,11 +914,10 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (bdev)
blkdev_put(bdev, BDEV_SWAP);
bad_swap_2:
if (p->swap_map)
vfree(p->swap_map);
swap_list_lock();
swap_map = p->swap_map;
nd.mnt = p->swap_vfsmnt;
nd.dentry = p->swap_file;
swap_list_lock();
p->swap_device = 0;
p->swap_file = NULL;
p->swap_vfsmnt = NULL;
......@@ -942,6 +926,8 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
swap_list_unlock();
if (swap_map)
vfree(swap_map);
path_release(&nd);
out:
if (swap_header)
@@ -987,43 +973,31 @@ int swap_duplicate(swp_entry_t entry)
unsigned long offset, type;
int result = 0;
/* Swap entry 0 is illegal */
if (!entry.val)
goto out;
type = SWP_TYPE(entry);
if (type >= nr_swapfiles)
goto bad_file;
p = type + swap_info;
offset = SWP_OFFSET(entry);
if (offset >= p->max)
goto bad_offset;
if (!p->swap_map[offset])
goto bad_unused;
/*
* Entry is valid, so increment the map count.
*/
swap_device_lock(p);
if (p->swap_map[offset] < SWAP_MAP_MAX)
p->swap_map[offset]++;
else {
if (swap_overflow++ < 5)
printk(KERN_WARNING "swap_dup: swap entry overflow\n");
p->swap_map[offset] = SWAP_MAP_MAX;
if (offset < p->max && p->swap_map[offset]) {
if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
p->swap_map[offset]++;
result = 1;
} else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
if (swap_overflow++ < 5)
printk(KERN_WARNING "swap_dup: swap entry overflow\n");
p->swap_map[offset] = SWAP_MAP_MAX;
result = 1;
}
}
swap_device_unlock(p);
result = 1;
out:
return result;
bad_file:
printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
goto out;
bad_offset:
/* Don't report: can happen in read_swap_cache_async after swapoff */
goto out;
bad_unused:
/* Don't report: can happen in read_swap_cache_async after blocking */
goto out;
}
/*
@@ -1068,7 +1042,7 @@ int swap_count(struct page *page)
}
/*
* Kernel_lock protects against swap device deletion.
* Prior swap_duplicate protects against swap device deletion.
*/
void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
kdev_t *dev, struct inode **swapf)
@@ -1108,8 +1082,8 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
}
/*
* Kernel_lock protects against swap device deletion. Grab an extra
* reference on the swaphandle so that it dos not become unused.
* swap_device_lock prevents swap_map being freed. Don't grab an extra
* reference on the swaphandle, it doesn't matter if it becomes unused.
*/
int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{
@@ -1117,20 +1091,23 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
unsigned long toff;
struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
*offset = SWP_OFFSET(entry);
toff = *offset = (*offset >> page_cluster) << page_cluster;
if (!page_cluster) /* no readahead */
return 0;
toff = (SWP_OFFSET(entry) >> page_cluster) << page_cluster;
if (!toff) /* first page is swap header */
toff++, i--;
*offset = toff;
swap_device_lock(swapdev);
do {
/* Don't read-ahead past the end of the swap area */
if (toff >= swapdev->max)
break;
/* Don't read in bad or busy pages */
/* Don't read in free or bad pages */
if (!swapdev->swap_map[toff])
break;
if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
break;
swapdev->swap_map[toff]++;
toff++;
ret++;
} while (--i);
......
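For reference, the readahead window computed by valid_swaphandles() is the 2^page_cluster-slot cluster containing the faulting offset, aligned downward, with slot 0 skipped because it holds the swap header. The arithmetic as a runnable standalone check:

#include <stdio.h>

int main(void)
{
        unsigned long offset = 1027;    /* faulting swap offset */
        int page_cluster = 3;           /* window of 1 << 3 = 8 slots */
        int num = 1 << page_cluster;
        unsigned long toff;

        toff = (offset >> page_cluster) << page_cluster;        /* 1024 */
        if (!toff) {                    /* slot 0 is the swap header */
                toff++;
                num--;
        }
        printf("read ahead slots %lu..%lu\n", toff, toff + num - 1);
        return 0;
}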
@@ -52,14 +52,9 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
/* Don't look at this pte if it's been accessed recently. */
if (ptep_test_and_clear_young(page_table)) {
flush_tlb_page(vma, address);
mark_page_accessed(page);
return 0;
}
/* Don't bother with it if the page is otherwise active */
if (PageActive(page))
return 0;
if (TryLockPage(page))
return 0;
@@ -85,8 +80,8 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
entry.val = page->index;
if (pte_dirty(pte))
set_page_dirty(page);
set_swap_pte:
swap_duplicate(entry);
set_swap_pte:
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
mm->rss--;
@@ -130,16 +125,18 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
* we have the swap cache set up to associate the
* page with that swap entry.
*/
swap_list_lock();
entry = get_swap_page();
if (!entry.val)
goto out_unlock_restore; /* No swap space left */
/* Add it to the swap cache and mark it dirty */
add_to_swap_cache(page, entry);
set_page_dirty(page);
goto set_swap_pte;
if (entry.val) {
/* Add it to the swap cache and mark it dirty */
add_to_swap_cache(page, entry);
swap_list_unlock();
set_page_dirty(page);
goto set_swap_pte;
}
out_unlock_restore:
/* No swap space left */
swap_list_unlock();
set_pte(page_table, pte);
UnlockPage(page);
return 0;
@@ -243,9 +240,9 @@ static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vm
struct mm_struct *swap_mm = &init_mm;
/*
* Returns non-zero if we scanned all `count' pages
* Returns remaining count of pages to be swapped out by followup call.
*/
static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone_t * classzone)
static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -255,11 +252,12 @@ static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone
* and ptes.
*/
spin_lock(&mm->page_table_lock);
*race = 1;
if (swap_mm != mm)
goto out_unlock;
*race = 0;
address = mm->swap_address;
if (address == TASK_SIZE || swap_mm != mm) {
/* We raced: don't count this mm but try again */
++*mmcounter;
goto out_unlock;
}
vma = find_vma(mm, address);
if (vma) {
if (address < vma->vm_start)
@@ -267,31 +265,26 @@ static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone
for (;;) {
count = swap_out_vma(mm, vma, address, count, classzone);
if (!count)
goto out_unlock;
vma = vma->vm_next;
if (!vma)
break;
if (!count)
goto out_unlock;
address = vma->vm_start;
}
}
/* Reset to 0 when we reach the end of address space */
mm->swap_address = 0;
spin_lock(&mmlist_lock);
swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
spin_unlock(&mmlist_lock);
/* Indicate that we reached the end of address space */
mm->swap_address = TASK_SIZE;
out_unlock:
spin_unlock(&mm->page_table_lock);
return count;
}
static int FASTCALL(swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages));
static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages)
{
int counter, race;
int counter;
struct mm_struct *mm;
/* Then, look at the other mm's */
@@ -304,9 +297,10 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
spin_lock(&mmlist_lock);
mm = swap_mm;
if (mm == &init_mm) {
while (mm->swap_address == TASK_SIZE || mm == &init_mm) {
mm->swap_address = 0;
mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
if (mm == &init_mm)
if (mm == swap_mm)
goto empty;
swap_mm = mm;
}
@@ -315,13 +309,13 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
atomic_inc(&mm->mm_users);
spin_unlock(&mmlist_lock);
nr_pages = swap_out_mm(mm, nr_pages, &race, classzone);
nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone);
mmput(mm);
if (!nr_pages)
return 1;
} while (race || --counter >= 0);
} while (--counter >= 0);
return 0;
@@ -330,15 +324,15 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
return 0;
}
static int FASTCALL(shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask));
static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask)
static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask));
static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask)
{
struct list_head * entry;
int __max_scan = *max_scan;
spin_lock(&pagemap_lru_lock);
while (__max_scan && this_max_scan && (entry = lru->prev) != lru) {
while (max_scan && (entry = inactive_list.prev) != &inactive_list) {
struct page * page;
swp_entry_t swap;
if (unlikely(current->need_resched)) {
spin_unlock(&pagemap_lru_lock);
......@@ -353,18 +347,16 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
if (unlikely(!PageInactive(page) && !PageActive(page)))
BUG();
this_max_scan--;
list_del(entry);
list_add(entry, lru);
list_add(entry, &inactive_list);
if (PageTestandClearReferenced(page))
continue;
max_scan--;
if (unlikely(!memclass(page->zone, classzone)))
continue;
__max_scan--;
/* Racy check to avoid trylocking when not worthwhile */
if (!page->buffers && page_count(page) != 1)
continue;
@@ -479,14 +471,24 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
}
/* point of no return */
if (likely(!PageSwapCache(page)))
if (likely(!PageSwapCache(page))) {
swap.val = 0;
__remove_inode_page(page);
else
} else {
swap.val = page->index;
__delete_from_swap_cache(page);
}
spin_unlock(&pagecache_lock);
__lru_cache_del(page);
if (unlikely(swap.val != 0)) {
/* must drop lru lock if getting swap_list lock */
spin_unlock(&pagemap_lru_lock);
swap_free(swap);
spin_lock(&pagemap_lru_lock);
}
UnlockPage(page);
/* effectively free the page here */
@@ -498,7 +500,6 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
}
spin_unlock(&pagemap_lru_lock);
*max_scan = __max_scan;
return nr_pages;
}
@@ -509,14 +510,10 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
* We move them the other way when we see the
* reference bit on the page.
*/
static void balance_inactive(int nr_pages)
static void refill_inactive(int nr_pages)
{
struct list_head * entry;
/* If we have more inactive pages than active don't do anything */
if (nr_active_pages < nr_inactive_pages)
return;
spin_lock(&pagemap_lru_lock);
entry = active_list.prev;
while (nr_pages-- && entry != &active_list) {
@@ -541,14 +538,17 @@ static void balance_inactive(int nr_pages)
static int FASTCALL(shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages));
static int shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages)
{
int max_scan = (nr_inactive_pages + nr_active_pages / DEF_PRIORITY) / priority;
int max_scan = nr_inactive_pages / priority;
nr_pages -= kmem_cache_reap(gfp_mask);
if (nr_pages <= 0)
return 0;
balance_inactive(nr_pages);
nr_pages = shrink_cache(&inactive_list, &max_scan, nr_inactive_pages, nr_pages, classzone, gfp_mask);
/* Do we want to age the active list? */
if (nr_inactive_pages < nr_active_pages*2)
refill_inactive(nr_pages);
nr_pages = shrink_cache(nr_pages, max_scan, classzone, gfp_mask);
if (nr_pages <= 0)
return 0;
......
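The shrink_cache()/refill_inactive() changes implement a plain second-chance policy: scan from the old end of the inactive list, rotate each page, and spare any page whose referenced bit was set, clearing the bit so it is reclaimable on the next pass. A toy user-space version of that eviction loop (max_scan bookkeeping omitted), illustrative only:

#include <stdio.h>
#include <stddef.h>

struct page {
        int referenced;
        struct page *next;
};

/* Head of the list is the old end of the inactive queue.  A referenced
 * page is spared: the bit is cleared and the page rotates to the young
 * end, so it is evicted on the next encounter unless touched again. */
static struct page *reclaim_one(struct page **head)
{
        struct page *p;
        struct page **tail;

        while ((p = *head) != NULL) {
                *head = p->next;
                p->next = NULL;
                if (!p->referenced)
                        return p;       /* oldest unreferenced page */
                p->referenced = 0;      /* second chance */
                for (tail = head; *tail; tail = &(*tail)->next)
                        ;
                *tail = p;              /* rotate to the young end */
        }
        return NULL;
}

int main(void)
{
        struct page a = { 1, NULL };    /* old but recently referenced */
        struct page b = { 0, NULL };
        struct page *head = &a;

        a.next = &b;
        printf("victim is %s\n", reclaim_one(&head) == &b ? "b" : "a");
        return 0;
}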