Commit e2f6721a authored by Linus Torvalds

v2.4.9.14 -> v2.4.9.15

  - Jan Harkes: make Coda work with arbitrary host filesystems, not
  just filesystems that use generic_file_read/write
  - Al Viro: block device cleanups
  - Hugh Dickins: swap device lock fixes - fix swap readahead race
  - me, Andrea: more reference bit cleanups
parent 269f8f70
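The first changelog entry is implemented in the fs/coda/file.c hunk far below: Coda stops assuming its container files are backed by generic_file_read/write and instead calls through the host filesystem's own file_operations. A minimal sketch of that passthrough idiom, condensed from the hunk (coda_read_sketch is a hypothetical name; ITOC and c_container are the Coda names used there):

	/* Forward the read to the container file's own f_op, so any host
	 * filesystem works, not only ones built on generic_file_read. */
	static ssize_t coda_read_sketch(struct file *file, char *buf,
					size_t count, loff_t *ppos)
	{
		struct file *cfile = ITOC(file->f_dentry->d_inode)->c_container;

		if (!cfile->f_op || !cfile->f_op->read)
			return -EINVAL;	/* host fs cannot read this file */
		return cfile->f_op->read(cfile, buf, count, ppos);
	}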
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 10
-EXTRAVERSION =-pre14
+EXTRAVERSION =-pre15
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
...
@@ -491,7 +491,6 @@ static void __exit rd_cleanup (void)
 			bdev->bd_cache_openers--;
 			truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
 			blkdev_put(bdev, BDEV_FILE);
-			bdput(bdev);
 		}
 		destroy_buffers(MKDEV(MAJOR_NR, i));
 	}
@@ -780,7 +779,7 @@ static void __init rd_load_image(kdev_t device, int offset, int unit)
 		if (i && (i % devblocks == 0)) {
 			printk("done disk #%d.\n", i/devblocks);
 			rotate = 0;
-			if (blkdev_close(inode, &infile) != 0) {
+			if (infile.f_op->release(inode, &infile) != 0) {
 				printk("Error closing the disk.\n");
 				goto noclose_input;
 			}
...
@@ -103,6 +103,7 @@ int raw_open(struct inode *inode, struct file *filp)
 	if (!bdev)
 		goto out;

+	atomic_inc(&bdev->bd_count);
 	rdev = to_kdev_t(bdev->bd_dev);
 	err = blkdev_get(bdev, filp->f_mode, 0, BDEV_RAW);
 	if (err)
...
@@ -270,6 +270,8 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
 	return 0;
 }
+EXPORT_SYMBOL(tty_register_ldisc);
+
 /* Set the discipline of a tty line. */
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
...
@@ -279,6 +279,7 @@ static void __init probedisk(int major, int minor,int device)
 	int i;
 	struct highpoint_raid_conf *prom;
 	static unsigned char block[4096];
+	struct block_device *bdev;

 	if (maxsectors(major,minor)==0)
 		return;
@@ -301,12 +302,12 @@ static void __init probedisk(int major, int minor,int device)
 	if (i>8)
 		return;

-	raid[device].disk[i].bdev = bdget(MKDEV(major,minor));
-	if (raid[device].disk[i].bdev != NULL) {
+	bdev = bdget(MKDEV(major,minor));
+	if (bdev && blkdev_get(bdev,FMODE_READ|FMODE_WRITE,0,BDEV_RAW) == 0) {
 		int j=0;
 		struct gendisk *gd;
-		/* This is supposed to prevent others from stealing our underlying disks */
-		blkdev_get(raid[device].disk[i].bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW);
+		raid[device].disk[i].bdev = bdev;
 		/* now blank the /proc/partitions table for the wrong partition table,
 		   so that scripts don't accidentally mount it and crash the kernel */
 		/* XXX: the 0 is an utter hack --hch */
@@ -408,12 +409,12 @@ static void __exit hptraid_exit (void)
 {
 	int i,device;
 	for (device = 0; device<16; device++) {
-		for (i=0;i<8;i++)
-			if (raid[device].disk[i].bdev) {
-				blkdev_put(raid[device].disk[i].bdev, BDEV_RAW);
-				bdput(raid[device].disk[i].bdev);
-				raid[device].disk[i].bdev = NULL;
+		for (i=0;i<8;i++) {
+			struct block_device *bdev = raid[device].disk[i].bdev;
+			raid[device].disk[i].bdev = NULL;
+			if (bdev)
+				blkdev_put(bdev, BDEV_RAW);
 		}
 		if (raid[device].sectors)
 			ataraid_release_device(device);
 	}
...
@@ -311,12 +311,12 @@ static void __init probedisk(int major, int minor,int device)
 	for (i=0;(i<prom->raid.total_disks)&&(i<8);i++) {
 		if ( (prom->raid.disk[i].channel== prom->raid.channel) &&
 		     (prom->raid.disk[i].device == prom->raid.device) ) {
-			raid[device].disk[i].bdev = bdget(MKDEV(major,minor));
-			if (raid[device].disk[i].bdev != NULL) {
+			struct block_device *bdev = bdget(MKDEV(major,minor));
+			if (bdev && blkdev_get(bdev,FMODE_READ|FMODE_WRITE,0,BDEV_RAW) == 0) {
 				struct gendisk *gd;
 				int j;
 				/* This is supposed to prevent others from stealing our underlying disks */
-				blkdev_get(raid[device].disk[i].bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW);
+				raid[device].disk[i].bdev = bdev;
 				gd=get_gendisk(major);
 				if (gd!=NULL) {
 					for (j=1+(minor<<gd->minor_shift);j<((minor+1)<<gd->minor_shift);j++)
@@ -418,13 +418,12 @@ static void __exit pdcraid_exit (void)
 {
 	int i,device;
 	for (device = 0; device<16; device++) {
-		for (i=0;i<8;i++)
-			if (raid[device].disk[i].bdev) {
-				blkdev_put(raid[device].disk[i].bdev, BDEV_RAW);
-				bdput(raid[device].disk[i].bdev);
-				raid[device].disk[i].bdev = NULL;
-			}
-
+		for (i=0;i<8;i++) {
+			struct block_device *bdev = raid[device].disk[i].bdev;
+			raid[device].disk[i].bdev = NULL;
+			if (bdev)
+				blkdev_put(bdev, BDEV_RAW);
+		}
 		if (raid[device].sectors)
 			ataraid_release_device(device);
 	}
...
@@ -649,11 +649,11 @@ static int lock_rdev (mdk_rdev_t *rdev)
 static void unlock_rdev (mdk_rdev_t *rdev)
 {
-	if (!rdev->bdev)
-		MD_BUG();
-	blkdev_put(rdev->bdev, BDEV_RAW);
-	bdput(rdev->bdev);
+	struct block_device *bdev = rdev->bdev;
 	rdev->bdev = NULL;
+	if (!bdev)
+		MD_BUG();
+	blkdev_put(bdev, BDEV_RAW);
 }

 void md_autodetect_dev (kdev_t dev);
...
@@ -404,7 +404,6 @@ static int get_inode(struct block_device *bdev)
 		if (!inode)
 			return -ENOMEM;
 		inode->i_rdev = to_kdev_t(bdev->bd_dev);
-		atomic_inc(&bdev->bd_count); /* will go away */
 		inode->i_bdev = bdev;
 		inode->i_data.a_ops = &def_blk_aops;
 		bdev->bd_inode = inode;
@@ -437,6 +436,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		INIT_LIST_HEAD(&bdev->bd_inodes);
 	}
 }
@@ -522,17 +522,58 @@ struct block_device *bdget(dev_t dev)
 void bdput(struct block_device *bdev)
 {
-	if (atomic_dec_and_test(&bdev->bd_count)) {
+	if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
+		struct list_head *p;
 		if (bdev->bd_openers)
 			BUG();
 		if (bdev->bd_cache_openers)
 			BUG();
-		spin_lock(&bdev_lock);
 		list_del(&bdev->bd_hash);
+		while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
+			struct inode *inode;
+			inode = list_entry(p, struct inode, i_devices);
+			list_del_init(p);
+			inode->i_bdev = NULL;
+		}
 		spin_unlock(&bdev_lock);
 		destroy_bdev(bdev);
 	}
 }

+int bd_acquire(struct inode *inode)
+{
+	struct block_device *bdev;
+	spin_lock(&bdev_lock);
+	if (inode->i_bdev) {
+		atomic_inc(&inode->i_bdev->bd_count);
+		spin_unlock(&bdev_lock);
+		return 0;
+	}
+	spin_unlock(&bdev_lock);
+	bdev = bdget(kdev_t_to_nr(inode->i_rdev));
+	if (!bdev)
+		return -ENOMEM;
+	spin_lock(&bdev_lock);
+	if (!inode->i_bdev) {
+		inode->i_bdev = bdev;
+		list_add(&inode->i_devices, &bdev->bd_inodes);
+	} else if (inode->i_bdev != bdev)
+		BUG();
+	spin_unlock(&bdev_lock);
+	return 0;
+}
+
+/* Call when you free inode */
+void bd_forget(struct inode *inode)
+{
+	spin_lock(&bdev_lock);
+	if (inode->i_bdev) {
+		list_del_init(&inode->i_devices);
+		inode->i_bdev = NULL;
+	}
+	spin_unlock(&bdev_lock);
+}
+
 static struct {
 	const char *name;
@@ -706,13 +747,15 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
+	if (ret)
+		bdput(bdev);
 	return ret;
 }

 int blkdev_open(struct inode * inode, struct file * filp)
 {
-	int ret = -ENXIO;
-	struct block_device *bdev = inode->i_bdev;
+	int ret;
+	struct block_device *bdev;

 	/*
 	 * Preserve backwards compatibility and allow large file access
@@ -722,13 +765,15 @@ int blkdev_open(struct inode * inode, struct file * filp)
 	 */
 	filp->f_flags |= O_LARGEFILE;

+	bd_acquire(inode);
+	bdev = inode->i_bdev;
 	down(&bdev->bd_sem);
-	if (get_inode(bdev)) {
-		up(&bdev->bd_sem);
-		return -ENOMEM;
-	}
+	ret = get_inode(bdev);
+	if (ret)
+		goto out;

+	ret = -ENXIO;
 	lock_kernel();
 	if (!bdev->bd_op)
 		bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
@@ -749,7 +794,10 @@ int blkdev_open(struct inode * inode, struct file * filp)
 		}
 	}
 	unlock_kernel();
+out:
 	up(&bdev->bd_sem);
+	if (ret)
+		bdput(bdev);
 	return ret;
 }
@@ -777,6 +825,7 @@ int blkdev_put(struct block_device *bdev, int kind)
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
+	bdput(bdev);
 	return ret;
 }
@@ -841,6 +890,7 @@ int blkdev_close(struct inode * inode, struct file * filp)
 	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
+	bdput(bdev);
 	return ret;
 }
...
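Taken together, the block-device hunks above change the bdev reference contract: blkdev_get() now drops the bdget() reference itself on failure, and blkdev_put() drops it on close, so callers no longer pair blkdev_put() with an explicit bdput(). A hedged sketch of the resulting caller pattern (it mirrors the ataraid and md hunks above; the error values are illustrative):

	struct block_device *bdev = bdget(dev);	/* takes a reference */
	if (!bdev)
		return -ENOMEM;
	if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) != 0)
		return -ENXIO;	/* blkdev_get already dropped our reference */
	/* ... use the device ... */
	blkdev_put(bdev, BDEV_RAW);	/* also drops the reference; no bdput() */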
@@ -31,28 +31,65 @@
 int use_coda_close;

 static ssize_t
-coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+coda_file_read(struct file *file, char *buf, size_t count, loff_t *ppos)
 {
+	struct inode *inode = file->f_dentry->d_inode;
+	struct coda_inode_info *cii = ITOC(inode);
 	struct file *cfile;
+
+	cfile = cii->c_container;
+	if (!cfile) BUG();
+
+	if (!cfile->f_op || !cfile->f_op->read)
+		return -EINVAL;
+
+	return cfile->f_op->read(cfile, buf, count, ppos);
+}
+
+static ssize_t
+coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+{
 	struct inode *cinode, *inode = file->f_dentry->d_inode;
 	struct coda_inode_info *cii = ITOC(inode);
-	ssize_t n;
+	struct file *cfile;
+	ssize_t ret;
+	int flags;

 	cfile = cii->c_container;
 	if (!cfile) BUG();

-	if (!cfile->f_op || cfile->f_op->write != generic_file_write)
-		BUG();
+	if (!cfile->f_op || !cfile->f_op->write)
+		return -EINVAL;

 	cinode = cfile->f_dentry->d_inode;
-	down(&cinode->i_sem);
+	down(&inode->i_sem);
+	flags = cfile->f_flags;
+	cfile->f_flags |= file->f_flags & (O_APPEND | O_SYNC);

-	n = generic_file_write(file, buf, count, ppos);
+	ret = cfile->f_op->write(cfile, buf, count, ppos);

+	cfile->f_flags = flags;
 	inode->i_size = cinode->i_size;
+	up(&inode->i_sem);

-	up(&cinode->i_sem);
+	return ret;
+}
+
+static int
+coda_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct coda_inode_info *cii = ITOC(inode);
+	struct file *cfile;
+
+	cfile = cii->c_container;
+	if (!cfile) BUG();
+
+	if (!cfile->f_op || !cfile->f_op->mmap)
+		return -ENODEV;

-	return n;
+	return cfile->f_op->mmap(cfile, vma);
 }

 int coda_open(struct inode *i, struct file *f)
@@ -237,9 +274,9 @@ int coda_fsync(struct file *file, struct dentry *dentry, int datasync)
 struct file_operations coda_file_operations = {
 	llseek: generic_file_llseek,
-	read: generic_file_read,
+	read: coda_file_read,
 	write: coda_file_write,
-	mmap: generic_file_mmap,
+	mmap: coda_file_mmap,
 	open: coda_open,
 	flush: coda_flush,
 	release: coda_release,
...
@@ -414,7 +414,7 @@ MODULE_AUTHOR("Peter J. Braam <braam@cs.cmu.edu>");
 static int __init init_coda(void)
 {
 	int status;
-	printk(KERN_INFO "Coda Kernel/Venus communications, v5.3.14, coda@cs.cmu.edu\n");
+	printk(KERN_INFO "Coda Kernel/Venus communications, v5.3.15, coda@cs.cmu.edu\n");
 	status = init_coda_psdev();
 	if ( status ) {
...
@@ -2291,9 +2291,16 @@ static int devfs_statfs (struct super_block *sb, struct statfs *buf)
     return 0;
 }   /*  End Function devfs_statfs  */

+static void devfs_clear_inode(struct inode *inode)
+{
+	if (S_ISBLK(inode->i_mode))
+		bdput(inode->i_bdev);
+}
+
 static struct super_operations devfs_sops =
 {
     put_inode:   force_delete,
+    clear_inode: devfs_clear_inode,
     statfs:      devfs_statfs,
 };
@@ -2351,9 +2358,7 @@ static struct inode *get_vfs_inode (struct super_block *sb,
     {
 	inode->i_rdev = MKDEV (de->u.fcb.u.device.major,
 			       de->u.fcb.u.device.minor);
-	inode->i_bdev = bdget ( kdev_t_to_nr (inode->i_rdev) );
-	inode->i_mapping->a_ops = &def_blk_aops;
-	if (inode->i_bdev)
+	if (bd_acquire(inode) == 0)
 	{
 	    if (!inode->i_bdev->bd_op && de->u.fcb.ops)
 		inode->i_bdev->bd_op = de->u.fcb.ops;
...
@@ -207,7 +207,6 @@ void init_special_inode(struct inode *inode, umode_t mode, int rdev)
 	} else if (S_ISBLK(mode)) {
 		inode->i_fop = &def_blk_fops;
 		inode->i_rdev = to_kdev_t(rdev);
-		inode->i_bdev = bdget(rdev);
 	} else if (S_ISFIFO(mode))
 		inode->i_fop = &def_fifo_fops;
 	else if (S_ISSOCK(mode))
...
@@ -106,6 +106,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		INIT_LIST_HEAD(&inode->i_dentry);
 		INIT_LIST_HEAD(&inode->i_dirty_buffers);
 		INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
+		INIT_LIST_HEAD(&inode->i_devices);
 		sema_init(&inode->i_sem, 1);
 		sema_init(&inode->i_zombie, 1);
 		spin_lock_init(&inode->i_data.i_shared_lock);
@@ -516,11 +517,9 @@ void clear_inode(struct inode *inode)
 	DQUOT_DROP(inode);
 	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
 		inode->i_sb->s_op->clear_inode(inode);
-	if (inode->i_bdev) {
-		bdput(inode->i_bdev);
-		inode->i_bdev = NULL;
-	}
-	if (inode->i_cdev) {
+	if (inode->i_bdev)
+		bd_forget(inode);
+	else if (inode->i_cdev) {
 		cdput(inode->i_cdev);
 		inode->i_cdev = NULL;
 	}
...
@@ -47,9 +47,10 @@ get_drive_geometry(int kdev,struct hd_geometry *geo)
 {
 	struct block_device *bdev = bdget(kdev_t_to_nr(kdev));
 	int rc = blkdev_get(bdev, 0, 1, BDEV_FILE);
-	if ( rc == 0 )
+	if ( rc == 0 ) {
 		rc = ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo);
-	blkdev_put(bdev,BDEV_FILE);
+		blkdev_put(bdev, BDEV_FILE);
+	}
 	return rc;
 }
@@ -58,9 +59,10 @@ get_drive_info(int kdev,dasd_information_t *info)
 {
 	struct block_device *bdev = bdget(kdev_t_to_nr(kdev));
 	int rc = blkdev_get(bdev, 0, 1, BDEV_FILE);
-	if ( rc == 0 )
+	if ( rc == 0 ) {
 		rc = ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)(info));
-	blkdev_put(bdev,BDEV_FILE);
+		blkdev_put(bdev, BDEV_FILE);
+	}
 	return rc;
 }
...
@@ -925,6 +925,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	error = -EACCES;
 	if (nd.mnt->mnt_flags & MNT_NODEV)
 		goto out;
+	bd_acquire(inode);
 	bdev = inode->i_bdev;
 	bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
 	if (bdops) bdev->bd_op = bdops;
@@ -982,8 +983,6 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	if (!fs_type->read_super(s, data, 0))
 		goto out_fail;
 	unlock_super(s);
-	/* tell bdcache that we are going to keep this one */
-	atomic_inc(&bdev->bd_count);
 	get_filesystem(fs_type);
 	path_release(&nd);
 	return s;
@@ -1128,10 +1127,9 @@ static void kill_super(struct super_block *sb)
 	sb->s_type = NULL;
 	unlock_super(sb);
 	unlock_kernel();
-	if (bdev) {
+	if (bdev)
 		blkdev_put(bdev, BDEV_FS);
-		bdput(bdev);
-	} else
+	else
 		put_unnamed_dev(dev);
 	spin_lock(&sb_lock);
 	list_del(&sb->s_list);
@@ -1718,6 +1716,7 @@ void __init mount_root(void)
 	if (!ROOT_DEV)
 		panic("I have no root and I want to scream");
+retry:
 	bdev = bdget(kdev_t_to_nr(ROOT_DEV));
 	if (!bdev)
 		panic(__FUNCTION__ ": unable to allocate root device");
@@ -1729,7 +1728,7 @@ void __init mount_root(void)
 	retval = blkdev_get(bdev, mode, 0, BDEV_FS);
 	if (retval == -EROFS) {
 		root_mountflags |= MS_RDONLY;
-		retval = blkdev_get(bdev, FMODE_READ, 0, BDEV_FS);
+		goto retry;
 	}
 	if (retval) {
 		/*
@@ -1977,6 +1976,7 @@ int __init change_root(kdev_t new_root_dev,const char *put_old)
 	int blivet;
 	struct block_device *ramdisk = old_rootmnt->mnt_sb->s_bdev;

+	atomic_inc(&ramdisk->bd_count);
 	blivet = blkdev_get(ramdisk, FMODE_READ, 0, BDEV_FS);
 	printk(KERN_NOTICE "Trying to unmount old root ... ");
 	if (!blivet) {
...
@@ -415,6 +415,7 @@ struct block_device {
 	int bd_cache_openers;
 	const struct block_device_operations *bd_op;
 	struct semaphore bd_sem;	/* open/close mutex */
+	struct list_head bd_inodes;
 };

 struct inode {
@@ -452,6 +453,7 @@ struct inode {
 	int i_mapping_overload;
 	struct dquot *i_dquot[MAXQUOTAS];
 	/* These three should probably be a union */
+	struct list_head i_devices;
 	struct pipe_inode_info *i_pipe;
 	struct block_device *i_bdev;
 	struct char_device *i_cdev;
@@ -1046,6 +1048,8 @@ enum {BDEV_FILE, BDEV_SWAP, BDEV_FS, BDEV_RAW};
 extern int register_blkdev(unsigned int, const char *, struct block_device_operations *);
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
+extern int bd_acquire(struct inode *inode);
+extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct char_device *cdget(dev_t);
 extern void cdput(struct char_device *);
...
@@ -274,7 +274,6 @@ typedef struct page {
 #define PG_active 6
 #define PG_inactive 7
 #define PG_slab 8
-#define PG_swap_cache 9
 #define PG_skip 10
 #define PG_highmem 11
 #define PG_checked 12	/* kill me in 2.5.<early>. */
@@ -326,18 +325,9 @@ static inline void set_page_dirty(struct page * page)
 #define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags)
 #define PageTestandClearDecrAfter(page) test_and_clear_bit(PG_decr_after, &(page)->flags)
 #define PageSlab(page) test_bit(PG_slab, &(page)->flags)
-#define PageSwapCache(page) test_bit(PG_swap_cache, &(page)->flags)
-#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)

 #define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
-#define PageSetSwapCache(page) set_bit(PG_swap_cache, &(page)->flags)
+#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)

-#define PageTestandSetSwapCache(page) test_and_set_bit(PG_swap_cache, &(page)->flags)
-
-#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
-#define PageClearSwapCache(page) clear_bit(PG_swap_cache, &(page)->flags)
-
-#define PageTestandClearSwapCache(page) test_and_clear_bit(PG_swap_cache, &(page)->flags)
+#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)

 #define PageActive(page) test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page) set_bit(PG_active, &(page)->flags)
@@ -465,6 +455,9 @@ extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void swapin_readahead(swp_entry_t);

+extern struct address_space swapper_space;
+#define PageSwapCache(page) ((page)->mapping == &swapper_space)
+
 static inline int is_page_cache_freeable(struct page * page)
 {
 	return page_count(page) - !!page->buffers == 1;
@@ -476,15 +469,13 @@ static inline int is_page_cache_freeable(struct page * page)
  */
 static inline int exclusive_swap_page(struct page *page)
 {
-	unsigned int count;
-
 	if (!PageLocked(page))
 		BUG();
 	if (!PageSwapCache(page))
 		return 0;
-	count = page_count(page) - !!page->buffers;	/* 2: us + swap cache */
-	count += swap_count(page);			/* +1: just swap cache */
-	return count == 3;				/* =3: total */
+	if (page_count(page) - !!page->buffers != 2)	/* 2: us + cache */
+		return 0;
+	return swap_count(page) == 1;			/* 1: just cache */
 }

 extern void __free_pte(pte_t);
@@ -565,11 +556,10 @@ extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
 #define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
 #define GFP_ATOMIC (__GFP_HIGH)
 #define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
-#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO \
-		| __GFP_FS | __GFP_HIGHMEM)
+#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
 #define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
 #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
-#define GFP_KSWAPD ( __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)

 /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
    platforms, used as appropriate on others */
...
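The rewritten exclusive_swap_page() above rests on a small counting argument: the page is exclusively ours when exactly two references hold the page (this caller plus the swap cache, after discounting page->buffers) and exactly one reference holds the swap entry (the cache itself). A worked example under those definitions, added here as an editorial note:

	/* page_count(page) == 2, page->buffers == NULL, swap_count(page) == 1:
	 *   2 - 0 == 2   -> only we and the swap cache hold the page
	 *   swap_count 1 -> only the swap cache holds the entry
	 * so the page may be reused privately and the entry freed */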
@@ -84,7 +84,6 @@ extern unsigned int nr_free_buffer_pages(void);
 extern int nr_active_pages;
 extern int nr_inactive_pages;
 extern atomic_t nr_async_pages;
-extern struct address_space swapper_space;
 extern atomic_t page_cache_size;
 extern atomic_t buffermem_pages;
 extern spinlock_t pagecache_lock;
@@ -122,35 +121,27 @@ extern void rw_swap_page_nolock(int, swp_entry_t, char *);
 /* linux/mm/swap_state.c */
 extern void show_swap_cache_info(void);
 extern void add_to_swap_cache(struct page *, swp_entry_t);
-extern int swap_check_entry(unsigned long);
+extern void __delete_from_swap_cache(struct page *page);
+extern void delete_from_swap_cache(struct page *page);
+extern void free_page_and_swap_cache(struct page *page);
 extern struct page * lookup_swap_cache(swp_entry_t);
 extern struct page * read_swap_cache_async(swp_entry_t);

 /* linux/mm/oom_kill.c */
 extern void oom_kill(void);

-/*
- * Make these inline later once they are working properly.
- */
-extern void __delete_from_swap_cache(struct page *page);
-extern void delete_from_swap_cache(struct page *page);
-extern void delete_from_swap_cache_nolock(struct page *page);
-extern void free_page_and_swap_cache(struct page *page);
-
 /* linux/mm/swapfile.c */
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern int is_swap_partition(kdev_t);
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t __get_swap_page(unsigned short);
+extern swp_entry_t get_swap_page(void);
 extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
 				struct inode **);
 extern int swap_duplicate(swp_entry_t);
 extern int swap_count(struct page *);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
-#define get_swap_page() __get_swap_page(1)
-extern void __swap_free(swp_entry_t, unsigned short);
-#define swap_free(entry) __swap_free((entry), 1)
+extern void swap_free(swp_entry_t);

 struct swap_list_t {
 	int head;	/* head of priority-ordered swapfile list */
 	int next;	/* swapfile to be used next */
...
@@ -1704,6 +1704,7 @@ struct page * filemap_nopage(struct vm_area_struct * area,
 	 * and possibly copy it over to another page..
 	 */
 	old_page = page;
+	mark_page_accessed(page);
 	if (no_share) {
 		struct page *new_page = alloc_page(GFP_HIGHUSER);
@@ -2553,7 +2554,6 @@ struct page *__read_cache_page(struct address_space *mapping,
 	}
 	if (cached_page)
 		page_cache_release(cached_page);
-	mark_page_accessed(page);
 	return page;
 }
@@ -2571,7 +2571,10 @@ struct page *read_cache_page(struct address_space *mapping,
 retry:
 	page = __read_cache_page(mapping, index, filler, data);
-	if (IS_ERR(page) || Page_Uptodate(page))
+	if (IS_ERR(page))
+		goto out;
+	mark_page_accessed(page);
+	if (Page_Uptodate(page))
 		goto out;

 	lock_page(page);
@@ -2835,6 +2838,7 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
 unlock:
 		kunmap(page);
 		/* Mark it unlocked again and drop the page.. */
+		SetPageReferenced(page);
 		UnlockPage(page);
 		page_cache_release(page);
...
@@ -85,8 +85,6 @@ void __free_pte(pte_t pte)
 	if (page->mapping) {
 		if (pte_dirty(pte))
 			set_page_dirty(page);
-		if (pte_young(pte))
-			mark_page_accessed(page);
 	}
 	free_page_and_swap_cache(page);
@@ -939,10 +937,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			break;
 		/* Recheck swapcachedness once the page is locked */
 		can_reuse = exclusive_swap_page(old_page);
-#if 1
 		if (can_reuse)
-			delete_from_swap_cache_nolock(old_page);
-#endif
+			delete_from_swap_cache(old_page);
 		UnlockPage(old_page);
 		if (!can_reuse)
 			break;
@@ -1088,23 +1084,19 @@ void swapin_readahead(swp_entry_t entry)
 	unsigned long offset;

 	/*
-	 * Get the number of handles we should do readahead io to. Also,
-	 * grab temporary references on them, releasing them as io completes.
+	 * Get the number of handles we should do readahead io to.
 	 */
 	num = valid_swaphandles(entry, &offset);
 	for (i = 0; i < num; offset++, i++) {
 		/* Don't block on I/O for read-ahead */
-		if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster
-				* (1 << page_cluster)) {
-			while (i++ < num)
-				swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
+		if (atomic_read(&nr_async_pages) >=
+				pager_daemon.swap_cluster << page_cluster)
 			break;
-		}
 		/* Ok, do the async read-ahead now */
 		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
-		if (new_page != NULL)
-			page_cache_release(new_page);
-		swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
+		if (!new_page)
+			break;
+		page_cache_release(new_page);
 	}
 	return;
 }
@@ -1164,11 +1156,12 @@ static int do_swap_page(struct mm_struct * mm,
 	pte = mk_pte(page, vma->vm_page_prot);

 	swap_free(entry);
+	mark_page_accessed(page);
 	if (exclusive_swap_page(page)) {
 		if (vma->vm_flags & VM_WRITE)
 			pte = pte_mkwrite(pte);
 		pte = pte_mkdirty(pte);
-		delete_from_swap_cache_nolock(page);
+		delete_from_swap_cache(page);
 	}
 	UnlockPage(page);
...
@@ -234,45 +234,55 @@ static int shmem_writepage(struct page * page)
 	int error;
 	struct shmem_inode_info *info;
 	swp_entry_t *entry, swap;
+	struct address_space *mapping;
+	unsigned long index;
 	struct inode *inode;

 	if (!PageLocked(page))
 		BUG();

-	inode = page->mapping->host;
+	mapping = page->mapping;
+	index = page->index;
+	inode = mapping->host;
 	info = &inode->u.shmem_i;

-	swap = __get_swap_page(2);
-	error = -ENOMEM;
-	if (!swap.val) {
-		activate_page(page);
-		SetPageDirty(page);
-		goto out;
-	}
-
 	spin_lock(&info->lock);
-	entry = shmem_swp_entry(info, page->index);
-	if (IS_ERR(entry))	/* this had been allocted on page allocation */
+	entry = shmem_swp_entry(info, index);
+	if (IS_ERR(entry))	/* this had been allocated on page allocation */
 		BUG();
-	shmem_recalc_inode(page->mapping->host);
-	error = -EAGAIN;
+	shmem_recalc_inode(inode);
 	if (entry->val)
 		BUG();

-	*entry = swap;
-	error = 0;
-	/* Remove the from the page cache */
+	/* Remove it from the page cache */
 	lru_cache_del(page);
 	remove_inode_page(page);

+	swap_list_lock();
+	swap = get_swap_page();
+	if (!swap.val) {
+		swap_list_unlock();
+		/* Add it back to the page cache */
+		add_to_page_cache_locked(page, mapping, index);
+		activate_page(page);
+		SetPageDirty(page);
+		error = -ENOMEM;
+		goto out;
+	}
+
 	/* Add it to the swap cache */
 	add_to_swap_cache(page, swap);
-	page_cache_release(page);
-	info->swapped++;
-
-	spin_unlock(&info->lock);
+	swap_list_unlock();
 	set_page_dirty(page);
+	info->swapped++;
+	*entry = swap;
+	error = 0;
 out:
+	spin_unlock(&info->lock);
 	UnlockPage(page);
+	page_cache_release(page);
 	return error;
 }
@@ -356,7 +366,7 @@ static struct page * shmem_getpage_locked(struct inode * inode, unsigned long id
 		swap_free(*entry);
 		*entry = (swp_entry_t) {0};
-		delete_from_swap_cache_nolock(page);
+		delete_from_swap_cache(page);
 		flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1);
 		page->flags = flags | (1 << PG_dirty);
 		add_to_page_cache_locked(page, mapping, idx);
...
@@ -54,7 +54,6 @@ void deactivate_page_nolock(struct page * page)
 		del_page_from_active_list(page);
 		add_page_to_inactive_list(page);
 	}
-	ClearPageReferenced(page);
 }

 void deactivate_page(struct page * page)
@@ -73,7 +72,6 @@ void activate_page_nolock(struct page * page)
 		del_page_from_inactive_list(page);
 		add_page_to_active_list(page);
 	}
-	SetPageReferenced(page);
 }

 void activate_page(struct page * page)
...
@@ -23,17 +23,11 @@
  */
 static int swap_writepage(struct page *page)
 {
-	/* One for the page cache, one for this user, one for page->buffers */
-	if (page_count(page) > 2 + !!page->buffers)
-		goto in_use;
-	if (swap_count(page) > 1)
-		goto in_use;
-
-	delete_from_swap_cache_nolock(page);
-	UnlockPage(page);
-	return 0;
-
-in_use:
+	if (exclusive_swap_page(page)) {
+		delete_from_swap_cache(page);
+		UnlockPage(page);
+		return 0;
+	}
 	rw_swap_page(WRITE, page);
 	return 0;
 }
@@ -75,8 +69,6 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
 #endif
 	if (!PageLocked(page))
 		BUG();
-	if (PageTestandSetSwapCache(page))
-		BUG();
 	if (page->mapping)
 		BUG();
@@ -92,51 +84,42 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
  */
 void __delete_from_swap_cache(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	swp_entry_t entry;
-
 #ifdef SWAP_CACHE_INFO
 	swap_cache_del_total++;
 #endif
-	if (mapping != &swapper_space)
+	if (!PageLocked(page))
 		BUG();
-	if (!PageSwapCache(page) || !PageLocked(page))
+	if (!PageSwapCache(page))
 		BUG();
-
-	entry.val = page->index;
-	PageClearSwapCache(page);
 	ClearPageDirty(page);
 	__remove_inode_page(page);
-	swap_free(entry);
 }

 /*
- * This will never put the page into the free list, the caller has
- * a reference on the page.
+ * This must be called only on pages that have
+ * been verified to be in the swap cache and locked.
+ * It will never put the page into the free list,
+ * the caller has a reference on the page.
  */
-void delete_from_swap_cache_nolock(struct page *page)
+void delete_from_swap_cache(struct page *page)
 {
+	swp_entry_t entry;
+
 	if (!PageLocked(page))
 		BUG();

 	if (block_flushpage(page, 0))
 		lru_cache_del(page);

+	entry.val = page->index;
+
 	spin_lock(&pagecache_lock);
 	__delete_from_swap_cache(page);
 	spin_unlock(&pagecache_lock);

-	page_cache_release(page);
-}
-
-/*
- * This must be called only on pages that have
- * been verified to be in the swap cache and locked.
- */
-void delete_from_swap_cache(struct page *page)
-{
-	lock_page(page);
-	delete_from_swap_cache_nolock(page);
-	UnlockPage(page);
+	swap_free(entry);
+	page_cache_release(page);
 }

 /*
@@ -156,7 +139,7 @@ void free_page_and_swap_cache(struct page *page)
 	 */
 	if (PageSwapCache(page) && !TryLockPage(page)) {
 		if (exclusive_swap_page(page))
-			delete_from_swap_cache_nolock(page);
+			delete_from_swap_cache(page);
 		UnlockPage(page);
 	}
 	page_cache_release(page);
@@ -213,19 +196,24 @@ struct page * read_swap_cache_async(swp_entry_t entry)
 	new_page = alloc_page(GFP_HIGHUSER);
 	if (!new_page)
 		goto out;		/* Out of memory */
-	if (TryLockPage(new_page))
-		BUG();

 	/*
 	 * Check the swap cache again, in case we stalled above.
-	 * The BKL is guarding against races between this check
+	 * swap_list_lock is guarding against races between this check
 	 * and where the new page is added to the swap cache below.
+	 * It is also guarding against race where try_to_swap_out
+	 * allocates entry with get_swap_page then adds to cache.
 	 */
+	swap_list_lock();
 	found_page = __find_get_page(&swapper_space, entry.val, hash);
 	if (found_page)
 		goto out_free_page;

 	/*
 	 * Make sure the swap entry is still in use. It could have gone
-	 * while caller waited for BKL, or while allocating page above,
+	 * since caller dropped page_table_lock, while allocating page above,
 	 * or while allocating page in prior call via swapin_readahead.
 	 */
 	if (!swap_duplicate(entry))	/* Account for the swap cache */
@@ -234,13 +222,15 @@ struct page * read_swap_cache_async(swp_entry_t entry)
 	/*
 	 * Add it to the swap cache and read its contents.
 	 */
+	if (TryLockPage(new_page))
+		BUG();
 	add_to_swap_cache(new_page, entry);
+	swap_list_unlock();
 	rw_swap_page(READ, new_page);
 	return new_page;

 out_free_page:
-	UnlockPage(new_page);
+	swap_list_unlock();
 	page_cache_release(new_page);
 out:
 	return found_page;
...
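The swap_state.c changes above and the swapfile.c changes below close the readahead race by making swap-entry allocation and swap-cache insertion atomic with respect to read_swap_cache_async(), whose cache lookup now runs under the same lock. A hedged sketch of the writer-side protocol (it mirrors the shmem_writepage hunk above and the comment added on get_swap_page below; the error handling is illustrative):

	swap_list_lock();
	swap = get_swap_page();		/* caller now holds swap_list_lock */
	if (!swap.val) {
		swap_list_unlock();	/* out of swap: back out */
		return -ENOMEM;
	}
	add_to_swap_cache(page, swap);	/* same lock hold, so a concurrent
					 * read_swap_cache_async() cannot
					 * miss the page in between */
	swap_list_unlock();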
@@ -14,6 +14,7 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/shm.h>
+#include <linux/compiler.h>

 #include <asm/pgtable.h>
@@ -33,7 +34,7 @@ struct swap_info_struct swap_info[MAX_SWAPFILES];
 #define SWAPFILE_CLUSTER 256

-static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
+static inline int scan_swap_map(struct swap_info_struct *si)
 {
 	unsigned long offset;
 	/*
@@ -86,7 +87,8 @@ static inline int scan_swap_map(struct swap_info_struct *si, unsigned short coun
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
 	}
-	si->swap_map[offset] = count;
+	/* Initial count 1 for user reference + 1 for swap cache */
+	si->swap_map[offset] = 2;
 	nr_swap_pages--;
 	si->cluster_next = offset+1;
 	return offset;
@@ -96,7 +98,12 @@ static inline int scan_swap_map(struct swap_info_struct *si, unsigned short coun
 	return 0;
 }

-swp_entry_t __get_swap_page(unsigned short count)
+/*
+ * Callers of get_swap_page must hold swap_list_lock across the call,
+ * and across the following add_to_swap_cache, to guard against races
+ * with read_swap_cache_async.
+ */
+swp_entry_t get_swap_page(void)
 {
 	struct swap_info_struct * p;
 	unsigned long offset;
@@ -104,20 +111,17 @@ swp_entry_t __get_swap_page(unsigned short count)
 	int type, wrapped = 0;

 	entry.val = 0;	/* Out of memory */
-	if (count >= SWAP_MAP_MAX)
-		goto bad_count;
-	swap_list_lock();
 	type = swap_list.next;
 	if (type < 0)
 		goto out;
-	if (nr_swap_pages == 0)
+	if (nr_swap_pages <= 0)
 		goto out;

 	while (1) {
 		p = &swap_info[type];
 		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 			swap_device_lock(p);
-			offset = scan_swap_map(p, count);
+			offset = scan_swap_map(p);
 			swap_device_unlock(p);
 			if (offset) {
 				entry = SWP_ENTRY(type,offset);
@@ -142,21 +146,14 @@ swp_entry_t __get_swap_page(unsigned short count)
 		goto out;	/* out of swap space */
 	}
 out:
-	swap_list_unlock();
-	return entry;
-
-bad_count:
-	printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
-	       count, __builtin_return_address(0));
 	return entry;
 }
 /*
  * Caller has made sure that the swapdevice corresponding to entry
  * is still around or has not been recycled.
  */
-void __swap_free(swp_entry_t entry, unsigned short count)
+void swap_free(swp_entry_t entry)
 {
 	struct swap_info_struct * p;
 	unsigned long offset, type;
@@ -180,9 +177,7 @@ void __swap_free(swp_entry_t entry, unsigned short count)
 		swap_list.next = type;
 	swap_device_lock(p);
 	if (p->swap_map[offset] < SWAP_MAP_MAX) {
-		if (p->swap_map[offset] < count)
-			goto bad_count;
-		if (!(p->swap_map[offset] -= count)) {
+		if (!--(p->swap_map[offset])) {
 			if (offset < p->lowest_bit)
 				p->lowest_bit = offset;
 			if (offset > p->highest_bit)
@@ -207,11 +202,6 @@ void __swap_free(swp_entry_t entry, unsigned short count)
 bad_free:
 	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
 	goto out;
-
-bad_count:
-	swap_device_unlock(p);
-	swap_list_unlock();
-	printk(KERN_ERR "swap_free: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
-	goto out;
 }
 /*
@@ -229,9 +219,9 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
 {
 	pte_t pte = *dir;

-	if (pte_to_swp_entry(pte).val != entry.val)
+	if (likely(pte_to_swp_entry(pte).val != entry.val))
 		return;
-	if (pte_none(pte) || pte_present(pte))
+	if (unlikely(pte_none(pte) || pte_present(pte)))
 		return;
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
@@ -458,7 +448,7 @@ static int try_to_unuse(unsigned int type)
 	 */
 	lock_page(page);
 	if (PageSwapCache(page))
-		delete_from_swap_cache_nolock(page);
+		delete_from_swap_cache(page);
 	SetPageDirty(page);
 	UnlockPage(page);
 	flush_page_to_ram(page);
@@ -567,14 +557,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
 	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 		p = swap_info + type;
 		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
-			if (p->swap_file) {
-				if (p->swap_file == nd.dentry)
-					break;
-			} else {
-				if (S_ISBLK(nd.dentry->d_inode->i_mode)
-				    && (p->swap_device == nd.dentry->d_inode->i_rdev))
-					break;
-			}
+			if (p->swap_file == nd.dentry)
+				break;
 		}
 		prev = type;
 	}
@@ -616,19 +600,21 @@ asmlinkage long sys_swapoff(const char * specialfile)
 		goto out_dput;
 	}
 	if (p->swap_device)
-		blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
+		blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
 	path_release(&nd);

 	swap_list_lock();
-	nd.dentry = p->swap_file;
-	p->swap_file = NULL;
+	swap_device_lock(p);
 	nd.mnt = p->swap_vfsmnt;
+	nd.dentry = p->swap_file;
 	p->swap_vfsmnt = NULL;
+	p->swap_file = NULL;
 	p->swap_device = 0;
 	p->max = 0;
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
 	p->flags = 0;
+	swap_device_unlock(p);
 	swap_list_unlock();
 	vfree(swap_map);
 	err = 0;
@@ -711,6 +697,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	unsigned long maxpages = 1;
 	int swapfilesize;
 	struct block_device *bdev = NULL;
+	unsigned short *swap_map;

 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -760,6 +747,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 		p->swap_device = dev;
 		set_blocksize(dev, PAGE_SIZE);

+		bd_acquire(swap_inode);
 		bdev = swap_inode->i_bdev;
 		bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
 		if (bdops) bdev->bd_op = bdops;
@@ -772,29 +760,24 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 		if (!dev || (blk_size[MAJOR(dev)] &&
 		     !blk_size[MAJOR(dev)][MINOR(dev)]))
 			goto bad_swap;
-		error = -EBUSY;
-		for (i = 0 ; i < nr_swapfiles ; i++) {
-			if (i == type)
-				continue;
-			if (dev == swap_info[i].swap_device)
-				goto bad_swap;
-		}
 		swapfilesize = 0;
 		if (blk_size[MAJOR(dev)])
 			swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
 				>> (PAGE_SHIFT - 10);
-	} else if (S_ISREG(swap_inode->i_mode)) {
-		error = -EBUSY;
-		for (i = 0 ; i < nr_swapfiles ; i++) {
-			if (i == type || !swap_info[i].swap_file)
-				continue;
-			if (swap_inode == swap_info[i].swap_file->d_inode)
-				goto bad_swap;
-		}
+	} else if (S_ISREG(swap_inode->i_mode))
 		swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
-	} else
+	else
 		goto bad_swap;
+
+	error = -EBUSY;
+	for (i = 0 ; i < nr_swapfiles ; i++) {
+		struct swap_info_struct *q = &swap_info[i];
+		if (i == type || !q->swap_file)
+			continue;
+		if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
+			goto bad_swap;
+	}

 	swap_header = (void *) __get_free_page(GFP_USER);
 	if (!swap_header) {
 		printk("Unable to start swapping: out of memory :-)\n");
@@ -900,6 +883,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	}
 	p->swap_map[0] = SWAP_MAP_BAD;
 	swap_list_lock();
+	swap_device_lock(p);
 	p->max = maxpages;
 	p->flags = SWP_WRITEOK;
 	p->pages = nr_good_pages;
@@ -922,6 +906,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	} else {
 		swap_info[prev].next = p - swap_info;
 	}
+	swap_device_unlock(p);
 	swap_list_unlock();
 	error = 0;
 	goto out;
@@ -929,11 +914,10 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	if (bdev)
 		blkdev_put(bdev, BDEV_SWAP);
 bad_swap_2:
-	if (p->swap_map)
-		vfree(p->swap_map);
+	swap_list_lock();
+	swap_map = p->swap_map;
 	nd.mnt = p->swap_vfsmnt;
 	nd.dentry = p->swap_file;
-	swap_list_lock();
 	p->swap_device = 0;
 	p->swap_file = NULL;
 	p->swap_vfsmnt = NULL;
@@ -942,6 +926,8 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	if (!(swap_flags & SWAP_FLAG_PREFER))
 		++least_priority;
 	swap_list_unlock();
+	if (swap_map)
+		vfree(swap_map);
 	path_release(&nd);
 out:
 	if (swap_header)
@@ -987,43 +973,31 @@ int swap_duplicate(swp_entry_t entry)
 	unsigned long offset, type;
 	int result = 0;
 
-	/* Swap entry 0 is illegal */
-	if (!entry.val)
-		goto out;
 	type = SWP_TYPE(entry);
 	if (type >= nr_swapfiles)
 		goto bad_file;
 	p = type + swap_info;
 	offset = SWP_OFFSET(entry);
-	if (offset >= p->max)
-		goto bad_offset;
-	if (!p->swap_map[offset])
-		goto bad_unused;
-	/*
-	 * Entry is valid, so increment the map count.
-	 */
+
 	swap_device_lock(p);
-	if (p->swap_map[offset] < SWAP_MAP_MAX)
-		p->swap_map[offset]++;
-	else {
-		if (swap_overflow++ < 5)
-			printk(KERN_WARNING "swap_dup: swap entry overflow\n");
-		p->swap_map[offset] = SWAP_MAP_MAX;
+	if (offset < p->max && p->swap_map[offset]) {
+		if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
+			p->swap_map[offset]++;
+			result = 1;
+		} else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
+			if (swap_overflow++ < 5)
+				printk(KERN_WARNING "swap_dup: swap entry overflow\n");
+			p->swap_map[offset] = SWAP_MAP_MAX;
+			result = 1;
+		}
 	}
 	swap_device_unlock(p);
-	result = 1;
 out:
 	return result;
 
 bad_file:
 	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 	goto out;
-bad_offset:
-	/* Don't report: can happen in read_swap_cache_async after swapoff */
-	goto out;
-bad_unused:
-	/* Don't report: can happen in read_swap_cache_async after blocking */
-	goto out;
 }
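The rewritten swap_duplicate() folds the offset and map-count validity checks under swap_device_lock, so a racing swapoff can no longer shrink p->max or free the map between check and increment. A compilable userspace sketch of that locked, saturating reference bump, where MAP_MAX, struct swap_dev, and dup_entry are illustrative stand-ins rather than kernel interfaces:

#include <pthread.h>
#include <stdio.h>

#define MAP_MAX 0x7fff		/* stand-in for SWAP_MAP_MAX */

struct swap_dev {
	pthread_mutex_t lock;	/* stand-in for swap_device_lock */
	unsigned short *map;
	unsigned long max;
};

static int dup_entry(struct swap_dev *p, unsigned long offset)
{
	int result = 0;

	pthread_mutex_lock(&p->lock);
	/* bounds and in-use checks happen under the same lock as the bump */
	if (offset < p->max && p->map[offset]) {
		if (p->map[offset] < MAP_MAX - 1) {
			p->map[offset]++;
			result = 1;
		} else if (p->map[offset] <= MAP_MAX) {
			p->map[offset] = MAP_MAX;	/* pin: count saturated */
			result = 1;
		}
	}
	pthread_mutex_unlock(&p->lock);
	return result;
}

int main(void)
{
	unsigned short map[4] = { 0, 1, MAP_MAX, 0 };
	struct swap_dev dev = { PTHREAD_MUTEX_INITIALIZER, map, 4 };

	printf("%d %d %d\n", dup_entry(&dev, 1),	/* 1: bumped to 2 */
	       dup_entry(&dev, 2),			/* 1: stays pinned */
	       dup_entry(&dev, 3));			/* 0: unused slot */
	return 0;
}

Build with -lpthread. Failure is simply result 0, which is why the bad_offset/bad_unused labels could be dropped from the kernel function.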
 /*
@@ -1068,7 +1042,7 @@ int swap_count(struct page *page)
 }
 
 /*
- * Kernel_lock protects against swap device deletion.
+ * Prior swap_duplicate protects against swap device deletion.
  */
 void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
 			kdev_t *dev, struct inode **swapf)
@@ -1108,8 +1082,8 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
 }
 
 /*
- * Kernel_lock protects against swap device deletion. Grab an extra
- * reference on the swaphandle so that it dos not become unused.
+ * swap_device_lock prevents swap_map being freed. Don't grab an extra
+ * reference on the swaphandle, it doesn't matter if it becomes unused.
  */
 int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 {
@@ -1117,20 +1091,23 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 	unsigned long toff;
 	struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
 
-	*offset = SWP_OFFSET(entry);
-	toff = *offset = (*offset >> page_cluster) << page_cluster;
+	if (!page_cluster)	/* no readahead */
+		return 0;
+	toff = (SWP_OFFSET(entry) >> page_cluster) << page_cluster;
+	if (!toff)		/* first page is swap header */
+		toff++, i--;
+	*offset = toff;
 
 	swap_device_lock(swapdev);
 	do {
 		/* Don't read-ahead past the end of the swap area */
 		if (toff >= swapdev->max)
 			break;
-		/* Don't read in bad or busy pages */
+		/* Don't read in free or bad pages */
 		if (!swapdev->swap_map[toff])
 			break;
 		if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
 			break;
-		swapdev->swap_map[toff]++;
 		toff++;
 		ret++;
 	} while (--i);
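For reference, the window arithmetic in the new valid_swaphandles(): round the target offset down to a 1<<page_cluster boundary, skip slot 0 (it holds the swap header), and scan forward at most one cluster. A small standalone example using made-up values:

#include <stdio.h>

int main(void)
{
	unsigned long page_cluster = 3;		/* readahead window of 8 slots */
	unsigned long target = 21;		/* stand-in for SWP_OFFSET(entry) */
	unsigned long i = 1UL << page_cluster;
	unsigned long toff = (target >> page_cluster) << page_cluster;

	if (!toff)		/* first slot holds the swap header */
		toff++, i--;
	printf("readahead slots [%lu, %lu)\n", toff, toff + i);	/* [16, 24) */
	return 0;
}

Because the new code no longer bumps swap_map for each readahead slot, holding swap_device_lock for the duration of the scan is what keeps the map from being freed underneath it.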
......
@@ -52,14 +52,9 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
 	/* Don't look at this pte if it's been accessed recently. */
 	if (ptep_test_and_clear_young(page_table)) {
 		flush_tlb_page(vma, address);
-		mark_page_accessed(page);
 		return 0;
 	}
 
-	/* Don't bother with it if the page is otherwise active */
-	if (PageActive(page))
-		return 0;
-
 	if (TryLockPage(page))
 		return 0;
 
@@ -85,8 +80,8 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
 			entry.val = page->index;
 			if (pte_dirty(pte))
 				set_page_dirty(page);
-set_swap_pte:
 			swap_duplicate(entry);
+set_swap_pte:
 			set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
 			mm->rss--;
@@ -130,16 +125,18 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
 	 * we have the swap cache set up to associate the
 	 * page with that swap entry.
 	 */
+	swap_list_lock();
 	entry = get_swap_page();
-	if (!entry.val)
-		goto out_unlock_restore; /* No swap space left */
-
-	/* Add it to the swap cache and mark it dirty */
-	add_to_swap_cache(page, entry);
-	set_page_dirty(page);
-	goto set_swap_pte;
+	if (entry.val) {
+		/* Add it to the swap cache and mark it dirty */
+		add_to_swap_cache(page, entry);
+		swap_list_unlock();
+		set_page_dirty(page);
+		goto set_swap_pte;
+	}
 
-out_unlock_restore:
+	/* No swap space left */
+	swap_list_unlock();
 	set_pte(page_table, pte);
 	UnlockPage(page);
 	return 0;
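The hunk above moves get_swap_page() and add_to_swap_cache() under a single swap_list_lock hold, so the fresh entry is allocated and registered atomically with respect to a concurrent swapoff. A rough userspace analogue of that allocate-then-register pattern; alloc_and_register and the arrays are invented for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int slot_used[8];
static int slot_owner[8];

static int alloc_and_register(int owner)
{
	int i, slot = -1;

	pthread_mutex_lock(&list_lock);
	for (i = 1; i < 8; i++) {	/* slot 0 is reserved, as on swap */
		if (!slot_used[i]) {
			slot_used[i] = 1;	/* the get_swap_page() step */
			slot_owner[i] = owner;	/* the add_to_swap_cache() step */
			slot = i;
			break;
		}
	}
	pthread_mutex_unlock(&list_lock);
	return slot;	/* -1 means "no swap space left" */
}

int main(void)
{
	printf("page got slot %d\n", alloc_and_register(42));
	return 0;
}

No other task can observe the slot allocated but not yet attached to its page, which is the window the old two-step sequence left open.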
@@ -243,9 +240,9 @@ static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vm
 struct mm_struct *swap_mm = &init_mm;
 
 /*
- * Returns non-zero if we scanned all `count' pages
+ * Returns remaining count of pages to be swapped out by followup call.
  */
-static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone_t * classzone)
+static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone)
 {
 	unsigned long address;
 	struct vm_area_struct* vma;
@@ -255,11 +252,12 @@ static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone
 	 * and ptes.
 	 */
 	spin_lock(&mm->page_table_lock);
-	*race = 1;
-	if (swap_mm != mm)
-		goto out_unlock;
-	*race = 0;
 	address = mm->swap_address;
+	if (address == TASK_SIZE || swap_mm != mm) {
+		/* We raced: don't count this mm but try again */
+		++*mmcounter;
+		goto out_unlock;
+	}
 	vma = find_vma(mm, address);
 	if (vma) {
 		if (address < vma->vm_start)
@@ -267,31 +265,26 @@ static inline int swap_out_mm(struct mm_struct * mm, int count, int * race, zone
 
 		for (;;) {
 			count = swap_out_vma(mm, vma, address, count, classzone);
-			if (!count)
-				goto out_unlock;
 			vma = vma->vm_next;
 			if (!vma)
 				break;
+			if (!count)
+				goto out_unlock;
 			address = vma->vm_start;
 		}
 	}
-	/* Reset to 0 when we reach the end of address space */
-	mm->swap_address = 0;
-
-	spin_lock(&mmlist_lock);
-	swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
-	spin_unlock(&mmlist_lock);
+	/* Indicate that we reached the end of address space */
+	mm->swap_address = TASK_SIZE;
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	return count;
 }
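With this change, swap_out_mm() and swap_out() treat swap_address == TASK_SIZE as a "fully scanned" sentinel and walk the mm list round-robin until the cursor wraps. A toy model of the cursor scheme; END, cursor, and pick_mm are invented names:

#include <stdio.h>

#define END	(~0UL)		/* stand-in for TASK_SIZE */
#define NMM	3

static unsigned long cursor[NMM] = { 0, END, 0 };	/* mm 1 already done */

static int pick_mm(int start)
{
	int mm = start;

	/* skip fully-scanned mms, wrapping at most once around the list */
	while (cursor[mm] == END) {
		mm = (mm + 1) % NMM;
		if (mm == start)
			return -1;	/* every mm exhausted: "empty" */
	}
	return mm;
}

int main(void)
{
	int mm = pick_mm(1);

	printf("scan mm %d from address %lu\n", mm, cursor[mm]);
	cursor[mm] = END;	/* pretend we reached the end of its space */
	printf("next mm: %d\n", pick_mm(mm));
	return 0;
}

Leaving the sentinel in place until swap_out() itself resets it is what lets the scanner detect, under mmlist_lock, that a whole pass over every mm has completed.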
 static int FASTCALL(swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages));
 static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages)
 {
-	int counter, race;
+	int counter;
 	struct mm_struct *mm;
 
 	/* Then, look at the other mm's */
@@ -304,9 +297,10 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
 		spin_lock(&mmlist_lock);
 		mm = swap_mm;
-		if (mm == &init_mm) {
+		while (mm->swap_address == TASK_SIZE || mm == &init_mm) {
+			mm->swap_address = 0;
 			mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
-			if (mm == &init_mm)
+			if (mm == swap_mm)
 				goto empty;
 			swap_mm = mm;
 		}
@@ -315,13 +309,13 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
 		atomic_inc(&mm->mm_users);
 		spin_unlock(&mmlist_lock);
 
-		nr_pages = swap_out_mm(mm, nr_pages, &race, classzone);
+		nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone);
 
 		mmput(mm);
 
 		if (!nr_pages)
 			return 1;
-	} while (race || --counter >= 0);
+	} while (--counter >= 0);
 
 	return 0;
 
@@ -330,15 +324,15 @@ static int swap_out(unsigned int priority, zone_t * classzone, unsigned int gfp_
 	return 0;
 }
 
-static int FASTCALL(shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask));
-static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_scan, int nr_pages, zone_t * classzone, unsigned int gfp_mask)
+static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask));
+static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask)
 {
 	struct list_head * entry;
-	int __max_scan = *max_scan;
 
 	spin_lock(&pagemap_lru_lock);
-	while (__max_scan && this_max_scan && (entry = lru->prev) != lru) {
+	while (max_scan && (entry = inactive_list.prev) != &inactive_list) {
 		struct page * page;
+		swp_entry_t swap;
 
 		if (unlikely(current->need_resched)) {
 			spin_unlock(&pagemap_lru_lock);
@@ -353,18 +347,16 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
 		if (unlikely(!PageInactive(page) && !PageActive(page)))
 			BUG();
 
-		this_max_scan--;
-
 		list_del(entry);
-		list_add(entry, lru);
+		list_add(entry, &inactive_list);
 		if (PageTestandClearReferenced(page))
 			continue;
 
+		max_scan--;
+
 		if (unlikely(!memclass(page->zone, classzone)))
 			continue;
 
-		__max_scan--;
-
 		/* Racy check to avoid trylocking when not worthwhile */
 		if (!page->buffers && page_count(page) != 1)
 			continue;
@@ -479,14 +471,24 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
 		}
 
 		/* point of no return */
-		if (likely(!PageSwapCache(page)))
+		if (likely(!PageSwapCache(page))) {
+			swap.val = 0;
 			__remove_inode_page(page);
-		else
+		} else {
+			swap.val = page->index;
 			__delete_from_swap_cache(page);
+		}
 		spin_unlock(&pagecache_lock);
 
 		__lru_cache_del(page);
+
+		if (unlikely(swap.val != 0)) {
+			/* must drop lru lock if getting swap_list lock */
+			spin_unlock(&pagemap_lru_lock);
+			swap_free(swap);
+			spin_lock(&pagemap_lru_lock);
+		}
+
 		UnlockPage(page);
 
 		/* effectively free the page here */
@@ -498,7 +500,6 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
 	}
 	spin_unlock(&pagemap_lru_lock);
 
-	*max_scan = __max_scan;
 	return nr_pages;
 }
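The swap.val bookkeeping above exists because swap_free() takes the swap list lock, which (per the in-line comment) must not be acquired while pagemap_lru_lock is held: the entry is remembered, the LRU lock dropped, the entry freed, and the lock retaken. A self-contained sketch of that drop-then-retake pattern, with pthread mutexes standing in for the kernel spinlocks and the names chosen only for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t swap_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int swap_count = 2;

static void swap_free_slot(void)
{
	pthread_mutex_lock(&swap_list_lock);
	swap_count--;
	pthread_mutex_unlock(&swap_list_lock);
}

int main(void)
{
	unsigned long deferred;

	pthread_mutex_lock(&lru_lock);
	deferred = 42;			/* remember the entry (swap.val) */
	if (deferred) {
		/* must drop the LRU lock before taking the swap list lock */
		pthread_mutex_unlock(&lru_lock);
		swap_free_slot();
		pthread_mutex_lock(&lru_lock);
	}
	pthread_mutex_unlock(&lru_lock);
	printf("swap_count now %d\n", swap_count);
	return 0;
}

Recording the work while one lock is held and performing it after the lock is released is the standard way to avoid nesting two locks in a forbidden order.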
@@ -509,14 +510,10 @@ static int shrink_cache(struct list_head * lru, int * max_scan, int this_max_sca
  * We move them the other way when we see the
  * reference bit on the page.
  */
-static void balance_inactive(int nr_pages)
+static void refill_inactive(int nr_pages)
 {
 	struct list_head * entry;
 
-	/* If we have more inactive pages than active don't do anything */
-	if (nr_active_pages < nr_inactive_pages)
-		return;
-
 	spin_lock(&pagemap_lru_lock);
 	entry = active_list.prev;
 	while (nr_pages-- && entry != &active_list) {
@@ -541,14 +538,17 @@ static void balance_inactive(int nr_pages)
 static int FASTCALL(shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages));
 static int shrink_caches(int priority, zone_t * classzone, unsigned int gfp_mask, int nr_pages)
 {
-	int max_scan = (nr_inactive_pages + nr_active_pages / DEF_PRIORITY) / priority;
+	int max_scan = nr_inactive_pages / priority;
 
 	nr_pages -= kmem_cache_reap(gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
 
-	balance_inactive(nr_pages);
-	nr_pages = shrink_cache(&inactive_list, &max_scan, nr_inactive_pages, nr_pages, classzone, gfp_mask);
+	/* Do we want to age the active list? */
+	if (nr_inactive_pages < nr_active_pages*2)
+		refill_inactive(nr_pages);
+
+	nr_pages = shrink_cache(nr_pages, max_scan, classzone, gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
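Two tunables change in this last hunk: max_scan becomes nr_inactive_pages/priority, and the active list is aged only while the inactive list is shorter than twice the active one. A throwaway calculation with made-up page counts to show how the scan budget grows as reclaim pressure rises (priority in 2.4 counts down from DEF_PRIORITY, which is 6):

#include <stdio.h>

int main(void)
{
	int nr_inactive_pages = 6000, nr_active_pages = 4000;
	int priority;

	/* lower priority value = more desperate = bigger scan budget */
	for (priority = 6; priority >= 1; priority--)
		printf("priority %d: max_scan %d\n",
		       priority, nr_inactive_pages / priority);

	if (nr_inactive_pages < nr_active_pages * 2)
		printf("inactive < 2*active: age the active list\n");
	return 0;
}

With these numbers every pass refills the inactive list, since 6000 is below the 8000-page threshold; once the inactive list reaches twice the active list, refill_inactive() is skipped and reclaim works purely off the inactive end.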
......