Commit 71660e15 authored by Linus Torvalds

Merge bk://linuxusb.bkbits.net/linus-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 31c96625 2dcb8ff9
/*
* Example showing how to pin down a range of virtual pages from user space
* so that, for example, DMA can be done directly into them.
*
* It is necessary because the pages the virtual pointers reference might
* not exist in memory (they could be mapped to the zero-page, file-mapped, etc.)
* and DMA cannot trigger the MMU to force them in (and would have timing
* constraints making it impossible to wait for that anyway).
*
* Author: Bjorn Wesen
*
* $Log: kiobuftest.c,v $
* Revision 1.1.1.1 2001/12/17 13:59:27 bjornw
* Import of Linux 2.5.1
*
* Revision 1.2 2001/02/27 13:52:50 bjornw
* malloc.h -> slab.h
*
* Revision 1.1 2001/01/19 15:57:49 bjornw
* Example of how to do direct HW -> user-mode DMA
*
*
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/iobuf.h>
#define KIOBUFTEST_MAJOR 124 /* in the local range, experimental */
static ssize_t
kiobuf_read(struct file *filp, char *buf, size_t len, loff_t *ppos)
{
struct kiobuf *iobuf;
int res, i;
/* Make a kiobuf that maps the entire length the reader has given
* us
*/
res = alloc_kiovec(1, &iobuf);
if (res)
return res;
if ((res = map_user_kiobuf(READ, iobuf, (unsigned long)buf, len))) {
printk("map_user_kiobuf failed, return %d\n", res);
free_kiovec(1, &iobuf);
return res;
}
/* At this point, the virtual area buf[0] -> buf[len-1] will
* have corresponding pages mapped in physical memory and locked
* until we unmap the kiobuf. They cannot be swapped out or moved
* around.
*/
printk("nr_pages == %d\noffset == %d\nlength == %d\n",
iobuf->nr_pages, iobuf->offset, iobuf->length);
for(i = 0; i < iobuf->nr_pages; i++) {
printk("page_add(maplist[%d]) == 0x%x\n", i,
page_address(iobuf->maplist[i]));
}
/* This is the place to create the necessary scatter-gather vector
* for the DMA using the iobuf->maplist array and page_address
* (don't forget __pa if the DMA needs the actual physical DRAM address)
* and run it.
*/
/* Release the mapping and exit */
unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */
return len;
}
static struct file_operations kiobuf_fops = {
owner: THIS_MODULE,
read: kiobuf_read
};
static int __init
kiobuftest_init(void)
{
int res;
/* register char device */
res = register_chrdev(KIOBUFTEST_MAJOR, "kiobuftest", &kiobuf_fops);
if(res < 0) {
printk(KERN_ERR "kiobuftest: couldn't get a major number.\n");
return res;
}
printk("Initializing kiobuf-test device\n");
}
module_init(kiobuftest_init);
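For reference, a user-space caller of the example driver above could look like the sketch below. The /dev/kiobuftest path, the hand-made device node for major 124, and the buffer size are assumptions for illustration only, not part of the driver.

/* kiobuftest-user.c -- minimal sketch of a user-space caller (assumed paths) */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char *buf = malloc(4 * 4096);	/* a few pages of user memory */
	ssize_t n;
	int fd;

	if (!buf)
		return 1;
	/* assumes "mknod /dev/kiobuftest c 124 0" has been done beforehand */
	fd = open("/dev/kiobuftest", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* the driver pins buf's pages with map_user_kiobuf and prints them */
	n = read(fd, buf, 4 * 4096);
	printf("read returned %zd\n", n);
	close(fd);
	free(buf);
	return 0;
}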
...@@ -70,6 +70,7 @@ static void __init allocate_pgdat(int nid) ...@@ -70,6 +70,7 @@ static void __init allocate_pgdat(int nid)
node_datasz = PFN_UP(sizeof(struct pglist_data)); node_datasz = PFN_UP(sizeof(struct pglist_data));
NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
min_low_pfn += node_datasz; min_low_pfn += node_datasz;
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
} }
/* /*
......
...@@ -49,7 +49,6 @@ fi ...@@ -49,7 +49,6 @@ fi
dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
if [ "$CONFIG_X86" = "y" -o "$CONFIG_PPC32" = "y" ]; then if [ "$CONFIG_X86" = "y" -o "$CONFIG_PPC32" = "y" ]; then
# bool 'Support for Large Block Devices' CONFIG_LBD bool 'Support for Large Block Devices' CONFIG_LBD
define_bool CONFIG_LBD y
fi fi
endmenu endmenu
...@@ -1878,7 +1878,7 @@ void generic_make_request(struct bio *bio) ...@@ -1878,7 +1878,7 @@ void generic_make_request(struct bio *bio)
*/ */
int submit_bio(int rw, struct bio *bio) int submit_bio(int rw, struct bio *bio)
{ {
int count = bio_sectors(bio) >> 1; int count = bio_sectors(bio);
BUG_ON(!bio->bi_end_io); BUG_ON(!bio->bi_end_io);
BIO_BUG_ON(!bio->bi_size); BIO_BUG_ON(!bio->bi_size);
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/iobuf.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <asm/page.h> #include <asm/page.h>
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include <linux/i2c.h> #include <linux/i2c.h>
#include <linux/i2c-algo-bit.h> #include <linux/i2c-algo-bit.h>
#include <linux/videodev.h> #include <linux/videodev.h>
#include <linux/iobuf.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <asm/scatterlist.h> #include <asm/scatterlist.h>
......
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/iobuf.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
...@@ -65,32 +65,31 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages) ...@@ -65,32 +65,31 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
return NULL; return NULL;
} }
struct scatterlist* struct scatterlist *
videobuf_iobuf_to_sg(struct kiobuf *iobuf) videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
{ {
struct scatterlist *sglist; struct scatterlist *sglist;
int i = 0; int i = 0;
sglist = kmalloc(sizeof(struct scatterlist) * iobuf->nr_pages, if (NULL == pages[0])
GFP_KERNEL); return NULL;
sglist = kmalloc(sizeof(*sglist) * nr_pages, GFP_KERNEL);
if (NULL == sglist) if (NULL == sglist)
return NULL; return NULL;
memset(sglist,0,sizeof(struct scatterlist) * iobuf->nr_pages); memset(sglist, 0, sizeof(*sglist) * nr_pages);
if (NULL == iobuf->maplist[0]) if (PageHighMem(pages[0]))
goto err;
if (PageHighMem(iobuf->maplist[0]))
/* DMA to highmem pages might not work */ /* DMA to highmem pages might not work */
goto err; goto err;
sglist[0].page = iobuf->maplist[0]; sglist[0].page = pages[0];
sglist[0].offset = iobuf->offset; sglist[0].offset = offset;
sglist[0].length = PAGE_SIZE - iobuf->offset; sglist[0].length = PAGE_SIZE - offset;
for (i = 1; i < iobuf->nr_pages; i++) { for (i = 1; i < nr_pages; i++) {
if (NULL == iobuf->maplist[i]) if (NULL == pages[i])
goto err; goto err;
if (PageHighMem(iobuf->maplist[i])) if (PageHighMem(pages[i]))
goto err; goto err;
sglist[i].page = iobuf->maplist[i]; sglist[i].page = pages[i];
sglist[i].length = PAGE_SIZE; sglist[i].length = PAGE_SIZE;
} }
return sglist; return sglist;
...@@ -100,6 +99,30 @@ videobuf_iobuf_to_sg(struct kiobuf *iobuf) ...@@ -100,6 +99,30 @@ videobuf_iobuf_to_sg(struct kiobuf *iobuf)
return NULL; return NULL;
} }
int videobuf_lock(struct page **pages, int nr_pages)
{
int i;
for (i = 0; i < nr_pages; i++)
if (TestSetPageLocked(pages[i]))
goto err;
return 0;
err:
while (i > 0)
unlock_page(pages[--i]);
return -EINVAL;
}
int videobuf_unlock(struct page **pages, int nr_pages)
{
int i;
for (i = 0; i < nr_pages; i++)
unlock_page(pages[i]);
return 0;
}
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction, int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
...@@ -113,14 +136,21 @@ int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction, ...@@ -113,14 +136,21 @@ int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
case PCI_DMA_TODEVICE: rw = WRITE; break; case PCI_DMA_TODEVICE: rw = WRITE; break;
default: BUG(); default: BUG();
} }
if (0 != (err = alloc_kiovec(1,&dma->iobuf)))
return err; dma->offset = data & PAGE_MASK;
if (0 != (err = map_user_kiobuf(rw, dma->iobuf, data, size))) { dma->nr_pages = ((((data+size) & ~PAGE_MASK) -
dprintk(1,"map_user_kiobuf: %d\n",err); (data & ~PAGE_MASK)) >> PAGE_SHIFT) +1;
return err; dma->pages = kmalloc(dma->nr_pages * sizeof(struct page*),
} GFP_KERNEL);
dma->nr_pages = dma->iobuf->nr_pages; if (NULL == dma->pages)
return 0; return -ENOMEM;
down_read(&current->mm->mmap_sem);
err = get_user_pages(current,current->mm,
data, dma->nr_pages,
rw == READ, 0, /* don't force */
dma->pages, NULL);
up_read(&current->mm->mmap_sem);
return err;
} }
int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction, int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction,
...@@ -144,13 +174,15 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma) ...@@ -144,13 +174,15 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma)
if (0 == dma->nr_pages) if (0 == dma->nr_pages)
BUG(); BUG();
if (dma->iobuf) { if (dma->pages) {
if (0 != (err = lock_kiovec(1,&dma->iobuf,1))) { if (0 != (err = videobuf_lock(dma->pages, dma->nr_pages))) {
dprintk(1,"lock_kiovec: %d\n",err); dprintk(1,"videobuf_lock_pages: %d\n",err);
return err; return err;
} }
dma->sglist = videobuf_iobuf_to_sg(dma->iobuf); dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages,
dma->offset);
} }
if (dma->vmalloc) { if (dma->vmalloc) {
dma->sglist = videobuf_vmalloc_to_sg dma->sglist = videobuf_vmalloc_to_sg
(dma->vmalloc,dma->nr_pages); (dma->vmalloc,dma->nr_pages);
...@@ -160,7 +192,7 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma) ...@@ -160,7 +192,7 @@ int videobuf_dma_pci_map(struct pci_dev *dev, struct videobuf_dmabuf *dma)
return -ENOMEM; return -ENOMEM;
} }
dma->sglen = pci_map_sg(dev,dma->sglist,dma->nr_pages, dma->sglen = pci_map_sg(dev,dma->sglist,dma->nr_pages,
dma->direction); dma->direction);
return 0; return 0;
} }
...@@ -182,8 +214,8 @@ int videobuf_dma_pci_unmap(struct pci_dev *dev, struct videobuf_dmabuf *dma) ...@@ -182,8 +214,8 @@ int videobuf_dma_pci_unmap(struct pci_dev *dev, struct videobuf_dmabuf *dma)
kfree(dma->sglist); kfree(dma->sglist);
dma->sglist = NULL; dma->sglist = NULL;
dma->sglen = 0; dma->sglen = 0;
if (dma->iobuf) if (dma->pages)
unlock_kiovec(1,&dma->iobuf); videobuf_lock(dma->pages, dma->nr_pages);
return 0; return 0;
} }
...@@ -192,11 +224,14 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) ...@@ -192,11 +224,14 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma)
if (dma->sglen) if (dma->sglen)
BUG(); BUG();
if (dma->iobuf) { if (dma->pages) {
unmap_kiobuf(dma->iobuf); int i;
free_kiovec(1,&dma->iobuf); for (i=0; i < dma->nr_pages; i++)
dma->iobuf = NULL; page_cache_release(dma->pages[i]);
kfree(dma->pages);
dma->pages = NULL;
} }
if (dma->vmalloc) { if (dma->vmalloc) {
vfree(dma->vmalloc); vfree(dma->vmalloc);
dma->vmalloc = NULL; dma->vmalloc = NULL;
...@@ -959,6 +994,7 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma, ...@@ -959,6 +994,7 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma,
map->q = q; map->q = q;
vma->vm_ops = &videobuf_vm_ops; vma->vm_ops = &videobuf_vm_ops;
vma->vm_flags |= VM_DONTEXPAND; vma->vm_flags |= VM_DONTEXPAND;
vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */
vma->vm_private_data = map; vma->vm_private_data = map;
dprintk(1,"mmap %p: %08lx-%08lx pgoff %08lx bufs %d-%d\n", dprintk(1,"mmap %p: %08lx-%08lx pgoff %08lx bufs %d-%d\n",
map,vma->vm_start,vma->vm_end,vma->vm_pgoff,first,last); map,vma->vm_start,vma->vm_end,vma->vm_pgoff,first,last);
...@@ -972,7 +1008,6 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma, ...@@ -972,7 +1008,6 @@ int videobuf_mmap_mapper(struct vm_area_struct *vma,
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
EXPORT_SYMBOL_GPL(videobuf_vmalloc_to_sg); EXPORT_SYMBOL_GPL(videobuf_vmalloc_to_sg);
EXPORT_SYMBOL_GPL(videobuf_iobuf_to_sg);
EXPORT_SYMBOL_GPL(videobuf_dma_init_user); EXPORT_SYMBOL_GPL(videobuf_dma_init_user);
EXPORT_SYMBOL_GPL(videobuf_dma_init_kernel); EXPORT_SYMBOL_GPL(videobuf_dma_init_kernel);
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
struct scatterlist* videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages); struct scatterlist* videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages);
/* /*
* Return a scatterlist for a locked iobuf (NULL on errors). Memory * Return a scatterlist for an array of user pages (NULL on errors). Memory
* for the scatterlist is allocated using kmalloc. The caller must * for the scatterlist is allocated using kmalloc. The caller must
* free the memory. * free the memory.
*/ */
struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf); struct scatterlist *videobuf_pages_to_sg(struct page **pages, int nr_pages,
int offset);
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
...@@ -57,7 +58,8 @@ struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf); ...@@ -57,7 +58,8 @@ struct scatterlist* videobuf_iobuf_to_sg(struct kiobuf *iobuf);
struct videobuf_dmabuf { struct videobuf_dmabuf {
/* for userland buffer */ /* for userland buffer */
struct kiobuf *iobuf; struct page **pages;
int offset;
/* for kernel buffers */ /* for kernel buffers */
void *vmalloc; void *vmalloc;
......
...@@ -11,7 +11,7 @@ export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \ ...@@ -11,7 +11,7 @@ export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
obj-y := open.o read_write.o devices.o file_table.o buffer.o \ obj-y := open.o read_write.o devices.o file_table.o buffer.o \
bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
filesystems.o namespace.o seq_file.o xattr.o libfs.o \ filesystems.o namespace.o seq_file.o xattr.o libfs.o \
fs-writeback.o mpage.o direct-io.o aio.o fs-writeback.o mpage.o direct-io.o aio.o
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
* See ../COPYING for licensing terms. * See ../COPYING for licensing terms.
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/aio_abi.h> #include <linux/aio_abi.h>
...@@ -21,15 +22,9 @@ ...@@ -21,15 +22,9 @@
#include <linux/file.h> #include <linux/file.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/iobuf.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/brlock.h>
#include <linux/aio.h> #include <linux/aio.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/brlock.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/blk.h> #include <linux/blk.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/iobuf.h> #include <linux/init.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mempool.h> #include <linux/mempool.h>
...@@ -438,128 +438,6 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, ...@@ -438,128 +438,6 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
return 0; return 0;
} }
static int bio_end_io_kio(struct bio *bio, unsigned int bytes_done, int error)
{
struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
if (bio->bi_size)
return 1;
end_kio_request(kio, error);
bio_put(bio);
return 0;
}
/**
* ll_rw_kio - submit a &struct kiobuf for I/O
* @rw: %READ or %WRITE
* @kio: the kiobuf to do I/O on
* @bdev: target device
* @sector: start location on disk
*
* Description:
* ll_rw_kio will map the page list inside the &struct kiobuf to
* &struct bio and queue them for I/O. The kiobuf given must describe
* a continuous range of data, and must be fully prepared for I/O.
**/
void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector)
{
int i, offset, size, err, map_i, total_nr_pages, nr_pages;
struct bio *bio;
err = 0;
if ((rw & WRITE) && bdev_read_only(bdev)) {
printk("ll_rw_bio: WRITE to ro device %s\n", bdevname(bdev));
err = -EPERM;
goto out;
}
if (!kio->nr_pages) {
err = -EINVAL;
goto out;
}
/*
* maybe kio is bigger than the max we can easily map into a bio.
* if so, split it up in appropriately sized chunks.
*/
total_nr_pages = kio->nr_pages;
offset = kio->offset & ~PAGE_MASK;
size = kio->length;
atomic_set(&kio->io_count, 1);
map_i = 0;
next_chunk:
nr_pages = BIO_MAX_PAGES;
if (nr_pages > total_nr_pages)
nr_pages = total_nr_pages;
atomic_inc(&kio->io_count);
/*
* allocate bio and do initial setup
*/
if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) {
err = -ENOMEM;
goto out;
}
bio->bi_sector = sector;
bio->bi_bdev = bdev;
bio->bi_idx = 0;
bio->bi_end_io = bio_end_io_kio;
bio->bi_private = kio;
for (i = 0; i < nr_pages; i++, map_i++) {
int nbytes = PAGE_SIZE - offset;
if (nbytes > size)
nbytes = size;
BUG_ON(kio->maplist[map_i] == NULL);
/*
* if we can't add this page to the bio, submit for i/o
* and alloc a new one if needed
*/
if (bio_add_page(bio, kio->maplist[map_i], nbytes, offset))
break;
/*
* kiobuf only has an offset into the first page
*/
offset = 0;
sector += nbytes >> 9;
size -= nbytes;
total_nr_pages--;
kio->offset += nbytes;
}
submit_bio(rw, bio);
if (total_nr_pages)
goto next_chunk;
if (size) {
printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length);
BUG();
}
out:
if (err)
kio->errno = err;
/*
* final atomic_dec of io_count to match our initial setting of 1.
* I/O may or may not have completed at this point, final completion
* handler is only run on last decrement.
*/
end_kio_request(kio, !err);
}
/** /**
* bio_endio - end I/O on a bio * bio_endio - end I/O on a bio
* @bio: bio * @bio: bio
...@@ -662,7 +540,6 @@ module_init(init_bio); ...@@ -662,7 +540,6 @@ module_init(init_bio);
EXPORT_SYMBOL(bio_alloc); EXPORT_SYMBOL(bio_alloc);
EXPORT_SYMBOL(bio_put); EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(ll_rw_kio);
EXPORT_SYMBOL(bio_endio); EXPORT_SYMBOL(bio_endio);
EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(bio_copy); EXPORT_SYMBOL(bio_copy);
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <linux/major.h> #include <linux/major.h>
#include <linux/devfs_fs_kernel.h> #include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/iobuf.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/module.h> #include <linux/module.h>
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <linux/iobuf.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/writeback.h> #include <linux/writeback.h>
...@@ -2300,65 +2299,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, ...@@ -2300,65 +2299,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
return tmp.b_blocknr; return tmp.b_blocknr;
} }
/*
* Start I/O on a physical range of kernel memory, defined by a vector
* of kiobuf structs (much like a user-space iovec list).
*
* The kiobuf must already be locked for IO. IO is submitted
* asynchronously: you need to check page->locked and page->uptodate.
*
* It is up to the caller to make sure that there are enough blocks
* passed in to completely map the iobufs to disk.
*/
int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
struct block_device *bdev, sector_t b[], int size)
{
int transferred;
int i;
int err;
struct kiobuf * iobuf;
if (!nr)
return 0;
/*
* First, do some alignment and validity checks
*/
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1)))
return -EINVAL;
if (!iobuf->nr_pages)
panic("brw_kiovec: iobuf not initialised");
}
/*
* OK to walk down the iovec doing page IO on each page we find.
*/
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
iobuf->errno = 0;
ll_rw_kio(rw, iobuf, bdev, b[i] * (size >> 9));
}
/*
* now they are all submitted, wait for completion
*/
transferred = 0;
err = 0;
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
kiobuf_wait_for_io(iobuf);
if (iobuf->errno && !err)
err = iobuf->errno;
if (!err)
transferred += iobuf->length;
}
return err ? err : transferred;
}
static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err) static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
{ {
struct buffer_head *bh = bio->bi_private; struct buffer_head *bh = bio->bi_private;
......
...@@ -328,7 +328,7 @@ static inline void prune_one_dentry(struct dentry * dentry) ...@@ -328,7 +328,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
* all the dentries are in use. * all the dentries are in use.
*/ */
void prune_dcache(int count) static void prune_dcache(int count)
{ {
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
for (; count ; count--) { for (; count ; count--) {
...@@ -572,25 +572,24 @@ void shrink_dcache_anon(struct list_head *head) ...@@ -572,25 +572,24 @@ void shrink_dcache_anon(struct list_head *head)
* This is called from kswapd when we think we need some * This is called from kswapd when we think we need some
* more memory. * more memory.
*/ */
int shrink_dcache_memory(int ratio, unsigned int gfp_mask) static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
{ {
int entries = dentry_stat.nr_dentry / ratio + 1; if (nr) {
/* /*
* Nasty deadlock avoidance. * Nasty deadlock avoidance.
* *
* ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> * ext2_new_block->getblk->GFP->shrink_dcache_memory->
* prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op-> * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
* put_inode->ext2_discard_prealloc->ext2_free_blocks->lock_super-> * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
* DEADLOCK. * ext2_free_blocks->lock_super->DEADLOCK.
* *
* We should make sure we don't hold the superblock lock over * We should make sure we don't hold the superblock lock over
* block allocations, but for now: * block allocations, but for now:
*/ */
if (!(gfp_mask & __GFP_FS)) if (gfp_mask & __GFP_FS)
return 0; prune_dcache(nr);
}
prune_dcache(entries); return dentry_stat.nr_dentry;
return entries;
} }
#define NAME_ALLOC_LEN(len) ((len+16) & ~15) #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
...@@ -1330,6 +1329,8 @@ static void __init dcache_init(unsigned long mempages) ...@@ -1330,6 +1329,8 @@ static void __init dcache_init(unsigned long mempages)
NULL, NULL); NULL, NULL);
if (!dentry_cache) if (!dentry_cache)
panic("Cannot create dentry cache"); panic("Cannot create dentry cache");
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
#if PAGE_SHIFT < 13 #if PAGE_SHIFT < 13
mempages >>= (13 - PAGE_SHIFT); mempages >>= (13 - PAGE_SHIFT);
...@@ -1375,9 +1376,6 @@ kmem_cache_t *names_cachep; ...@@ -1375,9 +1376,6 @@ kmem_cache_t *names_cachep;
/* SLAB cache for file structures */ /* SLAB cache for file structures */
kmem_cache_t *filp_cachep; kmem_cache_t *filp_cachep;
/* SLAB cache for dquot structures */
kmem_cache_t *dquot_cachep;
EXPORT_SYMBOL(d_genocide); EXPORT_SYMBOL(d_genocide);
extern void bdev_cache_init(void); extern void bdev_cache_init(void);
...@@ -1397,14 +1395,6 @@ void __init vfs_caches_init(unsigned long mempages) ...@@ -1397,14 +1395,6 @@ void __init vfs_caches_init(unsigned long mempages)
if(!filp_cachep) if(!filp_cachep)
panic("Cannot create filp SLAB cache"); panic("Cannot create filp SLAB cache");
#if defined (CONFIG_QUOTA)
dquot_cachep = kmem_cache_create("dquot",
sizeof(struct dquot), sizeof(unsigned long) * 4,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!dquot_cachep)
panic("Cannot create dquot SLAB cache");
#endif
dcache_init(mempages); dcache_init(mempages);
inode_init(mempages); inode_init(mempages);
files_init(mempages); files_init(mempages);
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/string.h> #include <linux/string.h>
...@@ -481,14 +482,14 @@ static void prune_dqcache(int count) ...@@ -481,14 +482,14 @@ static void prune_dqcache(int count)
* more memory * more memory
*/ */
int shrink_dqcache_memory(int ratio, unsigned int gfp_mask) static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
{ {
int entries = dqstats.allocated_dquots / ratio + 1; if (nr) {
lock_kernel();
lock_kernel(); prune_dqcache(nr);
prune_dqcache(entries); unlock_kernel();
unlock_kernel(); }
return entries; return dqstats.allocated_dquots;
} }
/* /*
...@@ -1490,6 +1491,9 @@ static ctl_table sys_table[] = { ...@@ -1490,6 +1491,9 @@ static ctl_table sys_table[] = {
{}, {},
}; };
/* SLAB cache for dquot structures */
kmem_cache_t *dquot_cachep;
static int __init dquot_init(void) static int __init dquot_init(void)
{ {
int i; int i;
...@@ -1499,9 +1503,17 @@ static int __init dquot_init(void) ...@@ -1499,9 +1503,17 @@ static int __init dquot_init(void)
INIT_LIST_HEAD(dquot_hash + i); INIT_LIST_HEAD(dquot_hash + i);
printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__); printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__);
dquot_cachep = kmem_cache_create("dquot",
sizeof(struct dquot), sizeof(unsigned long) * 4,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!dquot_cachep)
panic("Cannot create dquot SLAB cache");
set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory);
return 0; return 0;
} }
__initcall(dquot_init); module_init(dquot_init);
EXPORT_SYMBOL(register_quota_format); EXPORT_SYMBOL(register_quota_format);
EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(unregister_quota_format);
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
#include <linux/dnotify.h> #include <linux/dnotify.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/iobuf.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/security.h> #include <linux/security.h>
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/iobuf.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/security.h> #include <linux/security.h>
......
...@@ -243,22 +243,25 @@ void clear_inode(struct inode *inode) ...@@ -243,22 +243,25 @@ void clear_inode(struct inode *inode)
* Dispose-list gets a local list with local inodes in it, so it doesn't * Dispose-list gets a local list with local inodes in it, so it doesn't
* need to worry about list corruption and SMP locks. * need to worry about list corruption and SMP locks.
*/ */
static void dispose_list(struct list_head * head) static void dispose_list(struct list_head *head)
{ {
struct list_head * inode_entry; int nr_disposed = 0;
struct inode * inode;
while (!list_empty(head)) {
struct inode *inode;
while ((inode_entry = head->next) != head) inode = list_entry(head->next, struct inode, i_list);
{ list_del(&inode->i_list);
list_del(inode_entry);
inode = list_entry(inode_entry, struct inode, i_list);
if (inode->i_data.nrpages) if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode); clear_inode(inode);
destroy_inode(inode); destroy_inode(inode);
inodes_stat.nr_inodes--; nr_disposed++;
} }
spin_lock(&inode_lock);
inodes_stat.nr_inodes -= nr_disposed;
spin_unlock(&inode_lock);
} }
/* /*
...@@ -377,7 +380,7 @@ int invalidate_device(kdev_t dev, int do_sync) ...@@ -377,7 +380,7 @@ int invalidate_device(kdev_t dev, int do_sync)
!inode_has_buffers(inode)) !inode_has_buffers(inode))
#define INODE(entry) (list_entry(entry, struct inode, i_list)) #define INODE(entry) (list_entry(entry, struct inode, i_list))
void prune_icache(int goal) static inline void prune_icache(int goal)
{ {
LIST_HEAD(list); LIST_HEAD(list);
struct list_head *entry, *freeable = &list; struct list_head *entry, *freeable = &list;
...@@ -417,23 +420,19 @@ void prune_icache(int goal) ...@@ -417,23 +420,19 @@ void prune_icache(int goal)
* This is called from kswapd when we think we need some * This is called from kswapd when we think we need some
* more memory. * more memory.
*/ */
int shrink_icache_memory(int ratio, unsigned int gfp_mask) static int shrink_icache_memory(int nr, unsigned int gfp_mask)
{ {
int entries = inodes_stat.nr_inodes / ratio + 1; if (nr) {
/* /*
* Nasty deadlock avoidance.. * Nasty deadlock avoidance. We may hold various FS locks,
* * and we don't want to recurse into the FS that called us
* We may hold various FS locks, and we don't * in clear_inode() and friends..
* want to recurse into the FS that called us */
* in clear_inode() and friends.. if (gfp_mask & __GFP_FS)
*/ prune_icache(nr);
if (!(gfp_mask & __GFP_FS)) }
return 0; return inodes_stat.nr_inodes;
prune_icache(entries);
return entries;
} }
EXPORT_SYMBOL(shrink_icache_memory);
/* /*
* Called with the inode lock held. * Called with the inode lock held.
...@@ -1226,4 +1225,6 @@ void __init inode_init(unsigned long mempages) ...@@ -1226,4 +1225,6 @@ void __init inode_init(unsigned long mempages)
NULL); NULL);
if (!inode_cachep) if (!inode_cachep)
panic("cannot create inode slab cache"); panic("cannot create inode slab cache");
set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
} }
/*
* iobuf.c
*
* Keep track of the general-purpose IO-buffer structures used to track
* abstract kernel-space io buffers.
*
*/
#include <linux/iobuf.h>
#include <linux/slab.h>
int end_kio_request(struct kiobuf *kiobuf, int uptodate)
{
int ret = 1;
if ((!uptodate) && !kiobuf->errno)
kiobuf->errno = -EIO;
if (atomic_dec_and_test(&kiobuf->io_count)) {
ret = 0;
if (kiobuf->end_io)
kiobuf->end_io(kiobuf);
wake_up(&kiobuf->wait_queue);
}
return ret;
}
static void kiobuf_init(struct kiobuf *iobuf)
{
init_waitqueue_head(&iobuf->wait_queue);
atomic_set(&iobuf->io_count, 0);
iobuf->array_len = KIO_STATIC_PAGES;
iobuf->maplist = iobuf->map_array;
iobuf->nr_pages = 0;
iobuf->locked = 0;
iobuf->io_count.counter = 0;
iobuf->end_io = NULL;
}
int alloc_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL);
if (!iobuf) {
free_kiovec(i, bufp);
return -ENOMEM;
}
kiobuf_init(iobuf);
bufp[i] = iobuf;
}
return 0;
}
void free_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
iobuf = bufp[i];
if (iobuf->locked)
unlock_kiovec(1, &iobuf);
if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
kfree(bufp[i]);
}
}
int expand_kiobuf(struct kiobuf *iobuf, int wanted)
{
struct page ** maplist;
if (iobuf->array_len >= wanted)
return 0;
maplist = (struct page **)
kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
if (!maplist)
return -ENOMEM;
/* Did it grow while we waited? */
if (iobuf->array_len >= wanted) {
kfree(maplist);
return 0;
}
memcpy (maplist, iobuf->maplist, iobuf->array_len * sizeof(struct page **));
if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
iobuf->maplist = maplist;
iobuf->array_len = wanted;
return 0;
}
void kiobuf_wait_for_io(struct kiobuf *kiobuf)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
if (atomic_read(&kiobuf->io_count) == 0)
return;
add_wait_queue(&kiobuf->wait_queue, &wait);
repeat:
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (atomic_read(&kiobuf->io_count) != 0) {
blk_run_queues();
schedule();
if (atomic_read(&kiobuf->io_count) != 0)
goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&kiobuf->wait_queue, &wait);
}
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/tty.h> #include <linux/tty.h>
#include <linux/iobuf.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/security.h> #include <linux/security.h>
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/iobuf.h>
STATIC int delalloc_convert(struct inode *, struct page *, int, int); STATIC int delalloc_convert(struct inode *, struct page *, int, int);
......
...@@ -35,7 +35,6 @@ ...@@ -35,7 +35,6 @@
#include <xfs_dfrag.h> #include <xfs_dfrag.h>
#include <linux/dcache.h> #include <linux/dcache.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/iobuf.h>
extern int xfs_change_file_space(bhv_desc_t *, int, extern int xfs_change_file_space(bhv_desc_t *, int,
...@@ -605,6 +604,7 @@ xfs_ioctl( ...@@ -605,6 +604,7 @@ xfs_ioctl(
* it is set to the file system block size to * it is set to the file system block size to
* avoid having to do block zeroing on short writes. * avoid having to do block zeroing on short writes.
*/ */
#define KIO_MAX_ATOMIC_IO 512 /* FIXME: what do we really want here? */
da.d_maxiosz = XFS_FSB_TO_B(mp, da.d_maxiosz = XFS_FSB_TO_B(mp,
XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10)); XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10));
......
...@@ -32,7 +32,6 @@ enum bh_state_bits { ...@@ -32,7 +32,6 @@ enum bh_state_bits {
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
struct page; struct page;
struct kiobuf;
struct buffer_head; struct buffer_head;
struct address_space; struct address_space;
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
......
...@@ -180,17 +180,6 @@ extern void shrink_dcache_parent(struct dentry *); ...@@ -180,17 +180,6 @@ extern void shrink_dcache_parent(struct dentry *);
extern void shrink_dcache_anon(struct list_head *); extern void shrink_dcache_anon(struct list_head *);
extern int d_invalidate(struct dentry *); extern int d_invalidate(struct dentry *);
/* dcache memory management */
extern int shrink_dcache_memory(int, unsigned int);
extern void prune_dcache(int);
/* icache memory management (defined in linux/fs/inode.c) */
extern int shrink_icache_memory(int, unsigned int);
extern void prune_icache(int);
/* quota cache memory management (defined in linux/fs/dquot.c) */
extern int shrink_dqcache_memory(int, unsigned int);
/* only used at mount-time */ /* only used at mount-time */
extern struct dentry * d_alloc_root(struct inode *); extern struct dentry * d_alloc_root(struct inode *);
......
/*
* iobuf.h
*
* Defines the structures used to track abstract kernel-space io buffers.
*
*/
#ifndef __LINUX_IOBUF_H
#define __LINUX_IOBUF_H
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <asm/atomic.h>
/*
* The kiobuf structure describes a physical set of pages reserved and
* locked for IO. The reference counts on each page will have been
* incremented, and the flags field will indicate whether or not we have
* pre-locked all of the pages for IO.
*
* kiobufs may be passed in arrays to form a kiovec, but we must
* preserve the property that no page is present more than once over the
* entire iovec.
*/
#define KIO_MAX_ATOMIC_IO 512 /* in kb */
#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
/* The main kiobuf struct */
struct kiobuf
{
int nr_pages; /* Pages actually referenced */
int array_len; /* Space in the allocated lists */
int offset; /* Offset to start of valid data */
int length; /* Number of valid bytes of data */
/* Keep separate track of the physical addresses and page
* structs involved. If we do IO to a memory-mapped device
* region, there won't necessarily be page structs defined for
* every address. */
struct page ** maplist;
unsigned int locked : 1; /* If set, the pages have been locked */
/* Always embed enough struct pages for atomic IO */
struct page * map_array[KIO_STATIC_PAGES];
sector_t blocks[KIO_MAX_SECTORS];
/* Dynamic state for IO completion: */
atomic_t io_count; /* IOs still in progress */
int errno; /* Status of completed IO */
void (*end_io) (struct kiobuf *); /* Completion callback */
wait_queue_head_t wait_queue;
};
/* mm/memory.c */
int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
void unmap_kiobuf(struct kiobuf *iobuf);
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
int unlock_kiovec(int nr, struct kiobuf *iovec[]);
void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes);
/* fs/iobuf.c */
int end_kio_request(struct kiobuf *, int);
void simple_wakeup_kiobuf(struct kiobuf *);
int alloc_kiovec(int nr, struct kiobuf **);
void free_kiovec(int nr, struct kiobuf **);
int expand_kiobuf(struct kiobuf *, int);
void kiobuf_wait_for_io(struct kiobuf *);
extern int alloc_kiobuf_bhs(struct kiobuf *);
extern void free_kiobuf_bhs(struct kiobuf *);
/* fs/buffer.c */
int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
struct block_device *bdev, sector_t [], int size);
/* fs/bio.c */
void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t block);
#endif /* __LINUX_IOBUF_H */
...@@ -391,6 +391,29 @@ extern int free_hugepages(struct vm_area_struct *); ...@@ -391,6 +391,29 @@ extern int free_hugepages(struct vm_area_struct *);
#endif #endif
/*
* Prototype to add a shrinker callback for ageable caches.
*
* These functions are passed a count `nr_to_scan' and a gfpmask. They should
* scan `nr_to_scan' objects, attempting to free them.
*
* The callback must return the number of objects which remain in the cache.
*
* The callback will be passed nr_to_scan == 0 when the VM is querying the
* cache size, so a fastpath for that case is appropriate.
*/
typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
/*
* Add an aging callback. The int is the number of 'seeks' it takes
* to recreate one of the objects that these functions age.
*/
#define DEFAULT_SEEKS 2
struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
/* /*
* If the mapping doesn't provide a set_page_dirty a_op, then * If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads. * just fall through and assume that it wants buffer_heads.
......
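As a sketch of how a cache hooks into this interface (this mirrors the dcache and dquot conversions elsewhere in this patch; my_cache_count and my_cache_prune() are placeholder names, not kernel symbols):

/* Hypothetical cache registering itself with the shrinker interface. */
static int my_cache_count;		/* objects currently held in the cache */
static struct shrinker *my_shrinker;

static int shrink_my_cache(int nr_to_scan, unsigned int gfp_mask)
{
	if (nr_to_scan) {
		/* free up to nr_to_scan objects; gfp_mask limits what may block */
		my_cache_prune(nr_to_scan);
	}
	/* always report how many objects remain */
	return my_cache_count;
}

static int __init my_cache_init(void)
{
	my_shrinker = set_shrinker(DEFAULT_SEEKS, shrink_my_cache);
	return 0;
}

static void __exit my_cache_exit(void)
{
	remove_shrinker(my_shrinker);
}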
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/blk.h> #include <linux/blk.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/iobuf.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/tty.h> #include <linux/tty.h>
#include <linux/percpu.h> #include <linux/percpu.h>
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/iobuf.h>
#include <linux/console.h> #include <linux/console.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
...@@ -103,6 +102,8 @@ EXPORT_SYMBOL(kmem_cache_shrink); ...@@ -103,6 +102,8 @@ EXPORT_SYMBOL(kmem_cache_shrink);
EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_alloc);
EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmem_cache_free);
EXPORT_SYMBOL(kmem_cache_size); EXPORT_SYMBOL(kmem_cache_size);
EXPORT_SYMBOL(set_shrinker);
EXPORT_SYMBOL(remove_shrinker);
EXPORT_SYMBOL(kmalloc); EXPORT_SYMBOL(kmalloc);
EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(kfree);
EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(vfree);
...@@ -246,7 +247,6 @@ EXPORT_SYMBOL(dput); ...@@ -246,7 +247,6 @@ EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(have_submounts); EXPORT_SYMBOL(have_submounts);
EXPORT_SYMBOL(d_find_alias); EXPORT_SYMBOL(d_find_alias);
EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(prune_dcache);
EXPORT_SYMBOL(shrink_dcache_sb); EXPORT_SYMBOL(shrink_dcache_sb);
EXPORT_SYMBOL(shrink_dcache_parent); EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_anon); EXPORT_SYMBOL(shrink_dcache_anon);
...@@ -438,18 +438,6 @@ EXPORT_SYMBOL(__br_write_lock); ...@@ -438,18 +438,6 @@ EXPORT_SYMBOL(__br_write_lock);
EXPORT_SYMBOL(__br_write_unlock); EXPORT_SYMBOL(__br_write_unlock);
#endif #endif
/* Kiobufs */
EXPORT_SYMBOL(alloc_kiovec);
EXPORT_SYMBOL(free_kiovec);
EXPORT_SYMBOL(expand_kiobuf);
EXPORT_SYMBOL(map_user_kiobuf);
EXPORT_SYMBOL(unmap_kiobuf);
EXPORT_SYMBOL(lock_kiovec);
EXPORT_SYMBOL(unlock_kiovec);
EXPORT_SYMBOL(brw_kiovec);
EXPORT_SYMBOL(kiobuf_wait_for_io);
#ifdef HAVE_DISABLE_HLT #ifdef HAVE_DISABLE_HLT
EXPORT_SYMBOL(disable_hlt); EXPORT_SYMBOL(disable_hlt);
EXPORT_SYMBOL(enable_hlt); EXPORT_SYMBOL(enable_hlt);
......
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/iobuf.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
......
...@@ -40,7 +40,6 @@ ...@@ -40,7 +40,6 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/iobuf.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/vcache.h> #include <linux/vcache.h>
...@@ -504,7 +503,7 @@ follow_page(struct mm_struct *mm, unsigned long address, int write) ...@@ -504,7 +503,7 @@ follow_page(struct mm_struct *mm, unsigned long address, int write)
/* /*
* Given a physical address, is there a useful struct page pointing to * Given a physical address, is there a useful struct page pointing to
* it? This may become more complex in the future if we start dealing * it? This may become more complex in the future if we start dealing
* with IO-aperture pages in kiobufs. * with IO-aperture pages for direct-IO.
*/ */
static inline struct page *get_page_map(struct page *page) static inline struct page *get_page_map(struct page *page)
...@@ -589,224 +588,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -589,224 +588,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
return i; return i;
} }
/*
* Force in an entire range of pages from the current process's user VA,
* and pin them in physical memory.
*/
#define dprintk(x...)
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
{
int pgcount, err;
struct mm_struct * mm;
/* Make sure the iobuf is not already mapped somewhere. */
if (iobuf->nr_pages)
return -EINVAL;
mm = current->mm;
dprintk ("map_user_kiobuf: begin\n");
pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE;
/* mapping 0 bytes is not permitted */
if (!pgcount) BUG();
err = expand_kiobuf(iobuf, pgcount);
if (err)
return err;
iobuf->locked = 0;
iobuf->offset = va & (PAGE_SIZE-1);
iobuf->length = len;
/* Try to fault in all of the necessary pages */
down_read(&mm->mmap_sem);
/* rw==READ means read from disk, write into memory area */
err = get_user_pages(current, mm, va, pgcount,
(rw==READ), 0, iobuf->maplist, NULL);
up_read(&mm->mmap_sem);
if (err < 0) {
unmap_kiobuf(iobuf);
dprintk ("map_user_kiobuf: end %d\n", err);
return err;
}
iobuf->nr_pages = err;
while (pgcount--) {
/* FIXME: flush superfluous for rw==READ,
* probably wrong function for rw==WRITE
*/
flush_dcache_page(iobuf->maplist[pgcount]);
}
dprintk ("map_user_kiobuf: end OK\n");
return 0;
}
/*
* Mark all of the pages in a kiobuf as dirty
*
* We need to be able to deal with short reads from disk: if an IO error
* occurs, the number of bytes read into memory may be less than the
* size of the kiobuf, so we have to stop marking pages dirty once the
* requested byte count has been reached.
*/
void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
{
int index, offset, remaining;
struct page *page;
index = iobuf->offset >> PAGE_SHIFT;
offset = iobuf->offset & ~PAGE_MASK;
remaining = bytes;
if (remaining > iobuf->length)
remaining = iobuf->length;
while (remaining > 0 && index < iobuf->nr_pages) {
page = iobuf->maplist[index];
if (!PageReserved(page))
set_page_dirty(page);
remaining -= (PAGE_SIZE - offset);
offset = 0;
index++;
}
}
/*
* Unmap all of the pages referenced by a kiobuf. We release the pages,
* and unlock them if they were locked.
*/
void unmap_kiobuf (struct kiobuf *iobuf)
{
int i;
struct page *map;
for (i = 0; i < iobuf->nr_pages; i++) {
map = iobuf->maplist[i];
if (map) {
if (iobuf->locked)
unlock_page(map);
/* FIXME: cache flush missing for rw==READ
* FIXME: call the correct reference counting function
*/
page_cache_release(map);
}
}
iobuf->nr_pages = 0;
iobuf->locked = 0;
}
/*
* Lock down all of the pages of a kiovec for IO.
*
* If any page is mapped twice in the kiovec, we return the error -EINVAL.
*
* The optional wait parameter causes the lock call to block until all
* pages can be locked if set. If wait==0, the lock operation is
* aborted if any locked pages are found and -EAGAIN is returned.
*/
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
{
struct kiobuf *iobuf;
int i, j;
struct page *page, **ppage;
int doublepage = 0;
int repeat = 0;
repeat:
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
if (iobuf->locked)
continue;
ppage = iobuf->maplist;
for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
page = *ppage;
if (!page)
continue;
if (TestSetPageLocked(page)) {
while (j--) {
struct page *tmp = *--ppage;
if (tmp)
unlock_page(tmp);
}
goto retry;
}
}
iobuf->locked = 1;
}
return 0;
retry:
/*
* We couldn't lock one of the pages. Undo the locking so far,
* wait on the page we got to, and try again.
*/
unlock_kiovec(nr, iovec);
if (!wait)
return -EAGAIN;
/*
* Did the release also unlock the page we got stuck on?
*/
if (!PageLocked(page)) {
/*
* If so, we may well have the page mapped twice
* in the IO address range. Bad news. Of
* course, it _might_ just be a coincidence,
* but if it happens more than once, chances
* are we have a double-mapped page.
*/
if (++doublepage >= 3)
return -EINVAL;
/* Try again... */
wait_on_page_locked(page);
}
if (++repeat < 16)
goto repeat;
return -EAGAIN;
}
/*
* Unlock all of the pages of a kiovec after IO.
*/
int unlock_kiovec(int nr, struct kiobuf *iovec[])
{
struct kiobuf *iobuf;
int i, j;
struct page *page, **ppage;
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
if (!iobuf->locked)
continue;
iobuf->locked = 0;
ppage = iobuf->maplist;
for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
page = *ppage;
if (!page)
continue;
unlock_page(page);
}
}
return 0;
}
static inline void zeromap_pte_range(pte_t * pte, unsigned long address, static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
unsigned long size, pgprot_t prot) unsigned long size, pgprot_t prot)
{ {
......
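The pinning that map_user_kiobuf()/unmap_kiobuf() used to provide is now done by callers with get_user_pages() directly, as the video-buf.c change above shows. A minimal sketch of that replacement pattern follows; pin_user_range and unpin_user_range are illustrative names, not kernel API.

/* Pin a user virtual range; returns the number of pages pinned or -errno. */
static int pin_user_range(int rw, unsigned long va, int nr_pages,
			  struct page **pages)
{
	int got;

	down_read(&current->mm->mmap_sem);
	/* rw == READ means the device writes into memory, so ask for write access */
	got = get_user_pages(current, current->mm, va, nr_pages,
			     rw == READ, 0 /* don't force */, pages, NULL);
	up_read(&current->mm->mmap_sem);
	return got;
}

/* Drop the references taken by get_user_pages() once the I/O has completed. */
static void unpin_user_range(struct page **pages, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		if (pages[i])
			page_cache_release(pages[i]);
}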
...@@ -137,6 +137,9 @@ static int msync_interval(struct vm_area_struct * vma, ...@@ -137,6 +137,9 @@ static int msync_interval(struct vm_area_struct * vma,
int ret = 0; int ret = 0;
struct file * file = vma->vm_file; struct file * file = vma->vm_file;
if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
return -EBUSY;
if (file && (vma->vm_flags & VM_SHARED)) { if (file && (vma->vm_flags & VM_SHARED)) {
ret = filemap_sync(vma, start, end-start, flags); ret = filemap_sync(vma, start, end-start, flags);
...@@ -173,6 +176,8 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) ...@@ -173,6 +176,8 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
goto out; goto out;
if (start & ~PAGE_MASK) if (start & ~PAGE_MASK)
goto out; goto out;
if ((flags & MS_ASYNC) && (flags & MS_SYNC))
goto out;
error = -ENOMEM; error = -ENOMEM;
len = (len + ~PAGE_MASK) & PAGE_MASK; len = (len + ~PAGE_MASK) & PAGE_MASK;
end = start + len; end = start + len;
......
...@@ -1147,6 +1147,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) ...@@ -1147,6 +1147,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
if (!ps) if (!ps)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
get_full_page_state(ps); get_full_page_state(ps);
ps->pgpgin /= 2; /* sectors -> kbytes */
ps->pgpgout /= 2;
return (unsigned long *)ps + *pos; return (unsigned long *)ps + *pos;
} }
......
...@@ -77,9 +77,94 @@ static long total_memory; ...@@ -77,9 +77,94 @@ static long total_memory;
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0) #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif #endif
#ifndef CONFIG_QUOTA /*
#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0) * The list of shrinker callbacks used to apply pressure to
#endif * ageable caches.
*/
struct shrinker {
shrinker_t shrinker;
struct list_head list;
int seeks; /* seeks to recreate an obj */
int nr; /* objs pending delete */
};
static LIST_HEAD(shrinker_list);
static DECLARE_MUTEX(shrinker_sem);
/*
* Add a shrinker callback to be called from the vm
*/
struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
{
struct shrinker *shrinker;
shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
if (shrinker) {
shrinker->shrinker = theshrinker;
shrinker->seeks = seeks;
shrinker->nr = 0;
down(&shrinker_sem);
list_add(&shrinker->list, &shrinker_list);
up(&shrinker_sem);
}
return shrinker;
}
/*
* Remove one
*/
void remove_shrinker(struct shrinker *shrinker)
{
down(&shrinker_sem);
list_del(&shrinker->list);
up(&shrinker_sem);
kfree(shrinker);
}
#define SHRINK_BATCH 32
/*
* Call the shrink functions to age shrinkable caches
*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* If the vm encounters mapped pages on the LRU it increases the pressure on
* slab to avoid swapping.
*
* FIXME: do not do for zone highmem
*/
static int shrink_slab(int scanned, unsigned int gfp_mask)
{
struct list_head *lh;
int pages;
if (down_trylock(&shrinker_sem))
return 0;
pages = nr_used_zone_pages();
list_for_each(lh, &shrinker_list) {
struct shrinker *shrinker;
int entries;
unsigned long delta;
shrinker = list_entry(lh, struct shrinker, list);
entries = (*shrinker->shrinker)(0, gfp_mask);
if (!entries)
continue;
delta = scanned * shrinker->seeks * entries;
shrinker->nr += delta / (pages + 1);
if (shrinker->nr > SHRINK_BATCH) {
int nr = shrinker->nr;
shrinker->nr = 0;
(*shrinker->shrinker)(nr, gfp_mask);
}
}
up(&shrinker_sem);
return 0;
}
/* Must be called with page's pte_chain_lock held. */ /* Must be called with page's pte_chain_lock held. */
static inline int page_mapping_inuse(struct page * page) static inline int page_mapping_inuse(struct page * page)
...@@ -626,32 +711,6 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask, ...@@ -626,32 +711,6 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
max_scan, nr_mapped); max_scan, nr_mapped);
} }
/*
* FIXME: don't do this for ZONE_HIGHMEM
*/
/*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* NOTE: for now I do this for all zones. If we find this is too aggressive
* on large boxes we may want to exclude ZONE_HIGHMEM.
*
* If we're encountering mapped pages on the LRU then increase the pressure on
* slab to avoid swapping.
*/
static void shrink_slab(int total_scanned, int gfp_mask)
{
int shrink_ratio;
int pages = nr_used_zone_pages();
shrink_ratio = (pages / (total_scanned + 1)) + 1;
shrink_dcache_memory(shrink_ratio, gfp_mask);
shrink_icache_memory(shrink_ratio, gfp_mask);
shrink_dqcache_memory(shrink_ratio, gfp_mask);
}
/* /*
* This is the direct reclaim path, for page-allocating processes. We only * This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation * try to reclaim pages from zones which will satisfy the caller's allocation
...@@ -695,7 +754,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned, ...@@ -695,7 +754,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned,
} }
return ret; return ret;
} }
/* /*
* This is the main entry point to direct page reclaim. * This is the main entry point to direct page reclaim.
* *
......