Commit a0ee5ec5 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

tmpfs: allocate on read when stacked

tmpfs is expected to limit the memory used (unless mounted with nr_blocks=0 or
size=0).  But if a stacked filesystem such as unionfs gets pages from a sparse
tmpfs file by reading holes, and then writes to them, it can easily exceed any
such limit at present.

So suppress the SGP_READ "don't allocate page" ZERO_PAGE optimization when
reading for the kernel (a KERNEL_DS check, ugh, sorry about that).  Indeed,
pessimistically mark such pages as dirty, so they cannot get reclaimed and
unaccounted by mistake.  The venerable shmem_recalc_inode code (originally to
account for the reclaim of clean pages) suffices to get the accounting right
when swappages are dropped in favour of more uptodate filepages.

This also fixes the NULL shmem_swp_entry BUG or oops in shmem_writepage,
caused by unionfs writing to a very sparse tmpfs file: to minimize memory
allocation in swapout, tmpfs requires the swap vector be allocated upfront,
which wasn't always happening in this stacked case.
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent d9fe526a
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
enum sgp_type { enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
SGP_WRITE, /* may exceed i_size, may allocate page */ SGP_WRITE, /* may exceed i_size, may allocate page */
}; };
...@@ -1333,6 +1334,8 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1333,6 +1334,8 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
clear_highpage(filepage); clear_highpage(filepage);
flush_dcache_page(filepage); flush_dcache_page(filepage);
SetPageUptodate(filepage); SetPageUptodate(filepage);
if (sgp == SGP_DIRTY)
set_page_dirty(filepage);
} }
done: done:
*pagep = filepage; *pagep = filepage;
...@@ -1518,6 +1521,15 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1518,6 +1521,15 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
struct inode *inode = filp->f_path.dentry->d_inode; struct inode *inode = filp->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
unsigned long index, offset; unsigned long index, offset;
enum sgp_type sgp = SGP_READ;
/*
* Might this read be for a stacking filesystem? Then when reading
* holes of a sparse file, we actually need to allocate those pages,
* and even mark them dirty, so it cannot exceed the max_blocks limit.
*/
if (segment_eq(get_fs(), KERNEL_DS))
sgp = SGP_DIRTY;
index = *ppos >> PAGE_CACHE_SHIFT; index = *ppos >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK; offset = *ppos & ~PAGE_CACHE_MASK;
...@@ -1536,7 +1548,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1536,7 +1548,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
break; break;
} }
desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
if (desc->error) { if (desc->error) {
if (desc->error == -EINVAL) if (desc->error == -EINVAL)
desc->error = 0; desc->error = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment