Commit 9e18eb29 authored by Andres Lagar-Cavilla, committed by Linus Torvalds

tmpfs: mem_cgroup charge fault to vm_mm not current mm

Although shmem_fault() has been careful to count a major fault to vm_mm,
shmem_getpage_gfp() has been careless in charging a remote access fault
to current->mm owner's memcg instead of to vma->vm_mm owner's memcg:
that is inconsistent with all the mem_cgroup charging on remote access
faults in mm/memory.c.
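
For a concrete picture of such a remote access fault, consider process B
writing into process A's tmpfs mapping through /proc/<pid>/mem: the page
is faulted into A's vma, so vma->vm_mm belongs to A while current->mm
belongs to B, and it is A's memcg that ought to be charged.  A minimal
hypothetical userspace sketch (B needs ptrace permission over A, and the
address of a tmpfs mapping in A):

/*
 * Hypothetical demo, not part of the patch: write one byte into another
 * process's mapping via /proc/<pid>/mem.  The fault is taken on the
 * target's vma (vma->vm_mm is the target's mm) while current->mm is the
 * writer's mm.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	char path[64];
	unsigned long long addr;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <hex-address>\n", argv[0]);
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/%s/mem", argv[1]);
	addr = strtoull(argv[2], NULL, 16);

	fd = open(path, O_RDWR);	/* requires ptrace access to <pid> */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* This write faults the page in on the target's vma: a remote fault */
	if (pwrite(fd, "x", 1, (off_t)addr) != 1)
		perror("pwrite");
	close(fd);
	return 0;
}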

Fix it by passing fault_mm along with fault_type to
shmem_getpage_gfp(); but in that case, now knowing the right mm, it's
better for it to handle the PGMAJFAULT updates itself.
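
Inside shmem_getpage_gfp() the choice then reduces to one line,
charge_mm = fault_mm ? : current->mm, using GCC's omitted-middle-operand
conditional.  A standalone sketch of that idiom (the strings here merely
stand in for the kernel's mm pointers):

/* Demo of the GNU "x ?: y" conditional: it yields x when x is non-NULL
 * (nonzero), else y, evaluating x only once.  Builds with gcc or clang. */
#include <stdio.h>

int main(void)
{
	const char *fault_mm = NULL;		/* no faulting mm supplied */
	const char *current_mm = "current->mm";
	const char *charge_mm = fault_mm ?: current_mm;

	printf("charge %s\n", charge_mm);	/* prints "charge current->mm" */
	return 0;
}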

And let's keep this clutter out of most callers' way: change the common
shmem_getpage() wrapper to hide fault_mm and fault_type as well as gfp.
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 75edd345
@@ -121,13 +121,14 @@ static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 				struct shmem_inode_info *info, pgoff_t index);
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+		struct page **pagep, enum sgp_type sgp,
+		gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
 
 static inline int shmem_getpage(struct inode *inode, pgoff_t index,
-	struct page **pagep, enum sgp_type sgp, int *fault_type)
+		struct page **pagep, enum sgp_type sgp)
 {
 	return shmem_getpage_gfp(inode, index, pagep, sgp,
-			mapping_gfp_mask(inode->i_mapping), fault_type);
+		mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }
 
 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
@@ -527,7 +528,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
 	if (partial_start) {
 		struct page *page = NULL;
-		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+		shmem_getpage(inode, start - 1, &page, SGP_READ);
 		if (page) {
 			unsigned int top = PAGE_SIZE;
 			if (start > end) {
@@ -542,7 +543,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	}
 	if (partial_end) {
 		struct page *page = NULL;
-		shmem_getpage(inode, end, &page, SGP_READ, NULL);
+		shmem_getpage(inode, end, &page, SGP_READ);
 		if (page) {
 			zero_user_segment(page, 0, partial_end);
 			set_page_dirty(page);
@@ -1115,14 +1116,19 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
- * entry since a page cannot live in both the swap and page cache
+ * entry since a page cannot live in both the swap and page cache.
+ *
+ * fault_mm and fault_type are only supplied by shmem_fault:
+ * otherwise they are NULL.
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
+	struct mm_struct *fault_mm, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo;
+	struct mm_struct *charge_mm;
 	struct mem_cgroup *memcg;
 	struct page *page;
 	swp_entry_t swap;
@@ -1168,14 +1174,19 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	 */
 	info = SHMEM_I(inode);
 	sbinfo = SHMEM_SB(inode->i_sb);
+	charge_mm = fault_mm ? : current->mm;
 
 	if (swap.val) {
 		/* Look it up and read it in.. */
 		page = lookup_swap_cache(swap);
 		if (!page) {
-			/* here we actually do the io */
-			if (fault_type)
+			/* Or update major stats only when swapin succeeds?? */
+			if (fault_type) {
 				*fault_type |= VM_FAULT_MAJOR;
+				count_vm_event(PGMAJFAULT);
+				mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
+			}
+			/* Here we actually start the io */
 			page = shmem_swapin(swap, gfp, info, index);
 			if (!page) {
 				error = -ENOMEM;
@@ -1202,7 +1213,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 				goto failed;
 		}
 
-		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
 				false);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
@@ -1263,7 +1274,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
 				false);
 		if (error)
 			goto decused;
@@ -1352,6 +1363,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vma->vm_file);
+	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
@@ -1413,14 +1425,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		spin_unlock(&inode->i_lock);
 	}
 
-	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
+	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
+				  gfp, vma->vm_mm, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
-
-	if (ret & VM_FAULT_MAJOR) {
-		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-	}
 	return ret;
 }
 
@@ -1567,7 +1575,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			return -EPERM;
 	}
 
-	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+	return shmem_getpage(inode, index, pagep, SGP_WRITE);
 }
 
 static int
@@ -1633,7 +1641,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 				break;
 		}
 
-		error = shmem_getpage(inode, index, &page, sgp, NULL);
+		error = shmem_getpage(inode, index, &page, sgp);
 		if (error) {
 			if (error == -EINVAL)
 				error = 0;
@@ -1749,7 +1757,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	error = 0;
 
 	while (spd.nr_pages < nr_pages) {
-		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+		error = shmem_getpage(inode, index, &page, SGP_CACHE);
 		if (error)
 			break;
 		unlock_page(page);
@@ -1771,8 +1779,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 		page = spd.pages[page_nr];
 
 		if (!PageUptodate(page) || page->mapping != mapping) {
-			error = shmem_getpage(inode, index, &page,
-							SGP_CACHE, NULL);
+			error = shmem_getpage(inode, index, &page, SGP_CACHE);
 			if (error)
 				break;
 			unlock_page(page);
@@ -2215,8 +2222,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
 			error = -ENOMEM;
 		else
-			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
-									NULL);
+			error = shmem_getpage(inode, index, &page, SGP_FALLOC);
 		if (error) {
 			/* Remove the !PageUptodate pages we added */
 			shmem_undo_range(inode,
@@ -2534,7 +2540,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 		inode->i_op = &shmem_short_symlink_operations;
 	} else {
 		inode_nohighmem(inode);
-		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
+		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
 		if (error) {
 			iput(inode);
 			return error;
@@ -2575,7 +2581,7 @@ static const char *shmem_get_link(struct dentry *dentry,
 			return ERR_PTR(-ECHILD);
 		}
 	} else {
-		error = shmem_getpage(inode, 0, &page, SGP_READ, NULL);
+		error = shmem_getpage(inode, 0, &page, SGP_READ);
 		if (error)
 			return ERR_PTR(error);
 		unlock_page(page);
@@ -3479,7 +3485,8 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 	int error;
 
 	BUG_ON(mapping->a_ops != &shmem_aops);
-	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+				  gfp, NULL, NULL);
 	if (error)
 		page = ERR_PTR(error);
 	else