Commit e6c509f8 authored by Hugh Dickins, committed by Linus Torvalds

mm: use clear_page_mlock() in page_remove_rmap()

We had thought that pages could no longer get freed while still marked as
mlocked; but Johannes Weiner posted this program to demonstrate that
truncating an mlocked private file mapping containing COWed pages is still
mishandled:

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	char *map;
	int fd;

	system("grep mlockfreed /proc/vmstat");
	fd = open("chigurh", O_CREAT|O_EXCL|O_RDWR);
	unlink("chigurh");
	ftruncate(fd, 4096);
	map = mmap(NULL, 4096, PROT_WRITE, MAP_PRIVATE, fd, 0);
	map[0] = 11;
	mlock(map, sizeof(fd));
	ftruncate(fd, 0);
	close(fd);
	munlock(map, sizeof(fd));
	munmap(map, 4096);
	system("grep mlockfreed /proc/vmstat");
	return 0;
}
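
On a kernel without this fix, the two greps are expected to bracket the bug: the COWed page is freed while still marked Mlocked, so the second grep would show the counter bumped, along the lines of (values are illustrative):

	unevictable_pgs_mlockfreed 0
	unevictable_pgs_mlockfreed 1

With the fix applied, the counter should read the same before and after.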

The anon COWed pages are not caught by truncation's clear_page_mlock() of
the pagecache pages; but unmap_mapping_range() unmaps them, so we ought to
look out for them there in page_remove_rmap().  Indeed, why should
truncation or invalidation be doing the clear_page_mlock() when removing
from pagecache?  mlock is a property of mapping in userspace, not a
property of pagecache: an mlocked unmapped page is nonsensical.
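
As a rough illustration only -- not the kernel code itself -- the sketch below models in plain userspace C what the patch arranges; struct page, the flag helpers, the nr_mlock counter and both functions are simplified stand-ins for the real ones. The point it shows is that the Mlocked state and its accounting are dropped where the last mapping of the page disappears, rather than during pagecache truncation:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for struct page, PageMlocked and the NR_MLOCK counter. */
struct page {
	bool mlocked;		/* models the PG_mlocked flag */
	int mapcount;		/* models how many ptes map the page */
};

static long nr_mlock;		/* models the NR_MLOCK statistic */

/* Modelled on clear_page_mlock() after this patch: just test-and-clear,
   with no page->mapping check to guess whether truncation happened. */
static void clear_page_mlock(struct page *page)
{
	if (!page->mlocked)	/* models TestClearPageMlocked() */
		return;
	page->mlocked = false;
	nr_mlock--;
}

/* Modelled on the tail of page_remove_rmap(): once the last mapping of a
   page goes away, an mlocked unmapped page is nonsensical, so clear it. */
static void page_remove_rmap(struct page *page)
{
	if (--page->mapcount > 0)
		return;
	if (page->mlocked)
		clear_page_mlock(page);
}

int main(void)
{
	/* A COWed anon page: mlocked, mapped by exactly one pte. */
	struct page cow = { .mlocked = true, .mapcount = 1 };

	nr_mlock = 1;
	page_remove_rmap(&cow);	/* e.g. via truncation's unmap_mapping_range() */
	printf("mlocked=%d nr_mlock=%ld\n", cow.mlocked, nr_mlock);
	return 0;
}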
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ying Han <yinghan@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 39b5f29a
@@ -201,12 +201,7 @@ extern void munlock_vma_page(struct page *page);
  * If called for a page that is still mapped by mlocked vmas, all we do
  * is revert to lazy LRU behaviour -- semantics are not broken.
  */
-extern void __clear_page_mlock(struct page *page);
-static inline void clear_page_mlock(struct page *page)
-{
-	if (unlikely(TestClearPageMlocked(page)))
-		__clear_page_mlock(page);
-}
+extern void clear_page_mlock(struct page *page);
 
 /*
  * mlock_migrate_page - called only from migrate_page_copy() to
......
@@ -1577,12 +1577,12 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 			if (page->mapping && trylock_page(page)) {
 				lru_add_drain();	/* push cached pages to LRU */
 				/*
-				 * Because we lock page here and migration is
-				 * blocked by the pte's page reference, we need
-				 * only check for file-cache page truncation.
+				 * Because we lock page here, and migration is
+				 * blocked by the pte's page reference, and we
+				 * know the page is still mapped, we don't even
+				 * need to check for file-cache page truncation.
 				 */
-				if (page->mapping)
-					mlock_vma_page(page);
+				mlock_vma_page(page);
 				unlock_page(page);
 			}
 		}
......
@@ -51,13 +51,10 @@ EXPORT_SYMBOL(can_do_mlock);
 /*
  * LRU accounting for clear_page_mlock()
  */
-void __clear_page_mlock(struct page *page)
+void clear_page_mlock(struct page *page)
 {
-	VM_BUG_ON(!PageLocked(page));
-
-	if (!page->mapping) {	/* truncated ? */
+	if (!TestClearPageMlocked(page))
 		return;
-	}
 
 	dec_zone_page_state(page, NR_MLOCK);
 	count_vm_event(UNEVICTABLE_PGCLEARED);
@@ -290,14 +287,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
 		if (page && !IS_ERR(page)) {
 			lock_page(page);
-			/*
-			 * Like in __mlock_vma_pages_range(),
-			 * because we lock page here and migration is
-			 * blocked by the elevated reference, we need
-			 * only check for file-cache page truncation.
-			 */
-			if (page->mapping)
-				munlock_vma_page(page);
+			munlock_vma_page(page);
 			unlock_page(page);
 			put_page(page);
 		}
......
@@ -1155,7 +1155,10 @@ void page_remove_rmap(struct page *page)
 	} else {
 		__dec_zone_page_state(page, NR_FILE_MAPPED);
 		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
+		mem_cgroup_end_update_page_stat(page, &locked, &flags);
 	}
+	if (unlikely(PageMlocked(page)))
+		clear_page_mlock(page);
 	/*
 	 * It would be tidy to reset the PageAnon mapping here,
 	 * but that might overwrite a racing page_add_anon_rmap
@@ -1165,6 +1168,7 @@ void page_remove_rmap(struct page *page)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
+	return;
 out:
 	if (!anon)
 		mem_cgroup_end_update_page_stat(page, &locked, &flags);
......
@@ -107,7 +107,6 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
-	clear_page_mlock(page);
 	ClearPageMappedToDisk(page);
 	delete_from_page_cache(page);
 	return 0;
@@ -132,7 +131,6 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, 0))
 		return 0;
 
-	clear_page_mlock(page);
 	ret = remove_mapping(mapping, page);
 
 	return ret;
@@ -394,8 +392,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
-	clear_page_mlock(page);
-
 	spin_lock_irq(&mapping->tree_lock);
 	if (PageDirty(page))
 		goto failed;
......