Commit b67bf49c authored by Hugh Dickins, committed by Matthew Wilcox (Oracle)

mm/munlock: delete FOLL_MLOCK and FOLL_POPULATE

If counting page mlocks, we must not double-count: follow_page_pte() can
tell if a page has already been Mlocked or not, but cannot tell if a pte
has already been counted or not: that will have to be done when the pte
is mapped in (which lru_cache_add_inactive_or_unevictable() already tracks
for new anon pages, but there's no such tracking yet for others).
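
For reference, the per-pte tracking mentioned above looks roughly like this — a simplified sketch of lru_cache_add_inactive_or_unevictable() as it stood before this series, not the exact source:

	void lru_cache_add_inactive_or_unevictable(struct page *page,
						   struct vm_area_struct *vma)
	{
		bool unevictable;

		/* Count the mlock once, under pte lock, as the pte is mapped in */
		unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
		if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
			int nr_pages = thp_nr_pages(page);

			__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
			count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
		}
		lru_cache_add(page);
	}

The TestSetPageMlocked() is what keeps the count exact: each new anon page is counted at most once, at the moment its pte is mapped in.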

Delete all the FOLL_MLOCK code - faulting in the missing pages will do
all that is necessary, without special mlock_vma_page() calls from here.

But then FOLL_POPULATE turns out to serve no purpose - it was there so
that its absence would tell faultin_page() not to faultin page when
setting up VM_LOCKONFAULT areas; but if there's no special work needed
here for mlock, then there's no work at all here for VM_LOCKONFAULT.
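
To make the VM_LOCKONFAULT case concrete: userspace asks for it with mlock2(MLOCK_ONFAULT), meaning "lock pages only as they are faulted in", so there is genuinely nothing to populate up front. A minimal userspace sketch — buffer name and size are illustrative only, error handling omitted:

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <stdlib.h>

	#define LEN (16 * 4096)	/* illustrative size */

	int main(void)
	{
		char *buf = aligned_alloc(4096, LEN);

		mlock(buf, LEN);	/* fault in and lock every page now */
		munlock(buf, LEN);

		/* VM_LOCKONFAULT: lock each page later, when it first faults in */
		mlock2(buf, LEN, MLOCK_ONFAULT);
		munlock(buf, LEN);

		free(buf);
		return 0;
	}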

Have I got that right?  I've not looked into the history, but see that
FOLL_POPULATE goes back before VM_LOCKONFAULT: did it serve a different
purpose before?  Ah, yes, it was used to skip the old stack guard page.

And is it intentional that COW is not broken on existing pages when
setting up a VM_LOCKONFAULT area?  I can see that being argued either
way, and have no reason to disagree with current behaviour.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
parent ebcbc6ea
include/linux/mm.h
@@ -2925,13 +2925,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_NOWAIT	0x20	/* if a disk transfer is needed, start the IO
				 * and return without waiting upon it */
-#define FOLL_POPULATE	0x40	/* fault in pages (with FOLL_MLOCK) */
 #define FOLL_NOFAULT	0x80	/* do not fault in pages */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
 #define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
-#define FOLL_MLOCK	0x1000	/* lock present pages */
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
mm/gup.c
@@ -597,32 +597,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
		 */
		mark_page_accessed(page);
	}
-	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
-		/* Do not mlock pte-mapped THP */
-		if (PageTransCompound(page))
-			goto out;
-
-		/*
-		 * The preliminary mapping check is mainly to avoid the
-		 * pointless overhead of lock_page on the ZERO_PAGE
-		 * which might bounce very badly if there is contention.
-		 *
-		 * If the page is already locked, we don't need to
-		 * handle it now - vmscan will handle it later if and
-		 * when it attempts to reclaim the page.
-		 */
-		if (page->mapping && trylock_page(page)) {
-			lru_add_drain();	/* push cached pages to LRU */
-			/*
-			 * Because we lock page here, and migration is
-			 * blocked by the pte's page reference, and we
-			 * know the page is still mapped, we don't even
-			 * need to check for file-cache page truncation.
-			 */
-			mlock_vma_page(page);
-			unlock_page(page);
-		}
-	}
 out:
	pte_unmap_unlock(ptep, ptl);
	return page;
@@ -945,9 +919,6 @@ static int faultin_page(struct vm_area_struct *vma,
	unsigned int fault_flags = 0;
	vm_fault_t ret;

-	/* mlock all present pages, but do not fault in new pages */
-	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
-		return -ENOENT;
	if (*flags & FOLL_NOFAULT)
		return -EFAULT;
	if (*flags & FOLL_WRITE)
@@ -1198,8 +1169,6 @@ static long __get_user_pages(struct mm_struct *mm,
			case -ENOMEM:
			case -EHWPOISON:
				goto out;
-			case -ENOENT:
-				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
@@ -1497,9 +1466,14 @@ long populate_vma_page_range(struct vm_area_struct *vma,
	VM_BUG_ON_VMA(end > vma->vm_end, vma);
	mmap_assert_locked(mm);

-	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+	/*
+	 * Rightly or wrongly, the VM_LOCKONFAULT case has never used
+	 * faultin_page() to break COW, so it has no work to do here.
+	 */
	if (vma->vm_flags & VM_LOCKONFAULT)
-		gup_flags &= ~FOLL_POPULATE;
+		return nr_pages;
+
+	gup_flags = FOLL_TOUCH;
	/*
	 * We want to touch writable mappings with a write fault in order
	 * to break COW, except for shared mappings because these don't COW
@@ -1566,10 +1540,9 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
	 *		  in the page table.
	 * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit
	 *		  a poisoned page.
-	 * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT.
	 * !FOLL_FORCE: Require proper access permissions.
	 */
-	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
+	gup_flags = FOLL_TOUCH | FOLL_HWPOISON;
	if (write)
		gup_flags |= FOLL_WRITE;

mm/huge_memory.c
@@ -1380,39 +1380,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,

	if (flags & FOLL_TOUCH)
		touch_pmd(vma, addr, pmd, flags);

-	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
-		/*
-		 * We don't mlock() pte-mapped THPs. This way we can avoid
-		 * leaking mlocked pages into non-VM_LOCKED VMAs.
-		 *
-		 * For anon THP:
-		 *
-		 * In most cases the pmd is the only mapping of the page as we
-		 * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
-		 * writable private mappings in populate_vma_page_range().
-		 *
-		 * The only scenario when we have the page shared here is if we
-		 * mlocking read-only mapping shared over fork(). We skip
-		 * mlocking such pages.
-		 *
-		 * For file THP:
-		 *
-		 * We can expect PageDoubleMap() to be stable under page lock:
-		 * for file pages we set it in page_add_file_rmap(), which
-		 * requires page to be locked.
-		 */
-
-		if (PageAnon(page) && compound_mapcount(page) != 1)
-			goto skip_mlock;
-		if (PageDoubleMap(page) || !page->mapping)
-			goto skip_mlock;
-		if (!trylock_page(page))
-			goto skip_mlock;
-		if (page->mapping && !PageDoubleMap(page))
-			mlock_vma_page(page);
-		unlock_page(page);
-	}
-skip_mlock:
	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);