Commit 82b0f8c3, authored by Jan Kara, committed by Linus Torvalds

mm: join struct fault_env and vm_fault

Currently we have two different structures for passing fault information
around - struct vm_fault and struct fault_env.  DAX will need more
information in struct vm_fault to handle its faults so the content of
that structure would become even closer to fault_env.  Furthermore it
would need to generate struct fault_env to be able to call some of the
generic functions.  So at this point I don't think there's much use in
keeping these two structures separate.  Just embed into struct vm_fault
all that is needed to use it for both purposes.

Link: http://lkml.kernel.org/r/1479460644-25076-2-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 8b7457ef
...@@ -556,7 +556,7 @@ till "end_pgoff". ->map_pages() is called with page table locked and must ...@@ -556,7 +556,7 @@ till "end_pgoff". ->map_pages() is called with page table locked and must
not block. If it's not possible to reach a page without blocking, not block. If it's not possible to reach a page without blocking,
filesystem should skip it. Filesystem should use do_set_pte() to setup filesystem should skip it. Filesystem should use do_set_pte() to setup
page table entry. Pointer to entry associated with the page is passed in page table entry. Pointer to entry associated with the page is passed in
"pte" field in fault_env structure. Pointers to entries for other offsets "pte" field in vm_fault structure. Pointers to entries for other offsets
should be calculated relative to "pte". should be calculated relative to "pte".
->page_mkwrite() is called when a previously read-only pte is ->page_mkwrite() is called when a previously read-only pte is
......
...@@ -257,9 +257,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, ...@@ -257,9 +257,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
* fatal_signal_pending()s, and the mmap_sem must be released before * fatal_signal_pending()s, and the mmap_sem must be released before
* returning it. * returning it.
*/ */
int handle_userfault(struct fault_env *fe, unsigned long reason) int handle_userfault(struct vm_fault *vmf, unsigned long reason)
{ {
struct mm_struct *mm = fe->vma->vm_mm; struct mm_struct *mm = vmf->vma->vm_mm;
struct userfaultfd_ctx *ctx; struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq; struct userfaultfd_wait_queue uwq;
int ret; int ret;
...@@ -268,7 +268,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) ...@@ -268,7 +268,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
ctx = fe->vma->vm_userfaultfd_ctx.ctx; ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx) if (!ctx)
goto out; goto out;
...@@ -301,17 +301,18 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) ...@@ -301,17 +301,18 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
* without first stopping userland access to the memory. For * without first stopping userland access to the memory. For
* VM_UFFD_MISSING userfaults this is enough for now. * VM_UFFD_MISSING userfaults this is enough for now.
*/ */
if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) { if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
/* /*
* Validate the invariant that nowait must allow retry * Validate the invariant that nowait must allow retry
* to be sure not to return SIGBUS erroneously on * to be sure not to return SIGBUS erroneously on
* nowait invocations. * nowait invocations.
*/ */
BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT); BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM #ifdef CONFIG_DEBUG_VM
if (printk_ratelimit()) { if (printk_ratelimit()) {
printk(KERN_WARNING printk(KERN_WARNING
"FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags); "FAULT_FLAG_ALLOW_RETRY missing %x\n",
vmf->flags);
dump_stack(); dump_stack();
} }
#endif #endif
...@@ -323,7 +324,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) ...@@ -323,7 +324,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
* and wait. * and wait.
*/ */
ret = VM_FAULT_RETRY; ret = VM_FAULT_RETRY;
if (fe->flags & FAULT_FLAG_RETRY_NOWAIT) if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
goto out; goto out;
/* take the reference before dropping the mmap_sem */ /* take the reference before dropping the mmap_sem */
...@@ -331,11 +332,11 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) ...@@ -331,11 +332,11 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current; uwq.wq.private = current;
uwq.msg = userfault_msg(fe->address, fe->flags, reason); uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
uwq.ctx = ctx; uwq.ctx = ctx;
return_to_userland = return_to_userland =
(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
spin_lock(&ctx->fault_pending_wqh.lock); spin_lock(&ctx->fault_pending_wqh.lock);
...@@ -353,7 +354,8 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) ...@@ -353,7 +354,8 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
TASK_KILLABLE); TASK_KILLABLE);
spin_unlock(&ctx->fault_pending_wqh.lock); spin_unlock(&ctx->fault_pending_wqh.lock);
must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason); must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
reason);
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) && if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
......
#ifndef _LINUX_HUGE_MM_H #ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H
extern int do_huge_pmd_anonymous_page(struct fault_env *fe); extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma); struct vm_area_struct *vma);
extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd); extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd); extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
pmd_t *pmd, pmd_t *pmd,
...@@ -142,7 +142,7 @@ static inline int hpage_nr_pages(struct page *page) ...@@ -142,7 +142,7 @@ static inline int hpage_nr_pages(struct page *page)
return 1; return 1;
} }
extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd); extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page; extern struct page *huge_zero_page;
...@@ -212,7 +212,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, ...@@ -212,7 +212,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
return NULL; return NULL;
} }
static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd) static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
{ {
return 0; return 0;
} }
......
...@@ -292,10 +292,16 @@ extern pgprot_t protection_map[16]; ...@@ -292,10 +292,16 @@ extern pgprot_t protection_map[16];
* pgoff should be used in favour of virtual_address, if possible. * pgoff should be used in favour of virtual_address, if possible.
*/ */
struct vm_fault { struct vm_fault {
struct vm_area_struct *vma; /* Target VMA */
unsigned int flags; /* FAULT_FLAG_xxx flags */ unsigned int flags; /* FAULT_FLAG_xxx flags */
gfp_t gfp_mask; /* gfp mask to be used for allocations */ gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */ pgoff_t pgoff; /* Logical page offset based on vma */
void __user *virtual_address; /* Faulting virtual address */ unsigned long address; /* Faulting virtual address */
void __user *virtual_address; /* Faulting virtual address masked by
* PAGE_MASK */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address'
*/
struct page *cow_page; /* Handler may choose to COW */ struct page *cow_page; /* Handler may choose to COW */
struct page *page; /* ->fault handlers should return a struct page *page; /* ->fault handlers should return a
...@@ -309,19 +315,7 @@ struct vm_fault { ...@@ -309,19 +315,7 @@ struct vm_fault {
* VM_FAULT_DAX_LOCKED and fill in * VM_FAULT_DAX_LOCKED and fill in
* entry here. * entry here.
*/ */
}; /* These three entries are valid only while holding ptl lock */
/*
* Page fault context: passes though page fault handler instead of endless list
* of function arguments.
*/
struct fault_env {
struct vm_area_struct *vma; /* Target VMA */
unsigned long address; /* Faulting virtual address */
unsigned int flags; /* FAULT_FLAG_xxx flags */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address'
*/
pte_t *pte; /* Pointer to pte entry matching pte_t *pte; /* Pointer to pte entry matching
* the 'address'. NULL if the page * the 'address'. NULL if the page
* table hasn't been allocated. * table hasn't been allocated.
...@@ -351,7 +345,7 @@ struct vm_operations_struct { ...@@ -351,7 +345,7 @@ struct vm_operations_struct {
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
int (*pmd_fault)(struct vm_area_struct *, unsigned long address, int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
pmd_t *, unsigned int flags); pmd_t *, unsigned int flags);
void (*map_pages)(struct fault_env *fe, void (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff); pgoff_t start_pgoff, pgoff_t end_pgoff);
/* notification that a previously read-only page is about to become /* notification that a previously read-only page is about to become
...@@ -625,7 +619,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) ...@@ -625,7 +619,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte; return pte;
} }
int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
struct page *page); struct page *page);
#endif #endif
...@@ -2094,7 +2088,7 @@ extern void truncate_inode_pages_final(struct address_space *); ...@@ -2094,7 +2088,7 @@ extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */ /* generic vm_area_ops exported for stackable file systems */
extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
extern void filemap_map_pages(struct fault_env *fe, extern void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff); pgoff_t start_pgoff, pgoff_t end_pgoff);
extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
extern int handle_userfault(struct fault_env *fe, unsigned long reason); extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len); unsigned long src_start, unsigned long len);
...@@ -55,7 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) ...@@ -55,7 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
#else /* CONFIG_USERFAULTFD */ #else /* CONFIG_USERFAULTFD */
/* mm helpers */ /* mm helpers */
static inline int handle_userfault(struct fault_env *fe, unsigned long reason) static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
{ {
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
......
...@@ -2164,12 +2164,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -2164,12 +2164,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
} }
EXPORT_SYMBOL(filemap_fault); EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct fault_env *fe, void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff) pgoff_t start_pgoff, pgoff_t end_pgoff)
{ {
struct radix_tree_iter iter; struct radix_tree_iter iter;
void **slot; void **slot;
struct file *file = fe->vma->vm_file; struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff; pgoff_t last_pgoff = start_pgoff;
loff_t size; loff_t size;
...@@ -2225,11 +2225,11 @@ void filemap_map_pages(struct fault_env *fe, ...@@ -2225,11 +2225,11 @@ void filemap_map_pages(struct fault_env *fe,
if (file->f_ra.mmap_miss > 0) if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--; file->f_ra.mmap_miss--;
fe->address += (iter.index - last_pgoff) << PAGE_SHIFT; vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
if (fe->pte) if (vmf->pte)
fe->pte += iter.index - last_pgoff; vmf->pte += iter.index - last_pgoff;
last_pgoff = iter.index; last_pgoff = iter.index;
if (alloc_set_pte(fe, NULL, page)) if (alloc_set_pte(vmf, NULL, page))
goto unlock; goto unlock;
unlock_page(page); unlock_page(page);
goto next; goto next;
...@@ -2239,7 +2239,7 @@ void filemap_map_pages(struct fault_env *fe, ...@@ -2239,7 +2239,7 @@ void filemap_map_pages(struct fault_env *fe,
put_page(page); put_page(page);
next: next:
/* Huge page is mapped? No need to proceed. */ /* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*fe->pmd)) if (pmd_trans_huge(*vmf->pmd))
break; break;
if (iter.index == end_pgoff) if (iter.index == end_pgoff)
break; break;
......
...@@ -542,13 +542,13 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, ...@@ -542,13 +542,13 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
} }
EXPORT_SYMBOL_GPL(thp_get_unmapped_area); EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page, static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
gfp_t gfp) gfp_t gfp)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
pgtable_t pgtable; pgtable_t pgtable;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
VM_BUG_ON_PAGE(!PageCompound(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page);
...@@ -573,9 +573,9 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page, ...@@ -573,9 +573,9 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
*/ */
__SetPageUptodate(page); __SetPageUptodate(page);
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_none(*fe->pmd))) { if (unlikely(!pmd_none(*vmf->pmd))) {
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
mem_cgroup_cancel_charge(page, memcg, true); mem_cgroup_cancel_charge(page, memcg, true);
put_page(page); put_page(page);
pte_free(vma->vm_mm, pgtable); pte_free(vma->vm_mm, pgtable);
...@@ -586,11 +586,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page, ...@@ -586,11 +586,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
if (userfaultfd_missing(vma)) { if (userfaultfd_missing(vma)) {
int ret; int ret;
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
mem_cgroup_cancel_charge(page, memcg, true); mem_cgroup_cancel_charge(page, memcg, true);
put_page(page); put_page(page);
pte_free(vma->vm_mm, pgtable); pte_free(vma->vm_mm, pgtable);
ret = handle_userfault(fe, VM_UFFD_MISSING); ret = handle_userfault(vmf, VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK); VM_BUG_ON(ret & VM_FAULT_FALLBACK);
return ret; return ret;
} }
...@@ -600,11 +600,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page, ...@@ -600,11 +600,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
page_add_new_anon_rmap(page, vma, haddr, true); page_add_new_anon_rmap(page, vma, haddr, true);
mem_cgroup_commit_charge(page, memcg, false, true); mem_cgroup_commit_charge(page, memcg, false, true);
lru_cache_add_active_or_unevictable(page, vma); lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&vma->vm_mm->nr_ptes); atomic_long_inc(&vma->vm_mm->nr_ptes);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
count_vm_event(THP_FAULT_ALLOC); count_vm_event(THP_FAULT_ALLOC);
} }
...@@ -651,12 +651,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, ...@@ -651,12 +651,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
return true; return true;
} }
int do_huge_pmd_anonymous_page(struct fault_env *fe) int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
gfp_t gfp; gfp_t gfp;
struct page *page; struct page *page;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
...@@ -664,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe) ...@@ -664,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags))) if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (!(fe->flags & FAULT_FLAG_WRITE) && if (!(vmf->flags & FAULT_FLAG_WRITE) &&
!mm_forbids_zeropage(vma->vm_mm) && !mm_forbids_zeropage(vma->vm_mm) &&
transparent_hugepage_use_zero_page()) { transparent_hugepage_use_zero_page()) {
pgtable_t pgtable; pgtable_t pgtable;
...@@ -680,22 +680,22 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe) ...@@ -680,22 +680,22 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
ret = 0; ret = 0;
set = false; set = false;
if (pmd_none(*fe->pmd)) { if (pmd_none(*vmf->pmd)) {
if (userfaultfd_missing(vma)) { if (userfaultfd_missing(vma)) {
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
ret = handle_userfault(fe, VM_UFFD_MISSING); ret = handle_userfault(vmf, VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK); VM_BUG_ON(ret & VM_FAULT_FALLBACK);
} else { } else {
set_huge_zero_page(pgtable, vma->vm_mm, vma, set_huge_zero_page(pgtable, vma->vm_mm, vma,
haddr, fe->pmd, zero_page); haddr, vmf->pmd, zero_page);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
set = true; set = true;
} }
} else } else
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
if (!set) if (!set)
pte_free(vma->vm_mm, pgtable); pte_free(vma->vm_mm, pgtable);
return ret; return ret;
...@@ -707,7 +707,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe) ...@@ -707,7 +707,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
prep_transhuge_page(page); prep_transhuge_page(page);
return __do_huge_pmd_anonymous_page(fe, page, gfp); return __do_huge_pmd_anonymous_page(vmf, page, gfp);
} }
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
...@@ -879,30 +879,30 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -879,30 +879,30 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return ret; return ret;
} }
void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd) void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
{ {
pmd_t entry; pmd_t entry;
unsigned long haddr; unsigned long haddr;
fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
goto unlock; goto unlock;
entry = pmd_mkyoung(orig_pmd); entry = pmd_mkyoung(orig_pmd);
haddr = fe->address & HPAGE_PMD_MASK; haddr = vmf->address & HPAGE_PMD_MASK;
if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
fe->flags & FAULT_FLAG_WRITE)) vmf->flags & FAULT_FLAG_WRITE))
update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd); update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
unlock: unlock:
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
} }
static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
struct page *page) struct page *page)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
pgtable_t pgtable; pgtable_t pgtable;
pmd_t _pmd; pmd_t _pmd;
...@@ -921,7 +921,7 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, ...@@ -921,7 +921,7 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
for (i = 0; i < HPAGE_PMD_NR; i++) { for (i = 0; i < HPAGE_PMD_NR; i++) {
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE | pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
__GFP_OTHER_NODE, vma, __GFP_OTHER_NODE, vma,
fe->address, page_to_nid(page)); vmf->address, page_to_nid(page));
if (unlikely(!pages[i] || if (unlikely(!pages[i] ||
mem_cgroup_try_charge(pages[i], vma->vm_mm, mem_cgroup_try_charge(pages[i], vma->vm_mm,
GFP_KERNEL, &memcg, false))) { GFP_KERNEL, &memcg, false))) {
...@@ -952,15 +952,15 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, ...@@ -952,15 +952,15 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
goto out_free_pages; goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(!PageHead(page), page);
pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
/* leave pmd empty until pte is filled */ /* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd); pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd);
pmd_populate(vma->vm_mm, &_pmd, pgtable); pmd_populate(vma->vm_mm, &_pmd, pgtable);
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
...@@ -969,20 +969,20 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, ...@@ -969,20 +969,20 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
entry = maybe_mkwrite(pte_mkdirty(entry), vma); entry = maybe_mkwrite(pte_mkdirty(entry), vma);
memcg = (void *)page_private(pages[i]); memcg = (void *)page_private(pages[i]);
set_page_private(pages[i], 0); set_page_private(pages[i], 0);
page_add_new_anon_rmap(pages[i], fe->vma, haddr, false); page_add_new_anon_rmap(pages[i], vmf->vma, haddr, false);
mem_cgroup_commit_charge(pages[i], memcg, false, false); mem_cgroup_commit_charge(pages[i], memcg, false, false);
lru_cache_add_active_or_unevictable(pages[i], vma); lru_cache_add_active_or_unevictable(pages[i], vma);
fe->pte = pte_offset_map(&_pmd, haddr); vmf->pte = pte_offset_map(&_pmd, haddr);
VM_BUG_ON(!pte_none(*fe->pte)); VM_BUG_ON(!pte_none(*vmf->pte));
set_pte_at(vma->vm_mm, haddr, fe->pte, entry); set_pte_at(vma->vm_mm, haddr, vmf->pte, entry);
pte_unmap(fe->pte); pte_unmap(vmf->pte);
} }
kfree(pages); kfree(pages);
smp_wmb(); /* make pte visible before pmd */ smp_wmb(); /* make pte visible before pmd */
pmd_populate(vma->vm_mm, fe->pmd, pgtable); pmd_populate(vma->vm_mm, vmf->pmd, pgtable);
page_remove_rmap(page, true); page_remove_rmap(page, true);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
...@@ -993,7 +993,7 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, ...@@ -993,7 +993,7 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
return ret; return ret;
out_free_pages: out_free_pages:
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
for (i = 0; i < HPAGE_PMD_NR; i++) { for (i = 0; i < HPAGE_PMD_NR; i++) {
memcg = (void *)page_private(pages[i]); memcg = (void *)page_private(pages[i]);
...@@ -1005,23 +1005,23 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd, ...@@ -1005,23 +1005,23 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
goto out; goto out;
} }
int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *new_page; struct page *page = NULL, *new_page;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */
gfp_t huge_gfp; /* for allocation and charge */ gfp_t huge_gfp; /* for allocation and charge */
int ret = 0; int ret = 0;
fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
VM_BUG_ON_VMA(!vma->anon_vma, vma); VM_BUG_ON_VMA(!vma->anon_vma, vma);
if (is_huge_zero_pmd(orig_pmd)) if (is_huge_zero_pmd(orig_pmd))
goto alloc; goto alloc;
spin_lock(fe->ptl); spin_lock(vmf->ptl);
if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
goto out_unlock; goto out_unlock;
page = pmd_page(orig_pmd); page = pmd_page(orig_pmd);
...@@ -1034,13 +1034,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1034,13 +1034,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
pmd_t entry; pmd_t entry;
entry = pmd_mkyoung(orig_pmd); entry = pmd_mkyoung(orig_pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry, 1)) if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
update_mmu_cache_pmd(vma, fe->address, fe->pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
ret |= VM_FAULT_WRITE; ret |= VM_FAULT_WRITE;
goto out_unlock; goto out_unlock;
} }
get_page(page); get_page(page);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
alloc: alloc:
if (transparent_hugepage_enabled(vma) && if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) { !transparent_hugepage_debug_cow()) {
...@@ -1053,12 +1053,12 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1053,12 +1053,12 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
prep_transhuge_page(new_page); prep_transhuge_page(new_page);
} else { } else {
if (!page) { if (!page) {
split_huge_pmd(vma, fe->pmd, fe->address); split_huge_pmd(vma, vmf->pmd, vmf->address);
ret |= VM_FAULT_FALLBACK; ret |= VM_FAULT_FALLBACK;
} else { } else {
ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page); ret = do_huge_pmd_wp_page_fallback(vmf, orig_pmd, page);
if (ret & VM_FAULT_OOM) { if (ret & VM_FAULT_OOM) {
split_huge_pmd(vma, fe->pmd, fe->address); split_huge_pmd(vma, vmf->pmd, vmf->address);
ret |= VM_FAULT_FALLBACK; ret |= VM_FAULT_FALLBACK;
} }
put_page(page); put_page(page);
...@@ -1070,7 +1070,7 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1070,7 +1070,7 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
huge_gfp, &memcg, true))) { huge_gfp, &memcg, true))) {
put_page(new_page); put_page(new_page);
split_huge_pmd(vma, fe->pmd, fe->address); split_huge_pmd(vma, vmf->pmd, vmf->address);
if (page) if (page)
put_page(page); put_page(page);
ret |= VM_FAULT_FALLBACK; ret |= VM_FAULT_FALLBACK;
...@@ -1090,11 +1090,11 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1090,11 +1090,11 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
spin_lock(fe->ptl); spin_lock(vmf->ptl);
if (page) if (page)
put_page(page); put_page(page);
if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) { if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
mem_cgroup_cancel_charge(new_page, memcg, true); mem_cgroup_cancel_charge(new_page, memcg, true);
put_page(new_page); put_page(new_page);
goto out_mn; goto out_mn;
...@@ -1102,12 +1102,12 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1102,12 +1102,12 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
pmd_t entry; pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd); pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
page_add_new_anon_rmap(new_page, vma, haddr, true); page_add_new_anon_rmap(new_page, vma, haddr, true);
mem_cgroup_commit_charge(new_page, memcg, false, true); mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_active_or_unevictable(new_page, vma); lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
update_mmu_cache_pmd(vma, fe->address, fe->pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
if (!page) { if (!page) {
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
} else { } else {
...@@ -1117,13 +1117,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) ...@@ -1117,13 +1117,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
} }
ret |= VM_FAULT_WRITE; ret |= VM_FAULT_WRITE;
} }
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
out_mn: out_mn:
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
out: out:
return ret; return ret;
out_unlock: out_unlock:
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
return ret; return ret;
} }
...@@ -1196,12 +1196,12 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, ...@@ -1196,12 +1196,12 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
} }
/* NUMA hinting page fault entry point for trans huge pmds */ /* NUMA hinting page fault entry point for trans huge pmds */
int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct anon_vma *anon_vma = NULL; struct anon_vma *anon_vma = NULL;
struct page *page; struct page *page;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id(); int page_nid = -1, this_nid = numa_node_id();
int target_nid, last_cpupid = -1; int target_nid, last_cpupid = -1;
bool page_locked; bool page_locked;
...@@ -1209,8 +1209,8 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1209,8 +1209,8 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
bool was_writable; bool was_writable;
int flags = 0; int flags = 0;
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(pmd, *fe->pmd))) if (unlikely(!pmd_same(pmd, *vmf->pmd)))
goto out_unlock; goto out_unlock;
/* /*
...@@ -1218,9 +1218,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1218,9 +1218,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
* without disrupting NUMA hinting information. Do not relock and * without disrupting NUMA hinting information. Do not relock and
* check_same as the page may no longer be mapped. * check_same as the page may no longer be mapped.
*/ */
if (unlikely(pmd_trans_migrating(*fe->pmd))) { if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
page = pmd_page(*fe->pmd); page = pmd_page(*vmf->pmd);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
wait_on_page_locked(page); wait_on_page_locked(page);
goto out; goto out;
} }
...@@ -1253,7 +1253,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1253,7 +1253,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
/* Migration could have started since the pmd_trans_migrating check */ /* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) { if (!page_locked) {
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
wait_on_page_locked(page); wait_on_page_locked(page);
page_nid = -1; page_nid = -1;
goto out; goto out;
...@@ -1264,12 +1264,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1264,12 +1264,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
* to serialise splits * to serialise splits
*/ */
get_page(page); get_page(page);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
anon_vma = page_lock_anon_vma_read(page); anon_vma = page_lock_anon_vma_read(page);
/* Confirm the PMD did not change while page_table_lock was released */ /* Confirm the PMD did not change while page_table_lock was released */
spin_lock(fe->ptl); spin_lock(vmf->ptl);
if (unlikely(!pmd_same(pmd, *fe->pmd))) { if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
page_nid = -1; page_nid = -1;
...@@ -1287,9 +1287,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1287,9 +1287,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
* Migrate the THP to the requested node, returns with page unlocked * Migrate the THP to the requested node, returns with page unlocked
* and access rights restored. * and access rights restored.
*/ */
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
fe->pmd, pmd, fe->address, page, target_nid); vmf->pmd, pmd, vmf->address, page, target_nid);
if (migrated) { if (migrated) {
flags |= TNF_MIGRATED; flags |= TNF_MIGRATED;
page_nid = target_nid; page_nid = target_nid;
...@@ -1304,18 +1304,19 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) ...@@ -1304,18 +1304,19 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
pmd = pmd_mkyoung(pmd); pmd = pmd_mkyoung(pmd);
if (was_writable) if (was_writable)
pmd = pmd_mkwrite(pmd); pmd = pmd_mkwrite(pmd);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, fe->address, fe->pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
unlock_page(page); unlock_page(page);
out_unlock: out_unlock:
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
out: out:
if (anon_vma) if (anon_vma)
page_unlock_anon_vma_read(anon_vma); page_unlock_anon_vma_read(anon_vma);
if (page_nid != -1) if (page_nid != -1)
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags); task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
vmf->flags);
return 0; return 0;
} }
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
/* Do not use these with a slab allocator */ /* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
int do_swap_page(struct fault_env *fe, pte_t orig_pte); int do_swap_page(struct vm_fault *vmf, pte_t orig_pte);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling); unsigned long floor, unsigned long ceiling);
......
...@@ -877,7 +877,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, ...@@ -877,7 +877,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
{ {
pte_t pteval; pte_t pteval;
int swapped_in = 0, ret = 0; int swapped_in = 0, ret = 0;
struct fault_env fe = { struct vm_fault vmf = {
.vma = vma, .vma = vma,
.address = address, .address = address,
.flags = FAULT_FLAG_ALLOW_RETRY, .flags = FAULT_FLAG_ALLOW_RETRY,
...@@ -889,19 +889,19 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, ...@@ -889,19 +889,19 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false; return false;
} }
fe.pte = pte_offset_map(pmd, address); vmf.pte = pte_offset_map(pmd, address);
for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
fe.pte++, fe.address += PAGE_SIZE) { vmf.pte++, vmf.address += PAGE_SIZE) {
pteval = *fe.pte; pteval = *vmf.pte;
if (!is_swap_pte(pteval)) if (!is_swap_pte(pteval))
continue; continue;
swapped_in++; swapped_in++;
ret = do_swap_page(&fe, pteval); ret = do_swap_page(&vmf, pteval);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
if (ret & VM_FAULT_RETRY) { if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
if (hugepage_vma_revalidate(mm, address, &fe.vma)) { if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
/* vma is no longer available, don't continue to swapin */ /* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false; return false;
...@@ -915,10 +915,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, ...@@ -915,10 +915,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
return false; return false;
} }
/* pte is unmapped now, we need to map it */ /* pte is unmapped now, we need to map it */
fe.pte = pte_offset_map(pmd, fe.address); vmf.pte = pte_offset_map(pmd, vmf.address);
} }
fe.pte--; vmf.pte--;
pte_unmap(fe.pte); pte_unmap(vmf.pte);
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1); trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
return true; return true;
} }
......
...@@ -2070,11 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, ...@@ -2070,11 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
* case, all we need to do here is to mark the page as writable and update * case, all we need to do here is to mark the page as writable and update
* any related book-keeping. * any related book-keeping.
*/ */
static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte,
struct page *page, int page_mkwrite, int dirty_shared) struct page *page, int page_mkwrite, int dirty_shared)
__releases(fe->ptl) __releases(vmf->ptl)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
pte_t entry; pte_t entry;
/* /*
* Clear the page's cpupid information as the existing * Clear the page's cpupid information as the existing
...@@ -2084,12 +2084,12 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, ...@@ -2084,12 +2084,12 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
if (page) if (page)
page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
entry = pte_mkyoung(orig_pte); entry = pte_mkyoung(orig_pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma); entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1)) if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
if (dirty_shared) { if (dirty_shared) {
struct address_space *mapping; struct address_space *mapping;
...@@ -2135,15 +2135,15 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, ...@@ -2135,15 +2135,15 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
* held to the old page, as well as updating the rmap. * held to the old page, as well as updating the rmap.
* - In any case, unlock the PTL and drop the reference we took to the old page. * - In any case, unlock the PTL and drop the reference we took to the old page.
*/ */
static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte,
struct page *old_page) struct page *old_page)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct page *new_page = NULL; struct page *new_page = NULL;
pte_t entry; pte_t entry;
int page_copied = 0; int page_copied = 0;
const unsigned long mmun_start = fe->address & PAGE_MASK; const unsigned long mmun_start = vmf->address & PAGE_MASK;
const unsigned long mmun_end = mmun_start + PAGE_SIZE; const unsigned long mmun_end = mmun_start + PAGE_SIZE;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
...@@ -2151,15 +2151,16 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2151,15 +2151,16 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
goto oom; goto oom;
if (is_zero_pfn(pte_pfn(orig_pte))) { if (is_zero_pfn(pte_pfn(orig_pte))) {
new_page = alloc_zeroed_user_highpage_movable(vma, fe->address); new_page = alloc_zeroed_user_highpage_movable(vma,
vmf->address);
if (!new_page) if (!new_page)
goto oom; goto oom;
} else { } else {
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
fe->address); vmf->address);
if (!new_page) if (!new_page)
goto oom; goto oom;
cow_user_page(new_page, old_page, fe->address, vma); cow_user_page(new_page, old_page, vmf->address, vma);
} }
if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
...@@ -2172,8 +2173,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2172,8 +2173,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
/* /*
* Re-check the pte - we dropped the lock * Re-check the pte - we dropped the lock
*/ */
fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl); vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
if (likely(pte_same(*fe->pte, orig_pte))) { if (likely(pte_same(*vmf->pte, orig_pte))) {
if (old_page) { if (old_page) {
if (!PageAnon(old_page)) { if (!PageAnon(old_page)) {
dec_mm_counter_fast(mm, dec_mm_counter_fast(mm,
...@@ -2183,7 +2184,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2183,7 +2184,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
} else { } else {
inc_mm_counter_fast(mm, MM_ANONPAGES); inc_mm_counter_fast(mm, MM_ANONPAGES);
} }
flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); flush_cache_page(vma, vmf->address, pte_pfn(orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot); entry = mk_pte(new_page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma); entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* /*
...@@ -2192,8 +2193,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2192,8 +2193,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
* seen in the presence of one thread doing SMC and another * seen in the presence of one thread doing SMC and another
* thread doing COW. * thread doing COW.
*/ */
ptep_clear_flush_notify(vma, fe->address, fe->pte); ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
page_add_new_anon_rmap(new_page, vma, fe->address, false); page_add_new_anon_rmap(new_page, vma, vmf->address, false);
mem_cgroup_commit_charge(new_page, memcg, false, false); mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma); lru_cache_add_active_or_unevictable(new_page, vma);
/* /*
...@@ -2201,8 +2202,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2201,8 +2202,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
* mmu page tables (such as kvm shadow page tables), we want the * mmu page tables (such as kvm shadow page tables), we want the
* new page to be mapped directly into the secondary page table. * new page to be mapped directly into the secondary page table.
*/ */
set_pte_at_notify(mm, fe->address, fe->pte, entry); set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
if (old_page) { if (old_page) {
/* /*
* Only after switching the pte to the new page may * Only after switching the pte to the new page may
...@@ -2239,7 +2240,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2239,7 +2240,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
if (new_page) if (new_page)
put_page(new_page); put_page(new_page);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
if (old_page) { if (old_page) {
/* /*
...@@ -2267,43 +2268,43 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, ...@@ -2267,43 +2268,43 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
* Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
* mapping * mapping
*/ */
static int wp_pfn_shared(struct fault_env *fe, pte_t orig_pte) static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
struct vm_fault vmf = { struct vm_fault vmf2 = {
.page = NULL, .page = NULL,
.pgoff = linear_page_index(vma, fe->address), .pgoff = linear_page_index(vma, vmf->address),
.virtual_address = .virtual_address =
(void __user *)(fe->address & PAGE_MASK), (void __user *)(vmf->address & PAGE_MASK),
.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
}; };
int ret; int ret;
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
ret = vma->vm_ops->pfn_mkwrite(vma, &vmf); ret = vma->vm_ops->pfn_mkwrite(vma, &vmf2);
if (ret & VM_FAULT_ERROR) if (ret & VM_FAULT_ERROR)
return ret; return ret;
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
&fe->ptl); vmf->address, &vmf->ptl);
/* /*
* We might have raced with another page fault while we * We might have raced with another page fault while we
* released the pte_offset_map_lock. * released the pte_offset_map_lock.
*/ */
if (!pte_same(*fe->pte, orig_pte)) { if (!pte_same(*vmf->pte, orig_pte)) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0; return 0;
} }
} }
return wp_page_reuse(fe, orig_pte, NULL, 0, 0); return wp_page_reuse(vmf, orig_pte, NULL, 0, 0);
} }
static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte,
struct page *old_page) struct page *old_page)
__releases(fe->ptl) __releases(vmf->ptl)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
int page_mkwrite = 0; int page_mkwrite = 0;
get_page(old_page); get_page(old_page);
...@@ -2311,8 +2312,8 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, ...@@ -2311,8 +2312,8 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
if (vma->vm_ops && vma->vm_ops->page_mkwrite) { if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp; int tmp;
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vma, old_page, fe->address); tmp = do_page_mkwrite(vma, old_page, vmf->address);
if (unlikely(!tmp || (tmp & if (unlikely(!tmp || (tmp &
(VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
put_page(old_page); put_page(old_page);
...@@ -2324,18 +2325,18 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, ...@@ -2324,18 +2325,18 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
* they did, we just return, as we can count on the * they did, we just return, as we can count on the
* MMU to tell us if they didn't also make it writable. * MMU to tell us if they didn't also make it writable.
*/ */
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
&fe->ptl); vmf->address, &vmf->ptl);
if (!pte_same(*fe->pte, orig_pte)) { if (!pte_same(*vmf->pte, orig_pte)) {
unlock_page(old_page); unlock_page(old_page);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
put_page(old_page); put_page(old_page);
return 0; return 0;
} }
page_mkwrite = 1; page_mkwrite = 1;
} }
return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1); return wp_page_reuse(vmf, orig_pte, old_page, page_mkwrite, 1);
} }
/* /*
...@@ -2356,13 +2357,13 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, ...@@ -2356,13 +2357,13 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
* but allow concurrent faults), with pte both mapped and locked. * but allow concurrent faults), with pte both mapped and locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with mmap_sem still held, but pte unmapped and unlocked.
*/ */
static int do_wp_page(struct fault_env *fe, pte_t orig_pte) static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte)
__releases(fe->ptl) __releases(vmf->ptl)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *old_page; struct page *old_page;
old_page = vm_normal_page(vma, fe->address, orig_pte); old_page = vm_normal_page(vma, vmf->address, orig_pte);
if (!old_page) { if (!old_page) {
/* /*
* VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
...@@ -2373,10 +2374,10 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2373,10 +2374,10 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
*/ */
if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED)) (VM_WRITE|VM_SHARED))
return wp_pfn_shared(fe, orig_pte); return wp_pfn_shared(vmf, orig_pte);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return wp_page_copy(fe, orig_pte, old_page); return wp_page_copy(vmf, orig_pte, old_page);
} }
/* /*
...@@ -2387,13 +2388,13 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2387,13 +2388,13 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
int total_mapcount; int total_mapcount;
if (!trylock_page(old_page)) { if (!trylock_page(old_page)) {
get_page(old_page); get_page(old_page);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
lock_page(old_page); lock_page(old_page);
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
fe->address, &fe->ptl); vmf->address, &vmf->ptl);
if (!pte_same(*fe->pte, orig_pte)) { if (!pte_same(*vmf->pte, orig_pte)) {
unlock_page(old_page); unlock_page(old_page);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
put_page(old_page); put_page(old_page);
return 0; return 0;
} }
...@@ -2411,12 +2412,12 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2411,12 +2412,12 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
page_move_anon_rmap(old_page, vma); page_move_anon_rmap(old_page, vma);
} }
unlock_page(old_page); unlock_page(old_page);
return wp_page_reuse(fe, orig_pte, old_page, 0, 0); return wp_page_reuse(vmf, orig_pte, old_page, 0, 0);
} }
unlock_page(old_page); unlock_page(old_page);
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) { (VM_WRITE|VM_SHARED))) {
return wp_page_shared(fe, orig_pte, old_page); return wp_page_shared(vmf, orig_pte, old_page);
} }
/* /*
...@@ -2424,8 +2425,8 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2424,8 +2425,8 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
*/ */
get_page(old_page); get_page(old_page);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return wp_page_copy(fe, orig_pte, old_page); return wp_page_copy(vmf, orig_pte, old_page);
} }
static void unmap_mapping_range_vma(struct vm_area_struct *vma, static void unmap_mapping_range_vma(struct vm_area_struct *vma,
...@@ -2513,9 +2514,9 @@ EXPORT_SYMBOL(unmap_mapping_range); ...@@ -2513,9 +2514,9 @@ EXPORT_SYMBOL(unmap_mapping_range);
* We return with the mmap_sem locked or unlocked in the same cases * We return with the mmap_sem locked or unlocked in the same cases
* as does filemap_fault(). * as does filemap_fault().
*/ */
int do_swap_page(struct fault_env *fe, pte_t orig_pte) int do_swap_page(struct vm_fault *vmf, pte_t orig_pte)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *page, *swapcache; struct page *page, *swapcache;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
swp_entry_t entry; swp_entry_t entry;
...@@ -2524,17 +2525,18 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2524,17 +2525,18 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
int exclusive = 0; int exclusive = 0;
int ret = 0; int ret = 0;
if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte)) if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, orig_pte))
goto out; goto out;
entry = pte_to_swp_entry(orig_pte); entry = pte_to_swp_entry(orig_pte);
if (unlikely(non_swap_entry(entry))) { if (unlikely(non_swap_entry(entry))) {
if (is_migration_entry(entry)) { if (is_migration_entry(entry)) {
migration_entry_wait(vma->vm_mm, fe->pmd, fe->address); migration_entry_wait(vma->vm_mm, vmf->pmd,
vmf->address);
} else if (is_hwpoison_entry(entry)) { } else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON; ret = VM_FAULT_HWPOISON;
} else { } else {
print_bad_pte(vma, fe->address, orig_pte, NULL); print_bad_pte(vma, vmf->address, orig_pte, NULL);
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
} }
goto out; goto out;
...@@ -2542,16 +2544,16 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2542,16 +2544,16 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
delayacct_set_flag(DELAYACCT_PF_SWAPIN); delayacct_set_flag(DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry); page = lookup_swap_cache(entry);
if (!page) { if (!page) {
page = swapin_readahead(entry, page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma,
GFP_HIGHUSER_MOVABLE, vma, fe->address); vmf->address);
if (!page) { if (!page) {
/* /*
* Back out if somebody else faulted in this pte * Back out if somebody else faulted in this pte
* while we released the pte lock. * while we released the pte lock.
*/ */
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
fe->address, &fe->ptl); vmf->address, &vmf->ptl);
if (likely(pte_same(*fe->pte, orig_pte))) if (likely(pte_same(*vmf->pte, orig_pte)))
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN); delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
goto unlock; goto unlock;
...@@ -2573,7 +2575,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2573,7 +2575,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
} }
swapcache = page; swapcache = page;
locked = lock_page_or_retry(page, vma->vm_mm, fe->flags); locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN); delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
if (!locked) { if (!locked) {
...@@ -2590,7 +2592,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2590,7 +2592,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
goto out_page; goto out_page;
page = ksm_might_need_to_copy(page, vma, fe->address); page = ksm_might_need_to_copy(page, vma, vmf->address);
if (unlikely(!page)) { if (unlikely(!page)) {
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
page = swapcache; page = swapcache;
...@@ -2606,9 +2608,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2606,9 +2608,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
/* /*
* Back out if somebody else already faulted in this pte. * Back out if somebody else already faulted in this pte.
*/ */
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&fe->ptl); &vmf->ptl);
if (unlikely(!pte_same(*fe->pte, orig_pte))) if (unlikely(!pte_same(*vmf->pte, orig_pte)))
goto out_nomap; goto out_nomap;
if (unlikely(!PageUptodate(page))) { if (unlikely(!PageUptodate(page))) {
...@@ -2629,22 +2631,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2629,22 +2631,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot); pte = mk_pte(page, vma->vm_page_prot);
if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma); pte = maybe_mkwrite(pte_mkdirty(pte), vma);
fe->flags &= ~FAULT_FLAG_WRITE; vmf->flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE; ret |= VM_FAULT_WRITE;
exclusive = RMAP_EXCLUSIVE; exclusive = RMAP_EXCLUSIVE;
} }
flush_icache_page(vma, page); flush_icache_page(vma, page);
if (pte_swp_soft_dirty(orig_pte)) if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte); pte = pte_mksoft_dirty(pte);
set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
if (page == swapcache) { if (page == swapcache) {
do_page_add_anon_rmap(page, vma, fe->address, exclusive); do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
mem_cgroup_commit_charge(page, memcg, true, false); mem_cgroup_commit_charge(page, memcg, true, false);
activate_page(page); activate_page(page);
} else { /* ksm created a completely new copy */ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, fe->address, false); page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false); mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma); lru_cache_add_active_or_unevictable(page, vma);
} }
...@@ -2667,22 +2669,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) ...@@ -2667,22 +2669,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
put_page(swapcache); put_page(swapcache);
} }
if (fe->flags & FAULT_FLAG_WRITE) { if (vmf->flags & FAULT_FLAG_WRITE) {
ret |= do_wp_page(fe, pte); ret |= do_wp_page(vmf, pte);
if (ret & VM_FAULT_ERROR) if (ret & VM_FAULT_ERROR)
ret &= VM_FAULT_ERROR; ret &= VM_FAULT_ERROR;
goto out; goto out;
} }
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
unlock: unlock:
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
out: out:
return ret; return ret;
out_nomap: out_nomap:
mem_cgroup_cancel_charge(page, memcg, false); mem_cgroup_cancel_charge(page, memcg, false);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
out_page: out_page:
unlock_page(page); unlock_page(page);
out_release: out_release:
...@@ -2733,9 +2735,9 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo ...@@ -2733,9 +2735,9 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with mmap_sem still held, but pte unmapped and unlocked.
*/ */
static int do_anonymous_page(struct fault_env *fe) static int do_anonymous_page(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
struct page *page; struct page *page;
pte_t entry; pte_t entry;
...@@ -2745,7 +2747,7 @@ static int do_anonymous_page(struct fault_env *fe) ...@@ -2745,7 +2747,7 @@ static int do_anonymous_page(struct fault_env *fe)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
/* Check if we need to add a guard page to the stack */ /* Check if we need to add a guard page to the stack */
if (check_stack_guard_page(vma, fe->address) < 0) if (check_stack_guard_page(vma, vmf->address) < 0)
return VM_FAULT_SIGSEGV; return VM_FAULT_SIGSEGV;
/* /*
...@@ -2758,26 +2760,26 @@ static int do_anonymous_page(struct fault_env *fe) ...@@ -2758,26 +2760,26 @@ static int do_anonymous_page(struct fault_env *fe)
* *
* Here we only have down_read(mmap_sem). * Here we only have down_read(mmap_sem).
*/ */
if (pte_alloc(vma->vm_mm, fe->pmd, fe->address)) if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
return VM_FAULT_OOM; return VM_FAULT_OOM;
/* See the comment in pte_alloc_one_map() */ /* See the comment in pte_alloc_one_map() */
if (unlikely(pmd_trans_unstable(fe->pmd))) if (unlikely(pmd_trans_unstable(vmf->pmd)))
return 0; return 0;
/* Use the zero-page for reads */ /* Use the zero-page for reads */
if (!(fe->flags & FAULT_FLAG_WRITE) && if (!(vmf->flags & FAULT_FLAG_WRITE) &&
!mm_forbids_zeropage(vma->vm_mm)) { !mm_forbids_zeropage(vma->vm_mm)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address), entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
vma->vm_page_prot)); vma->vm_page_prot));
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
&fe->ptl); vmf->address, &vmf->ptl);
if (!pte_none(*fe->pte)) if (!pte_none(*vmf->pte))
goto unlock; goto unlock;
/* Deliver the page fault to userland, check inside PT lock */ /* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) { if (userfaultfd_missing(vma)) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return handle_userfault(fe, VM_UFFD_MISSING); return handle_userfault(vmf, VM_UFFD_MISSING);
} }
goto setpte; goto setpte;
} }
...@@ -2785,7 +2787,7 @@ static int do_anonymous_page(struct fault_env *fe) ...@@ -2785,7 +2787,7 @@ static int do_anonymous_page(struct fault_env *fe)
/* Allocate our own private page. */ /* Allocate our own private page. */
if (unlikely(anon_vma_prepare(vma))) if (unlikely(anon_vma_prepare(vma)))
goto oom; goto oom;
page = alloc_zeroed_user_highpage_movable(vma, fe->address); page = alloc_zeroed_user_highpage_movable(vma, vmf->address);
if (!page) if (!page)
goto oom; goto oom;
...@@ -2803,30 +2805,30 @@ static int do_anonymous_page(struct fault_env *fe) ...@@ -2803,30 +2805,30 @@ static int do_anonymous_page(struct fault_env *fe)
if (vma->vm_flags & VM_WRITE) if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry)); entry = pte_mkwrite(pte_mkdirty(entry));
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&fe->ptl); &vmf->ptl);
if (!pte_none(*fe->pte)) if (!pte_none(*vmf->pte))
goto release; goto release;
/* Deliver the page fault to userland, check inside PT lock */ /* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) { if (userfaultfd_missing(vma)) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
mem_cgroup_cancel_charge(page, memcg, false); mem_cgroup_cancel_charge(page, memcg, false);
put_page(page); put_page(page);
return handle_userfault(fe, VM_UFFD_MISSING); return handle_userfault(vmf, VM_UFFD_MISSING);
} }
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, fe->address, false); page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false); mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma); lru_cache_add_active_or_unevictable(page, vma);
setpte: setpte:
set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
unlock: unlock:
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0; return 0;
release: release:
mem_cgroup_cancel_charge(page, memcg, false); mem_cgroup_cancel_charge(page, memcg, false);
...@@ -2843,62 +2845,62 @@ static int do_anonymous_page(struct fault_env *fe) ...@@ -2843,62 +2845,62 @@ static int do_anonymous_page(struct fault_env *fe)
* released depending on flags and vma->vm_ops->fault() return value. * released depending on flags and vma->vm_ops->fault() return value.
* See filemap_fault() and __lock_page_or_retry(). * See filemap_fault() and __lock_page_or_retry().
*/ */
static int __do_fault(struct fault_env *fe, pgoff_t pgoff, static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff,
struct page *cow_page, struct page **page, void **entry) struct page *cow_page, struct page **page, void **entry)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct vm_fault vmf; struct vm_fault vmf2;
int ret; int ret;
vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK); vmf2.virtual_address = (void __user *)(vmf->address & PAGE_MASK);
vmf.pgoff = pgoff; vmf2.pgoff = pgoff;
vmf.flags = fe->flags; vmf2.flags = vmf->flags;
vmf.page = NULL; vmf2.page = NULL;
vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf2.gfp_mask = __get_fault_gfp_mask(vma);
vmf.cow_page = cow_page; vmf2.cow_page = cow_page;
ret = vma->vm_ops->fault(vma, &vmf); ret = vma->vm_ops->fault(vma, &vmf2);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret; return ret;
if (ret & VM_FAULT_DAX_LOCKED) { if (ret & VM_FAULT_DAX_LOCKED) {
*entry = vmf.entry; *entry = vmf2.entry;
return ret; return ret;
} }
if (unlikely(PageHWPoison(vmf.page))) { if (unlikely(PageHWPoison(vmf2.page))) {
if (ret & VM_FAULT_LOCKED) if (ret & VM_FAULT_LOCKED)
unlock_page(vmf.page); unlock_page(vmf2.page);
put_page(vmf.page); put_page(vmf2.page);
return VM_FAULT_HWPOISON; return VM_FAULT_HWPOISON;
} }
if (unlikely(!(ret & VM_FAULT_LOCKED))) if (unlikely(!(ret & VM_FAULT_LOCKED)))
lock_page(vmf.page); lock_page(vmf2.page);
else else
VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page); VM_BUG_ON_PAGE(!PageLocked(vmf2.page), vmf2.page);
*page = vmf.page; *page = vmf2.page;
return ret; return ret;
} }
static int pte_alloc_one_map(struct fault_env *fe) static int pte_alloc_one_map(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
if (!pmd_none(*fe->pmd)) if (!pmd_none(*vmf->pmd))
goto map_pte; goto map_pte;
if (fe->prealloc_pte) { if (vmf->prealloc_pte) {
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_none(*fe->pmd))) { if (unlikely(!pmd_none(*vmf->pmd))) {
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
goto map_pte; goto map_pte;
} }
atomic_long_inc(&vma->vm_mm->nr_ptes); atomic_long_inc(&vma->vm_mm->nr_ptes);
pmd_populate(vma->vm_mm, fe->pmd, fe->prealloc_pte); pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
fe->prealloc_pte = 0; vmf->prealloc_pte = 0;
} else if (unlikely(pte_alloc(vma->vm_mm, fe->pmd, fe->address))) { } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
map_pte: map_pte:
...@@ -2913,11 +2915,11 @@ static int pte_alloc_one_map(struct fault_env *fe) ...@@ -2913,11 +2915,11 @@ static int pte_alloc_one_map(struct fault_env *fe)
* through an atomic read in C, which is what pmd_trans_unstable() * through an atomic read in C, which is what pmd_trans_unstable()
* provides. * provides.
*/ */
if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
return VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&fe->ptl); &vmf->ptl);
return 0; return 0;
} }
...@@ -2935,24 +2937,24 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, ...@@ -2935,24 +2937,24 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
return true; return true;
} }
static void deposit_prealloc_pte(struct fault_env *fe) static void deposit_prealloc_pte(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
/* /*
* We are going to consume the prealloc table, * We are going to consume the prealloc table,
* count that as nr_ptes. * count that as nr_ptes.
*/ */
atomic_long_inc(&vma->vm_mm->nr_ptes); atomic_long_inc(&vma->vm_mm->nr_ptes);
fe->prealloc_pte = 0; vmf->prealloc_pte = 0;
} }
static int do_set_pmd(struct fault_env *fe, struct page *page) static int do_set_pmd(struct vm_fault *vmf, struct page *page)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
bool write = fe->flags & FAULT_FLAG_WRITE; bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned long haddr = fe->address & HPAGE_PMD_MASK; unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
pmd_t entry; pmd_t entry;
int i, ret; int i, ret;
...@@ -2966,15 +2968,15 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ...@@ -2966,15 +2968,15 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
* Archs like ppc64 need additional space to store information * Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that. * related to pte entry. Use the preallocated table for that.
*/ */
if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) { if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address); vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
if (!fe->prealloc_pte) if (!vmf->prealloc_pte)
return VM_FAULT_OOM; return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */ smp_wmb(); /* See comment in __pte_alloc() */
} }
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_none(*fe->pmd))) if (unlikely(!pmd_none(*vmf->pmd)))
goto out; goto out;
for (i = 0; i < HPAGE_PMD_NR; i++) for (i = 0; i < HPAGE_PMD_NR; i++)
...@@ -2990,11 +2992,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ...@@ -2990,11 +2992,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
* deposit and withdraw with pmd lock held * deposit and withdraw with pmd lock held
*/ */
if (arch_needs_pgtable_deposit()) if (arch_needs_pgtable_deposit())
deposit_prealloc_pte(fe); deposit_prealloc_pte(vmf);
set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
update_mmu_cache_pmd(vma, haddr, fe->pmd); update_mmu_cache_pmd(vma, haddr, vmf->pmd);
/* fault is handled */ /* fault is handled */
ret = 0; ret = 0;
...@@ -3005,13 +3007,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ...@@ -3005,13 +3007,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
* withdraw with pmd lock held. * withdraw with pmd lock held.
*/ */
if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
fe->pmd); vmf->pmd);
spin_unlock(fe->ptl); spin_unlock(vmf->ptl);
return ret; return ret;
} }
#else #else
static int do_set_pmd(struct fault_env *fe, struct page *page) static int do_set_pmd(struct vm_fault *vmf, struct page *page)
{ {
BUILD_BUG(); BUILD_BUG();
return 0; return 0;
...@@ -3022,41 +3024,42 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ...@@ -3022,41 +3024,42 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
* alloc_set_pte - setup new PTE entry for given page and add reverse page * alloc_set_pte - setup new PTE entry for given page and add reverse page
* mapping. If needed, the function allocates a page table or uses a pre-allocated one. * mapping. If needed, the function allocates a page table or uses a pre-allocated one.
* *
* @fe: fault environment * @vmf: fault environment
* @memcg: memcg to charge page (only for private mappings) * @memcg: memcg to charge page (only for private mappings)
* @page: page to map * @page: page to map
* *
* Caller must take care of unlocking fe->ptl, if fe->pte is non-NULL on return. * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
* return.
* *
* Target users are page handler itself and implementations of * Target users are page handler itself and implementations of
* vm_ops->map_pages. * vm_ops->map_pages.
*/ */
int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
struct page *page) struct page *page)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
bool write = fe->flags & FAULT_FLAG_WRITE; bool write = vmf->flags & FAULT_FLAG_WRITE;
pte_t entry; pte_t entry;
int ret; int ret;
if (pmd_none(*fe->pmd) && PageTransCompound(page) && if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
/* THP on COW? */ /* THP on COW? */
VM_BUG_ON_PAGE(memcg, page); VM_BUG_ON_PAGE(memcg, page);
ret = do_set_pmd(fe, page); ret = do_set_pmd(vmf, page);
if (ret != VM_FAULT_FALLBACK) if (ret != VM_FAULT_FALLBACK)
goto fault_handled; goto fault_handled;
} }
if (!fe->pte) { if (!vmf->pte) {
ret = pte_alloc_one_map(fe); ret = pte_alloc_one_map(vmf);
if (ret) if (ret)
goto fault_handled; goto fault_handled;
} }
/* Re-check under ptl */ /* Re-check under ptl */
if (unlikely(!pte_none(*fe->pte))) { if (unlikely(!pte_none(*vmf->pte))) {
ret = VM_FAULT_NOPAGE; ret = VM_FAULT_NOPAGE;
goto fault_handled; goto fault_handled;
} }
...@@ -3068,24 +3071,24 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, ...@@ -3068,24 +3071,24 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
/* copy-on-write page */ /* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) { if (write && !(vma->vm_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, fe->address, false); page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false); mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma); lru_cache_add_active_or_unevictable(page, vma);
} else { } else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page, false); page_add_file_rmap(page, false);
} }
set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
/* no need to invalidate: a not-present page won't be cached */ /* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
ret = 0; ret = 0;
fault_handled: fault_handled:
/* preallocated pagetable is unused: free it */ /* preallocated pagetable is unused: free it */
if (fe->prealloc_pte) { if (vmf->prealloc_pte) {
pte_free(fe->vma->vm_mm, fe->prealloc_pte); pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
fe->prealloc_pte = 0; vmf->prealloc_pte = 0;
} }
return ret; return ret;
} }
...@@ -3154,17 +3157,17 @@ late_initcall(fault_around_debugfs); ...@@ -3154,17 +3157,17 @@ late_initcall(fault_around_debugfs);
* fault_around_pages() value (and therefore to page order). This way it's * fault_around_pages() value (and therefore to page order). This way it's
* easier to guarantee that we don't cross page table boundaries. * easier to guarantee that we don't cross page table boundaries.
*/ */
static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) static int do_fault_around(struct vm_fault *vmf, pgoff_t start_pgoff)
{ {
unsigned long address = fe->address, nr_pages, mask; unsigned long address = vmf->address, nr_pages, mask;
pgoff_t end_pgoff; pgoff_t end_pgoff;
int off, ret = 0; int off, ret = 0;
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
fe->address = max(address & mask, fe->vma->vm_start); vmf->address = max(address & mask, vmf->vma->vm_start);
off = ((address - fe->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
start_pgoff -= off; start_pgoff -= off;
/* /*
...@@ -3172,44 +3175,45 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) ...@@ -3172,44 +3175,45 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
* or fault_around_pages() from start_pgoff, depending what is nearest. * or fault_around_pages() from start_pgoff, depending what is nearest.
*/ */
end_pgoff = start_pgoff - end_pgoff = start_pgoff -
((fe->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1; PTRS_PER_PTE - 1;
end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1, end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1); start_pgoff + nr_pages - 1);
if (pmd_none(*fe->pmd)) { if (pmd_none(*vmf->pmd)) {
fe->prealloc_pte = pte_alloc_one(fe->vma->vm_mm, fe->address); vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
if (!fe->prealloc_pte) vmf->address);
if (!vmf->prealloc_pte)
goto out; goto out;
smp_wmb(); /* See comment in __pte_alloc() */ smp_wmb(); /* See comment in __pte_alloc() */
} }
fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
/* Huge page is mapped? Page fault is solved */ /* Huge page is mapped? Page fault is solved */
if (pmd_trans_huge(*fe->pmd)) { if (pmd_trans_huge(*vmf->pmd)) {
ret = VM_FAULT_NOPAGE; ret = VM_FAULT_NOPAGE;
goto out; goto out;
} }
/* ->map_pages() hasn't done anything useful. Cold page cache? */ /* ->map_pages() hasn't done anything useful. Cold page cache? */
if (!fe->pte) if (!vmf->pte)
goto out; goto out;
/* check if the page fault is solved */ /* check if the page fault is solved */
fe->pte -= (fe->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
if (!pte_none(*fe->pte)) if (!pte_none(*vmf->pte))
ret = VM_FAULT_NOPAGE; ret = VM_FAULT_NOPAGE;
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
out: out:
fe->address = address; vmf->address = address;
fe->pte = NULL; vmf->pte = NULL;
return ret; return ret;
} }
static int do_read_fault(struct fault_env *fe, pgoff_t pgoff) static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *fault_page; struct page *fault_page;
int ret = 0; int ret = 0;
...@@ -3219,27 +3223,27 @@ static int do_read_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3219,27 +3223,27 @@ static int do_read_fault(struct fault_env *fe, pgoff_t pgoff)
* something). * something).
*/ */
if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
ret = do_fault_around(fe, pgoff); ret = do_fault_around(vmf, pgoff);
if (ret) if (ret)
return ret; return ret;
} }
ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret; return ret;
ret |= alloc_set_pte(fe, NULL, fault_page); ret |= alloc_set_pte(vmf, NULL, fault_page);
if (fe->pte) if (vmf->pte)
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
unlock_page(fault_page); unlock_page(fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
put_page(fault_page); put_page(fault_page);
return ret; return ret;
} }
static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *fault_page, *new_page; struct page *fault_page, *new_page;
void *fault_entry; void *fault_entry;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
...@@ -3248,7 +3252,7 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3248,7 +3252,7 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
if (unlikely(anon_vma_prepare(vma))) if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM; return VM_FAULT_OOM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address); new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
if (!new_page) if (!new_page)
return VM_FAULT_OOM; return VM_FAULT_OOM;
...@@ -3258,17 +3262,17 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3258,17 +3262,17 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry); ret = __do_fault(vmf, pgoff, new_page, &fault_page, &fault_entry);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out; goto uncharge_out;
if (!(ret & VM_FAULT_DAX_LOCKED)) if (!(ret & VM_FAULT_DAX_LOCKED))
copy_user_highpage(new_page, fault_page, fe->address, vma); copy_user_highpage(new_page, fault_page, vmf->address, vma);
__SetPageUptodate(new_page); __SetPageUptodate(new_page);
ret |= alloc_set_pte(fe, memcg, new_page); ret |= alloc_set_pte(vmf, memcg, new_page);
if (fe->pte) if (vmf->pte)
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
if (!(ret & VM_FAULT_DAX_LOCKED)) { if (!(ret & VM_FAULT_DAX_LOCKED)) {
unlock_page(fault_page); unlock_page(fault_page);
put_page(fault_page); put_page(fault_page);
...@@ -3284,15 +3288,15 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3284,15 +3288,15 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff)
return ret; return ret;
} }
static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) static int do_shared_fault(struct vm_fault *vmf, pgoff_t pgoff)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *fault_page; struct page *fault_page;
struct address_space *mapping; struct address_space *mapping;
int dirtied = 0; int dirtied = 0;
int ret, tmp; int ret, tmp;
ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret; return ret;
...@@ -3302,7 +3306,7 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3302,7 +3306,7 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
*/ */
if (vma->vm_ops->page_mkwrite) { if (vma->vm_ops->page_mkwrite) {
unlock_page(fault_page); unlock_page(fault_page);
tmp = do_page_mkwrite(vma, fault_page, fe->address); tmp = do_page_mkwrite(vma, fault_page, vmf->address);
if (unlikely(!tmp || if (unlikely(!tmp ||
(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
put_page(fault_page); put_page(fault_page);
...@@ -3310,9 +3314,9 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3310,9 +3314,9 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
} }
} }
ret |= alloc_set_pte(fe, NULL, fault_page); ret |= alloc_set_pte(vmf, NULL, fault_page);
if (fe->pte) if (vmf->pte)
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
VM_FAULT_RETRY))) { VM_FAULT_RETRY))) {
unlock_page(fault_page); unlock_page(fault_page);
...@@ -3350,19 +3354,19 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) ...@@ -3350,19 +3354,19 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff)
* The mmap_sem may have been released depending on flags and our * The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry(). * return value. See filemap_fault() and __lock_page_or_retry().
*/ */
static int do_fault(struct fault_env *fe) static int do_fault(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
pgoff_t pgoff = linear_page_index(vma, fe->address); pgoff_t pgoff = linear_page_index(vma, vmf->address);
/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
if (!vma->vm_ops->fault) if (!vma->vm_ops->fault)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
if (!(fe->flags & FAULT_FLAG_WRITE)) if (!(vmf->flags & FAULT_FLAG_WRITE))
return do_read_fault(fe, pgoff); return do_read_fault(vmf, pgoff);
if (!(vma->vm_flags & VM_SHARED)) if (!(vma->vm_flags & VM_SHARED))
return do_cow_fault(fe, pgoff); return do_cow_fault(vmf, pgoff);
return do_shared_fault(fe, pgoff); return do_shared_fault(vmf, pgoff);
} }
static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
...@@ -3380,9 +3384,9 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, ...@@ -3380,9 +3384,9 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
return mpol_misplaced(page, vma, addr); return mpol_misplaced(page, vma, addr);
} }
static int do_numa_page(struct fault_env *fe, pte_t pte) static int do_numa_page(struct vm_fault *vmf, pte_t pte)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL; struct page *page = NULL;
int page_nid = -1; int page_nid = -1;
int last_cpupid; int last_cpupid;
...@@ -3400,10 +3404,10 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) ...@@ -3400,10 +3404,10 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
* page table entry is not accessible, so there would be no * page table entry is not accessible, so there would be no
* concurrent hardware modifications to the PTE. * concurrent hardware modifications to the PTE.
*/ */
fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd); vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
spin_lock(fe->ptl); spin_lock(vmf->ptl);
if (unlikely(!pte_same(*fe->pte, pte))) { if (unlikely(!pte_same(*vmf->pte, pte))) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out; goto out;
} }
...@@ -3412,18 +3416,18 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) ...@@ -3412,18 +3416,18 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
pte = pte_mkyoung(pte); pte = pte_mkyoung(pte);
if (was_writable) if (was_writable)
pte = pte_mkwrite(pte); pte = pte_mkwrite(pte);
set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
update_mmu_cache(vma, fe->address, fe->pte); update_mmu_cache(vma, vmf->address, vmf->pte);
page = vm_normal_page(vma, fe->address, pte); page = vm_normal_page(vma, vmf->address, pte);
if (!page) { if (!page) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0; return 0;
} }
/* TODO: handle PTE-mapped THP */ /* TODO: handle PTE-mapped THP */
if (PageCompound(page)) { if (PageCompound(page)) {
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0; return 0;
} }
...@@ -3447,9 +3451,9 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) ...@@ -3447,9 +3451,9 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
last_cpupid = page_cpupid_last(page); last_cpupid = page_cpupid_last(page);
page_nid = page_to_nid(page); page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, fe->address, page_nid, target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
&flags); &flags);
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
if (target_nid == -1) { if (target_nid == -1) {
put_page(page); put_page(page);
goto out; goto out;
...@@ -3469,28 +3473,28 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) ...@@ -3469,28 +3473,28 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
return 0; return 0;
} }
static int create_huge_pmd(struct fault_env *fe) static int create_huge_pmd(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = fe->vma; struct vm_area_struct *vma = vmf->vma;
if (vma_is_anonymous(vma)) if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(fe); return do_huge_pmd_anonymous_page(vmf);
if (vma->vm_ops->pmd_fault) if (vma->vm_ops->pmd_fault)
return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd, return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd,
fe->flags); vmf->flags);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
{ {
if (vma_is_anonymous(fe->vma)) if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(fe, orig_pmd); return do_huge_pmd_wp_page(vmf, orig_pmd);
if (fe->vma->vm_ops->pmd_fault) if (vmf->vma->vm_ops->pmd_fault)
return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd, return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address,
fe->flags); vmf->pmd, vmf->flags);
/* COW handled on pte level: split pmd */ /* COW handled on pte level: split pmd */
VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma); VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
__split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL); __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
...@@ -3515,21 +3519,21 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) ...@@ -3515,21 +3519,21 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
* The mmap_sem may have been released depending on flags and our return value. * The mmap_sem may have been released depending on flags and our return value.
* See filemap_fault() and __lock_page_or_retry(). * See filemap_fault() and __lock_page_or_retry().
*/ */
static int handle_pte_fault(struct fault_env *fe) static int handle_pte_fault(struct vm_fault *vmf)
{ {
pte_t entry; pte_t entry;
if (unlikely(pmd_none(*fe->pmd))) { if (unlikely(pmd_none(*vmf->pmd))) {
/* /*
* Leave __pte_alloc() until later: because vm_ops->fault may * Leave __pte_alloc() until later: because vm_ops->fault may
* want to allocate huge page, and if we expose page table * want to allocate huge page, and if we expose page table
* for an instant, it will be difficult to retract from * for an instant, it will be difficult to retract from
* concurrent faults and from rmap lookups. * concurrent faults and from rmap lookups.
*/ */
fe->pte = NULL; vmf->pte = NULL;
} else { } else {
/* See comment in pte_alloc_one_map() */ /* See comment in pte_alloc_one_map() */
if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
return 0; return 0;
/* /*
* A regular pmd is established and it can't morph into a huge * A regular pmd is established and it can't morph into a huge
...@@ -3537,9 +3541,9 @@ static int handle_pte_fault(struct fault_env *fe) ...@@ -3537,9 +3541,9 @@ static int handle_pte_fault(struct fault_env *fe)
* mmap_sem read mode and khugepaged takes it in write mode. * mmap_sem read mode and khugepaged takes it in write mode.
* So now it's safe to run pte_offset_map(). * So now it's safe to run pte_offset_map().
*/ */
fe->pte = pte_offset_map(fe->pmd, fe->address); vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
entry = *fe->pte; entry = *vmf->pte;
/* /*
* some architectures can have larger ptes than wordsize, * some architectures can have larger ptes than wordsize,
...@@ -3551,37 +3555,37 @@ static int handle_pte_fault(struct fault_env *fe) ...@@ -3551,37 +3555,37 @@ static int handle_pte_fault(struct fault_env *fe)
*/ */
barrier(); barrier();
if (pte_none(entry)) { if (pte_none(entry)) {
pte_unmap(fe->pte); pte_unmap(vmf->pte);
fe->pte = NULL; vmf->pte = NULL;
} }
} }
if (!fe->pte) { if (!vmf->pte) {
if (vma_is_anonymous(fe->vma)) if (vma_is_anonymous(vmf->vma))
return do_anonymous_page(fe); return do_anonymous_page(vmf);
else else
return do_fault(fe); return do_fault(vmf);
} }
if (!pte_present(entry)) if (!pte_present(entry))
return do_swap_page(fe, entry); return do_swap_page(vmf, entry);
if (pte_protnone(entry) && vma_is_accessible(fe->vma)) if (pte_protnone(entry) && vma_is_accessible(vmf->vma))
return do_numa_page(fe, entry); return do_numa_page(vmf, entry);
fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd); vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
spin_lock(fe->ptl); spin_lock(vmf->ptl);
if (unlikely(!pte_same(*fe->pte, entry))) if (unlikely(!pte_same(*vmf->pte, entry)))
goto unlock; goto unlock;
if (fe->flags & FAULT_FLAG_WRITE) { if (vmf->flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry)) if (!pte_write(entry))
return do_wp_page(fe, entry); return do_wp_page(vmf, entry);
entry = pte_mkdirty(entry); entry = pte_mkdirty(entry);
} }
entry = pte_mkyoung(entry); entry = pte_mkyoung(entry);
if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry, if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
fe->flags & FAULT_FLAG_WRITE)) { vmf->flags & FAULT_FLAG_WRITE)) {
update_mmu_cache(fe->vma, fe->address, fe->pte); update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
} else { } else {
/* /*
* This is needed only for protection faults but the arch code * This is needed only for protection faults but the arch code
...@@ -3589,11 +3593,11 @@ static int handle_pte_fault(struct fault_env *fe) ...@@ -3589,11 +3593,11 @@ static int handle_pte_fault(struct fault_env *fe)
* This still avoids useless tlb flushes for .text page faults * This still avoids useless tlb flushes for .text page faults
* with threads. * with threads.
*/ */
if (fe->flags & FAULT_FLAG_WRITE) if (vmf->flags & FAULT_FLAG_WRITE)
flush_tlb_fix_spurious_fault(fe->vma, fe->address); flush_tlb_fix_spurious_fault(vmf->vma, vmf->address);
} }
unlock: unlock:
pte_unmap_unlock(fe->pte, fe->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0; return 0;
} }
...@@ -3606,7 +3610,7 @@ static int handle_pte_fault(struct fault_env *fe) ...@@ -3606,7 +3610,7 @@ static int handle_pte_fault(struct fault_env *fe)
static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
unsigned int flags) unsigned int flags)
{ {
struct fault_env fe = { struct vm_fault vmf = {
.vma = vma, .vma = vma,
.address = address, .address = address,
.flags = flags, .flags = flags,
...@@ -3619,35 +3623,35 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ...@@ -3619,35 +3623,35 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
pud = pud_alloc(mm, pgd, address); pud = pud_alloc(mm, pgd, address);
if (!pud) if (!pud)
return VM_FAULT_OOM; return VM_FAULT_OOM;
fe.pmd = pmd_alloc(mm, pud, address); vmf.pmd = pmd_alloc(mm, pud, address);
if (!fe.pmd) if (!vmf.pmd)
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) { if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
int ret = create_huge_pmd(&fe); int ret = create_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK)) if (!(ret & VM_FAULT_FALLBACK))
return ret; return ret;
} else { } else {
pmd_t orig_pmd = *fe.pmd; pmd_t orig_pmd = *vmf.pmd;
int ret; int ret;
barrier(); barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&fe, orig_pmd); return do_huge_pmd_numa_page(&vmf, orig_pmd);
if ((fe.flags & FAULT_FLAG_WRITE) && if ((vmf.flags & FAULT_FLAG_WRITE) &&
!pmd_write(orig_pmd)) { !pmd_write(orig_pmd)) {
ret = wp_huge_pmd(&fe, orig_pmd); ret = wp_huge_pmd(&vmf, orig_pmd);
if (!(ret & VM_FAULT_FALLBACK)) if (!(ret & VM_FAULT_FALLBACK))
return ret; return ret;
} else { } else {
huge_pmd_set_accessed(&fe, orig_pmd); huge_pmd_set_accessed(&vmf, orig_pmd);
return 0; return 0;
} }
} }
} }
return handle_pte_fault(&fe); return handle_pte_fault(&vmf);
} }
/* /*
......
...@@ -1801,7 +1801,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1801,7 +1801,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
} }
EXPORT_SYMBOL(filemap_fault); EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct fault_env *fe, void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff) pgoff_t start_pgoff, pgoff_t end_pgoff)
{ {
BUG(); BUG();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment