Commit 6b76c3ae authored by Will Deacon's avatar Will Deacon

Merge branch 'for-next/faultaround' into for-next/core

Initialise prefaulted PTEs as 'old' for arm64 when hardware access-flag
updates are supported, which drastically improves vmscan performance.

* for-next/faultaround:
  mm: filemap: Fix microblaze build failure with 'mmu_defconfig'
  mm/nommu: Fix return type of filemap_map_pages()
  mm: Mark anonymous struct field of 'struct vm_fault' as 'const'
  mm: Use static initialisers for immutable fields of 'struct vm_fault'
  mm: Avoid modifying vmf.address in __collapse_huge_page_swapin()
  mm: Pass 'address' to map to do_set_pte() and drop FAULT_FLAG_PREFAULT
  mm: Move immutable fields of 'struct vm_fault' into anonymous struct
  arm64: mm: Implement arch_wants_old_prefaulted_pte()
  mm: Allow architectures to request 'old' entries when prefaulting
  mm: Cleanup faultaround and finish_fault() codepaths
parents 90eb8c9d de591a82
......@@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void)
return !cpu_has_hw_af();
}
#define arch_faults_on_old_pte arch_faults_on_old_pte
#define arch_faults_on_old_pte arch_faults_on_old_pte
/*
* Experimentally, it's cheap to set the access flag in hardware and we
* benefit from prefaulting mappings as 'old' to start with.
*/
static inline bool arch_wants_old_prefaulted_pte(void)
{
return !arch_faults_on_old_pte();
}
#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
#endif /* !__ASSEMBLY__ */
......
......@@ -1319,17 +1319,19 @@ xfs_filemap_pfn_mkwrite(
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
static void
static vm_fault_t
xfs_filemap_map_pages(
struct vm_fault *vmf,
pgoff_t start_pgoff,
pgoff_t end_pgoff)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
filemap_map_pages(vmf, start_pgoff, end_pgoff);
ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
}
static const struct vm_operations_struct xfs_file_vm_ops = {
......
......@@ -514,11 +514,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
* pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
struct vm_area_struct *vma; /* Target VMA */
unsigned int flags; /* FAULT_FLAG_xxx flags */
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
const struct {
struct vm_area_struct *vma; /* Target VMA */
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
};
unsigned int flags; /* FAULT_FLAG_xxx flags
* XXX: should really be 'const' */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
pud_t *pud; /* Pointer to pud entry matching
......@@ -542,8 +545,8 @@ struct vm_fault {
* is not NULL, otherwise pmd.
*/
pgtable_t prealloc_pte; /* Pre-allocated pte page table.
* vm_ops->map_pages() calls
* alloc_set_pte() from atomic context.
* vm_ops->map_pages() sets up a page
* table from atomic context.
* do_fault_around() pre-allocates
* page table to avoid allocation from
* atomic context.
......@@ -578,7 +581,7 @@ struct vm_operations_struct {
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
void (*map_pages)(struct vm_fault *vmf,
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
unsigned long (*pagesize)(struct vm_area_struct * area);
......@@ -988,7 +991,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
......@@ -2622,7 +2627,7 @@ extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */
extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern void filemap_map_pages(struct vm_fault *vmf,
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
......
......@@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}
/*
* the ordering of these checks is important for pmds with _page_devmap set.
* if we check pmd_trans_unstable() first we will trip the bad_pmd() check
* inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
* returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
*/
static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
{
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}
#ifndef CONFIG_NUMA_BALANCING
/*
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
......
......@@ -42,6 +42,8 @@
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
......@@ -2911,74 +2913,163 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
{
struct file *file = vmf->vma->vm_file;
struct mm_struct *mm = vmf->vma->vm_mm;
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd)) {
unlock_page(page);
put_page(page);
return true;
}
if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
vm_fault_t ret = do_set_pmd(vmf, page);
if (!ret) {
/* The page is mapped successfully, reference consumed. */
unlock_page(page);
return true;
}
}
if (pmd_none(*vmf->pmd)) {
vmf->ptl = pmd_lock(mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(mm);
pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
vmf->prealloc_pte = NULL;
}
spin_unlock(vmf->ptl);
}
/* See comment in handle_pte_fault() */
if (pmd_devmap_trans_unstable(vmf->pmd)) {
unlock_page(page);
put_page(page);
return true;
}
return false;
}
static struct page *next_uptodate_page(struct page *page,
struct address_space *mapping,
struct xa_state *xas, pgoff_t end_pgoff)
{
unsigned long max_idx;
do {
if (!page)
return NULL;
if (xas_retry(xas, page))
continue;
if (xa_is_value(page))
continue;
if (PageLocked(page))
continue;
if (!page_cache_get_speculative(page))
continue;
/* Has the page moved or been split? */
if (unlikely(page != xas_reload(xas)))
goto skip;
if (!PageUptodate(page) || PageReadahead(page))
goto skip;
if (PageHWPoison(page))
goto skip;
if (!trylock_page(page))
goto skip;
if (page->mapping != mapping)
goto unlock;
if (!PageUptodate(page))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas->xa_index >= max_idx)
goto unlock;
return page;
unlock:
unlock_page(page);
skip:
put_page(page);
} while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
return NULL;
}
static inline struct page *first_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
return next_uptodate_page(xas_find(xas, end_pgoff),
mapping, xas, end_pgoff);
}
static inline struct page *next_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
return next_uptodate_page(xas_next_entry(xas, end_pgoff),
mapping, xas, end_pgoff);
}
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct vm_area_struct *vma = vmf->vma;
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
vm_fault_t ret = 0;
rcu_read_lock();
xas_for_each(&xas, head, end_pgoff) {
if (xas_retry(&xas, head))
continue;
if (xa_is_value(head))
goto next;
head = first_map_page(mapping, &xas, end_pgoff);
if (!head)
goto out;
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
if (PageLocked(head))
goto next;
if (!page_cache_get_speculative(head))
goto next;
if (filemap_map_pmd(vmf, head)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
/* Has the page moved or been split? */
if (unlikely(head != xas_reload(&xas)))
goto skip;
addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
do {
page = find_subpage(head, xas.xa_index);
if (!PageUptodate(head) ||
PageReadahead(page) ||
PageHWPoison(page))
goto skip;
if (!trylock_page(head))
goto skip;
if (head->mapping != mapping || !PageUptodate(head))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas.xa_index >= max_idx)
if (PageHWPoison(page))
goto unlock;
if (mmap_miss > 0)
mmap_miss--;
vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte)
vmf->pte += xas.xa_index - last_pgoff;
addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
vmf->pte += xas.xa_index - last_pgoff;
last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, page))
if (!pte_none(*vmf->pte))
goto unlock;
/* We're about to handle the fault */
if (vmf->address == addr)
ret = VM_FAULT_NOPAGE;
do_set_pte(vmf, page, addr);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, addr, vmf->pte);
unlock_page(head);
goto next;
continue;
unlock:
unlock_page(head);
skip:
put_page(head);
next:
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd))
break;
}
} while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
return ret;
}
EXPORT_SYMBOL(filemap_map_pages);
......
......@@ -991,38 +991,41 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
static bool __collapse_huge_page_swapin(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
unsigned long haddr, pmd_t *pmd,
int referenced)
{
int swapped_in = 0;
vm_fault_t ret = 0;
struct vm_fault vmf = {
.vma = vma,
.address = address,
.flags = FAULT_FLAG_ALLOW_RETRY,
.pmd = pmd,
.pgoff = linear_page_index(vma, address),
};
vmf.pte = pte_offset_map(pmd, address);
for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
vmf.pte++, vmf.address += PAGE_SIZE) {
unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
for (address = haddr; address < end; address += PAGE_SIZE) {
struct vm_fault vmf = {
.vma = vma,
.address = address,
.pgoff = linear_page_index(vma, haddr),
.flags = FAULT_FLAG_ALLOW_RETRY,
.pmd = pmd,
};
vmf.pte = pte_offset_map(pmd, address);
vmf.orig_pte = *vmf.pte;
if (!is_swap_pte(vmf.orig_pte))
if (!is_swap_pte(vmf.orig_pte)) {
pte_unmap(vmf.pte);
continue;
}
swapped_in++;
ret = do_swap_page(&vmf);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
if (ret & VM_FAULT_RETRY) {
mmap_read_lock(mm);
if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
if (hugepage_vma_revalidate(mm, haddr, &vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
/* check if the pmd is still valid */
if (mm_find_pmd(mm, address) != pmd) {
if (mm_find_pmd(mm, haddr) != pmd) {
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
......@@ -1031,11 +1034,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
/* pte is unmapped now, we need to map it */
vmf.pte = pte_offset_map(pmd, vmf.address);
}
vmf.pte--;
pte_unmap(vmf.pte);
/* Drain LRU add pagevec to remove extra pin on the swapped in pages */
if (swapped_in)
......
This diff is collapsed.
......@@ -1668,10 +1668,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_fault *vmf,
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
BUG();
return 0;
}
EXPORT_SYMBOL(filemap_map_pages);
......
......@@ -1520,11 +1520,11 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
{
struct vm_area_struct pvma;
struct page *page;
struct vm_fault vmf;
struct vm_fault vmf = {
.vma = &pvma,
};
shmem_pseudo_vma_init(&pvma, info, index);
vmf.vma = &pvma;
vmf.address = 0;
page = swap_cluster_readahead(swap, gfp, &vmf);
shmem_pseudo_vma_destroy(&pvma);
......
......@@ -1951,8 +1951,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
si = swap_info[type];
pte = pte_offset_map(pmd, addr);
do {
struct vm_fault vmf;
if (!is_swap_pte(*pte))
continue;
......@@ -1968,9 +1966,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
swap_map = &si->swap_map[offset];
page = lookup_swap_cache(entry, vma, addr);
if (!page) {
vmf.vma = vma;
vmf.address = addr;
vmf.pmd = pmd;
struct vm_fault vmf = {
.vma = vma,
.address = addr,
.pmd = pmd,
};
page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
&vmf);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment