Commit 4daae3b4 authored by Mel Gorman

mm: mempolicy: Use _PAGE_NUMA to migrate pages

Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
	sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page()
pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.
Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
parent 149c33e1
...@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page) ...@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page)
return page; return page;
} }
extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t pmd, pmd_t *pmdp); unsigned long addr, pmd_t pmd, pmd_t *pmdp);
#else /* CONFIG_TRANSPARENT_HUGEPAGE */ #else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
...@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, ...@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
return 0; return 0;
} }
static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t pmd, pmd_t *pmdp) unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{ {
return 0;
} }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/migrate.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include "internal.h" #include "internal.h"
...@@ -1019,17 +1020,39 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, ...@@ -1019,17 +1020,39 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
} }
/* NUMA hinting page fault entry point for trans huge pmds */ /* NUMA hinting page fault entry point for trans huge pmds */
int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t pmd, pmd_t *pmdp) unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{ {
struct page *page; struct page *page = NULL;
unsigned long haddr = addr & HPAGE_PMD_MASK; unsigned long haddr = addr & HPAGE_PMD_MASK;
int target_nid;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp))) if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock; goto out_unlock;
page = pmd_page(pmd); page = pmd_page(pmd);
get_page(page);
spin_unlock(&mm->page_table_lock);
target_nid = mpol_misplaced(page, vma, haddr);
if (target_nid == -1)
goto clear_pmdnuma;
/*
* Due to lacking code to migrate thp pages, we'll split
* (which preserves the special PROT_NONE) and re-take the
* fault on the normal pages.
*/
split_huge_page(page);
put_page(page);
return 0;
clear_pmdnuma:
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
pmd = pmd_mknonnuma(pmd); pmd = pmd_mknonnuma(pmd);
set_pmd_at(mm, haddr, pmdp, pmd); set_pmd_at(mm, haddr, pmdp, pmd);
VM_BUG_ON(pmd_numa(*pmdp)); VM_BUG_ON(pmd_numa(*pmdp));
...@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, ...@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
out_unlock: out_unlock:
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (page)
put_page(page);
return 0; return 0;
} }
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include <linux/swapops.h> #include <linux/swapops.h>
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/migrate.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
{ {
struct page *page; struct page *page = NULL;
spinlock_t *ptl; spinlock_t *ptl;
int current_nid, target_nid;
/* /*
* The "pte" at this point cannot be used safely without * The "pte" at this point cannot be used safely without
...@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/ */
ptl = pte_lockptr(mm, pmd); ptl = pte_lockptr(mm, pmd);
spin_lock(ptl); spin_lock(ptl);
if (unlikely(!pte_same(*ptep, pte))) if (unlikely(!pte_same(*ptep, pte))) {
goto out_unlock; pte_unmap_unlock(ptep, ptl);
goto out;
}
pte = pte_mknonnuma(pte); pte = pte_mknonnuma(pte);
set_pte_at(mm, addr, ptep, pte); set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep); update_mmu_cache(vma, addr, ptep);
...@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
return 0; return 0;
} }
out_unlock: get_page(page);
current_nid = page_to_nid(page);
target_nid = mpol_misplaced(page, vma, addr);
pte_unmap_unlock(ptep, ptl); pte_unmap_unlock(ptep, ptl);
if (target_nid == -1) {
/*
* Account for the fault against the current node if it is not
* being replaced, regardless of where the page is located.
*/
current_nid = numa_node_id();
put_page(page);
goto out;
}
/* Migrate to the requested node */
if (migrate_misplaced_page(page, target_nid))
current_nid = target_nid;
out:
return 0; return 0;
} }
...@@ -3655,7 +3677,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3655,7 +3677,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
barrier(); barrier();
if (pmd_trans_huge(orig_pmd)) { if (pmd_trans_huge(orig_pmd)) {
if (pmd_numa(*pmd)) if (pmd_numa(*pmd))
return do_huge_pmd_numa_page(mm, address, return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd); orig_pmd, pmd);
if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment