Commit 6f4576e3, authored by Naoya Horiguchi, committed by Linus Torvalds

mempolicy: apply page table walker on queue_pages_range()

queue_pages_range() currently walks the page tables in its own way, which
duplicates code.  This patch applies the generic page table walker to reduce
the number of lines of code.

queue_pages_range() has to do some prechecks to determine whether to really
walk over a vma or just skip it.  Now that we have the test_walk() callback in
mm_walk for this purpose, we can do this replacement cleanly.
queue_pages_test_walk() depends not only on the current vma but also on the
previous one, so queue_pages->prev is introduced to remember it.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1757bbd9
...@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { ...@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
static void migrate_page_add(struct page *page, struct list_head *pagelist, static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags); unsigned long flags);
/*
 * State shared between queue_pages_range() and its page table walker
 * callbacks; queue_pages_range() passes it through mm_walk->private.
 */
struct queue_pages {
/* list that the callbacks hand to migrate_page_add()/isolate_huge_page() */
struct list_head *pagelist;
/* MPOL_MF_* flags given to queue_pages_range() */
unsigned long flags;
/* node mask each page's node id is tested against (see MPOL_MF_INVERT) */
nodemask_t *nmask;
/* vma recorded by the previous queue_pages_test_walk() call, NULL at first */
struct vm_area_struct *prev;
};
/* /*
* Scan through pages checking if pages follow certain conditions, * Scan through pages checking if pages follow certain conditions,
* and move them to the pagelist if they do. * and move them to the pagelist if they do.
*/ */
static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long addr, unsigned long end, unsigned long end, struct mm_walk *walk)
const nodemask_t *nodes, unsigned long flags,
void *private)
{ {
pte_t *orig_pte; struct vm_area_struct *vma = walk->vma;
struct page *page;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
int nid;
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); split_huge_page_pmd(vma, addr, pmd);
do { if (pmd_trans_unstable(pmd))
struct page *page; return 0;
int nid;
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte)) if (!pte_present(*pte))
continue; continue;
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
...@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (PageReserved(page)) if (PageReserved(page))
continue; continue;
nid = page_to_nid(page); nid = page_to_nid(page);
if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
continue; continue;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
migrate_page_add(page, private, flags); migrate_page_add(page, qp->pagelist, flags);
else }
break; pte_unmap_unlock(pte - 1, ptl);
} while (pte++, addr += PAGE_SIZE, addr != end); cond_resched();
pte_unmap_unlock(orig_pte, ptl); return 0;
return addr != end;
} }
static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
pmd_t *pmd, const nodemask_t *nodes, unsigned long flags, unsigned long addr, unsigned long end,
void *private) struct mm_walk *walk)
{ {
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
int nid; int nid;
struct page *page; struct page *page;
spinlock_t *ptl; spinlock_t *ptl;
pte_t entry; pte_t entry;
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd); ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
entry = huge_ptep_get((pte_t *)pmd); entry = huge_ptep_get(pte);
if (!pte_present(entry)) if (!pte_present(entry))
goto unlock; goto unlock;
page = pte_page(entry); page = pte_page(entry);
nid = page_to_nid(page); nid = page_to_nid(page);
if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
goto unlock; goto unlock;
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) || if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
isolate_huge_page(page, private); isolate_huge_page(page, qp->pagelist);
unlock: unlock:
spin_unlock(ptl); spin_unlock(ptl);
#else #else
BUG(); BUG();
#endif #endif
}
static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
const nodemask_t *nodes, unsigned long flags,
void *private)
{
pmd_t *pmd;
unsigned long next;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd))
continue;
if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
flags, private);
continue;
}
split_huge_page_pmd(vma, addr, pmd);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
flags, private))
return -EIO;
} while (pmd++, addr = next, addr != end);
return 0;
}
static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
const nodemask_t *nodes, unsigned long flags,
void *private)
{
pud_t *pud;
unsigned long next;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
continue;
if (pud_none_or_clear_bad(pud))
continue;
if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
flags, private))
return -EIO;
} while (pud++, addr = next, addr != end);
return 0;
}
static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
const nodemask_t *nodes, unsigned long flags,
void *private)
{
pgd_t *pgd;
unsigned long next;
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
flags, private))
return -EIO;
} while (pgd++, addr = next, addr != end);
return 0; return 0;
} }
...@@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, ...@@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
} }
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
/*
 * test_walk() callback used by queue_pages_range().
 *
 * Decides, per vma, whether the page table walker should descend into it.
 *
 * Returns:
 *   0       - walk this vma and queue pages from it
 *   1       - skip this vma
 *   -EFAULT - MPOL_MF_DISCONTIG_OK is not set and either this is the last
 *             vma but it ends before @end, or there is a gap between the
 *             previously seen vma and this one
 *
 * Side effect: records the current vma in qp->prev so that the next call
 * can compare against it.
 */
static int queue_pages_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct queue_pages *qp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	const unsigned long flags = qp->flags;

	if (!(flags & MPOL_MF_DISCONTIG_OK)) {
		/* the mapping stops short of the requested end */
		if (!vma->vm_next && vma->vm_end < end)
			return -EFAULT;
		/* hole between the previous vma and this one */
		if (qp->prev && qp->prev->vm_end < vma->vm_start)
			return -EFAULT;
	}

	/* remember this vma even when it ends up being skipped below */
	qp->prev = vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	if (flags & MPOL_MF_LAZY) {
		/* clamp the requested range to this vma */
		unsigned long range_start =
			vma->vm_start > start ? vma->vm_start : start;
		unsigned long range_end =
			vma->vm_end > end ? end : vma->vm_end;

		/* Similar to task_numa_work, skip inaccessible VMAs */
		if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
			change_prot_numa(vma, range_start, range_end);
		return 1;
	}

	/* queue pages from current vma */
	if (flags & MPOL_MF_STRICT)
		return 0;
	if ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && vma_migratable(vma))
		return 0;

	return 1;
}
/* /*
* Walk through page tables and collect pages to be migrated. * Walk through page tables and collect pages to be migrated.
* *
...@@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, ...@@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
*/ */
static int static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
const nodemask_t *nodes, unsigned long flags, void *private) nodemask_t *nodes, unsigned long flags,
{ struct list_head *pagelist)
int err = 0; {
struct vm_area_struct *vma, *prev; struct queue_pages qp = {
.pagelist = pagelist,
vma = find_vma(mm, start); .flags = flags,
if (!vma) .nmask = nodes,
return -EFAULT; .prev = NULL,
prev = NULL; };
for (; vma && vma->vm_start < end; vma = vma->vm_next) { struct mm_walk queue_pages_walk = {
unsigned long endvma = vma->vm_end; .hugetlb_entry = queue_pages_hugetlb,
.pmd_entry = queue_pages_pte_range,
if (endvma > end) .test_walk = queue_pages_test_walk,
endvma = end; .mm = mm,
if (vma->vm_start > start) .private = &qp,
start = vma->vm_start; };
if (!(flags & MPOL_MF_DISCONTIG_OK)) { return walk_page_range(start, end, &queue_pages_walk);
if (!vma->vm_next && vma->vm_end < end)
return -EFAULT;
if (prev && prev->vm_end < vma->vm_start)
return -EFAULT;
}
if (flags & MPOL_MF_LAZY) {
/* Similar to task_numa_work, skip inaccessible VMAs */
if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
change_prot_numa(vma, start, endvma);
goto next;
}
if ((flags & MPOL_MF_STRICT) ||
((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
vma_migratable(vma))) {
err = queue_pages_pgd_range(vma, start, endvma, nodes,
flags, private);
if (err)
break;
}
next:
prev = vma;
}
return err;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment