Commit 60ab3244 authored by Andrea Arcangeli, committed by Linus Torvalds

thp: khugepaged: make khugepaged aware about madvise

MADV_HUGEPAGE and MADV_NOHUGEPAGE were fully effective only if run after
mmap and before touching the memory.  While this is enough for most
usages, it's little effort to make madvise more dynamic at runtime on an
existing mapping by making khugepaged aware about madvise.

MADV_HUGEPAGE: register in khugepaged immediately without waiting for a page
fault (which may never happen if all pages are already mapped and the
"enabled" knob was set to madvise during the initial page faults).

MADV_NOHUGEPAGE: skip vmas marked VM_NOHUGEPAGE in khugepaged to stop
collapsing pages where not needed.

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a664b2d8
...@@ -105,7 +105,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); ...@@ -105,7 +105,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER #if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator" #error "hugepages can't be allocated by the buddy allocator"
#endif #endif
extern int hugepage_madvise(unsigned long *vm_flags, int advice); extern int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice);
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start, unsigned long start,
unsigned long end, unsigned long end,
...@@ -143,7 +144,8 @@ static inline int split_huge_page(struct page *page) ...@@ -143,7 +144,8 @@ static inline int split_huge_page(struct page *page)
do { } while (0) do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \ #define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0) do { } while (0)
static inline int hugepage_madvise(unsigned long *vm_flags, int advice) static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{ {
BUG(); BUG();
return 0; return 0;
......
...@@ -1389,7 +1389,8 @@ int split_huge_page(struct page *page) ...@@ -1389,7 +1389,8 @@ int split_huge_page(struct page *page)
return ret; return ret;
} }
int hugepage_madvise(unsigned long *vm_flags, int advice) int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{ {
switch (advice) { switch (advice) {
case MADV_HUGEPAGE: case MADV_HUGEPAGE:
...@@ -1404,6 +1405,13 @@ int hugepage_madvise(unsigned long *vm_flags, int advice) ...@@ -1404,6 +1405,13 @@ int hugepage_madvise(unsigned long *vm_flags, int advice)
return -EINVAL; return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE; *vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE; *vm_flags |= VM_HUGEPAGE;
/*
* If the vma become good for khugepaged to scan,
* register it here without waiting a page fault that
* may not happen any time soon.
*/
if (unlikely(khugepaged_enter_vma_merge(vma)))
return -ENOMEM;
break; break;
case MADV_NOHUGEPAGE: case MADV_NOHUGEPAGE:
/* /*
...@@ -1417,6 +1425,11 @@ int hugepage_madvise(unsigned long *vm_flags, int advice) ...@@ -1417,6 +1425,11 @@ int hugepage_madvise(unsigned long *vm_flags, int advice)
return -EINVAL; return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE; *vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE; *vm_flags |= VM_NOHUGEPAGE;
/*
* Setting VM_NOHUGEPAGE will prevent khugepaged from scanning
* this vma even if we leave the mm registered in khugepaged if
* it got registered before VM_NOHUGEPAGE was set.
*/
break; break;
} }
...@@ -1784,7 +1797,8 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -1784,7 +1797,8 @@ static void collapse_huge_page(struct mm_struct *mm,
if (address < hstart || address + HPAGE_PMD_SIZE > hend) if (address < hstart || address + HPAGE_PMD_SIZE > hend)
goto out; goto out;
if (!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
(vma->vm_flags & VM_NOHUGEPAGE))
goto out; goto out;
/* VM_PFNMAP vmas may have vm_ops null but vm_file set */ /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
...@@ -2007,8 +2021,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, ...@@ -2007,8 +2021,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
break; break;
} }
if (!(vma->vm_flags & VM_HUGEPAGE) && if ((!(vma->vm_flags & VM_HUGEPAGE) &&
!khugepaged_always()) { !khugepaged_always()) ||
(vma->vm_flags & VM_NOHUGEPAGE)) {
progress++; progress++;
continue; continue;
} }
......
...@@ -73,7 +73,7 @@ static long madvise_behavior(struct vm_area_struct * vma, ...@@ -73,7 +73,7 @@ static long madvise_behavior(struct vm_area_struct * vma,
break; break;
case MADV_HUGEPAGE: case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE: case MADV_NOHUGEPAGE:
error = hugepage_madvise(&new_flags, behavior); error = hugepage_madvise(vma, &new_flags, behavior);
if (error) if (error)
goto out; goto out;
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment