Commit 1e953d84 authored by Mike Kravetz's avatar Mike Kravetz Committed by David S. Miller

sparc64 mm: Fix more TSB sizing issues

Commit af1b1a9b ("sparc64 mm: Fix base TSB sizing when hugetlb
pages are used") addressed the difference between hugetlb and THP
pages when computing TSB sizes.  The following additional issues
were also discovered while working with the code.

In order to save memory, THP makes use of a huge zero page.  This huge
zero page does not count against a task's RSS, but it does consume TSB
entries.  This is similar to hugetlb pages.  Therefore, count huge
zero page entries in hugetlb_pte_count.

Accounting of THP pages is done in the routine set_pmd_at().
Unfortunately, this does not catch the case where a THP page is split.
To handle this case, decrement the count in pmdp_invalidate().
pmdp_invalidate is only called when splitting a THP.  However, 'sanity
checks' are added in case it is ever called for other purposes.

A more general issue exists with HPAGE_SIZE accounting.
hugetlb_pte_count tracks the number of HPAGE_SIZE (8M) pages.  This
value is used to size the TSB for HPAGE_SIZE pages.  However,
each HPAGE_SIZE page consists of two REAL_HPAGE_SIZE (4M) pages.
The TSB contains an entry for each REAL_HPAGE_SIZE page.  Therefore,
the number of REAL_HPAGE_SIZE pages should be used to size the huge
page TSB.  A new compile time constant REAL_HPAGE_PER_HPAGE is used
to multiply hugetlb_pte_count before sizing the TSB.

Changes from V1
- Fixed build issue if hugetlb or THP not configured
Signed-off-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent bdf2f59e
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
#endif #endif
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -484,6 +484,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) ...@@ -484,6 +484,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
tsb_grow(mm, MM_TSB_BASE, mm_rss); tsb_grow(mm, MM_TSB_BASE, mm_rss);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
mm_rss *= REAL_HPAGE_PER_HPAGE;
if (unlikely(mm_rss > if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
......
...@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, ...@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return; return;
if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
if (pmd_val(pmd) & _PAGE_PMD_HUGE) /*
* Note that this routine only sets pmds for THP pages.
* Hugetlb pages are handled elsewhere. We need to check
* for huge zero page. Huge zero pages are like hugetlb
* pages in that there is no RSS, but there is the need
* for TSB entries. So, huge zero page counts go into
* hugetlb_pte_count.
*/
if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
if (is_huge_zero_page(pmd_page(pmd)))
mm->context.hugetlb_pte_count++;
else
mm->context.thp_pte_count++; mm->context.thp_pte_count++;
} else {
if (is_huge_zero_page(pmd_page(orig)))
mm->context.hugetlb_pte_count--;
else else
mm->context.thp_pte_count--; mm->context.thp_pte_count--;
}
/* Do not try to allocate the TSB hash table if we /* Do not try to allocate the TSB hash table if we
* don't have one already. We have various locks held * don't have one already. We have various locks held
...@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, ...@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
} }
} }
/*
* This routine is only called when splitting a THP
*/
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp) pmd_t *pmdp)
{ {
...@@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, ...@@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
set_pmd_at(vma->vm_mm, address, pmdp, entry); set_pmd_at(vma->vm_mm, address, pmdp, entry);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
* set_pmd_at() will not be called in a way to decrement
* thp_pte_count when splitting a THP, so do it now.
* Sanity check pmd before doing the actual decrement.
*/
if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
!is_huge_zero_page(pmd_page(entry)))
(vma->vm_mm)->context.thp_pte_count--;
} }
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
......
...@@ -469,8 +469,10 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) ...@@ -469,8 +469,10 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
int init_new_context(struct task_struct *tsk, struct mm_struct *mm) int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{ {
unsigned long mm_rss = get_mm_rss(mm);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
unsigned long total_huge_pte_count; unsigned long saved_hugetlb_pte_count;
unsigned long saved_thp_pte_count;
#endif #endif
unsigned int i; unsigned int i;
...@@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ...@@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
* will re-increment the counters as the parent PTEs are * will re-increment the counters as the parent PTEs are
* copied into the child address space. * copied into the child address space.
*/ */
total_huge_pte_count = mm->context.hugetlb_pte_count + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
mm->context.thp_pte_count; saved_thp_pte_count = mm->context.thp_pte_count;
mm->context.hugetlb_pte_count = 0; mm->context.hugetlb_pte_count = 0;
mm->context.thp_pte_count = 0; mm->context.thp_pte_count = 0;
mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
#endif #endif
/* copy_mm() copies over the parent's mm_struct before calling /* copy_mm() copies over the parent's mm_struct before calling
...@@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ...@@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
/* If this is fork, inherit the parent's TSB size. We would /* If this is fork, inherit the parent's TSB size. We would
* grow it to that size on the first page fault anyways. * grow it to that size on the first page fault anyways.
*/ */
tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); tsb_grow(mm, MM_TSB_BASE, mm_rss);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (unlikely(total_huge_pte_count)) if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); tsb_grow(mm, MM_TSB_HUGE,
(saved_hugetlb_pte_count + saved_thp_pte_count) *
REAL_HPAGE_PER_HPAGE);
#endif #endif
if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment