Commit 8af26be0 authored by Peter Zijlstra

perf/core: Fix arch_perf_get_page_size()

The (new) page-table walker in arch_perf_get_page_size() is broken in
various ways. Specifically, while it is used in a lockless manner, it
neither depends on CONFIG_HAVE_FAST_GUP, nor uses the proper _lockless
offset methods, nor is it careful to read each entry only once.

Also the hugetlb support is broken due to calling pte_page() without
first checking pte_special().

Rewrite the whole thing to be a proper lockless page-table walker and
employ the new pXX_leaf_size() pgtable functions to determine the
pagetable size without looking at the page-frames.

Fixes: 51b646b2 ("perf,mm: Handle non-page-table-aligned hugetlbfs")
Fixes: 8d97e718 ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20201126124207.GM3040@hirez.programming.kicks-ass.net
parent 560dabbd
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/min_heap.h> #include <linux/min_heap.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/pgtable.h>
#include "internal.h" #include "internal.h"
...@@ -7001,90 +7002,62 @@ static u64 perf_virt_to_phys(u64 virt) ...@@ -7001,90 +7002,62 @@ static u64 perf_virt_to_phys(u64 virt)
return phys_addr; return phys_addr;
} }
#ifdef CONFIG_MMU
/* /*
* Return the MMU page size of a given virtual address. * Return the pagetable size of a given virtual address.
*
* This generic implementation handles page-table aligned huge pages, as well
* as non-page-table aligned hugetlbfs compound pages.
*
* If an architecture supports and uses non-page-table aligned pages in their
* kernel mapping it will need to provide it's own implementation of this
* function.
*/ */
__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr) static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
{ {
struct page *page; u64 size = 0;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, addr); #ifdef CONFIG_HAVE_FAST_GUP
if (pgd_none(*pgd)) pgd_t *pgdp, pgd;
return 0; p4d_t *p4dp, p4d;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;
p4d = p4d_offset(pgd, addr); pgdp = pgd_offset(mm, addr);
if (!p4d_present(*p4d)) pgd = READ_ONCE(*pgdp);
if (pgd_none(pgd))
return 0; return 0;
if (p4d_leaf(*p4d)) if (pgd_leaf(pgd))
return 1ULL << P4D_SHIFT; return pgd_leaf_size(pgd);
pud = pud_offset(p4d, addr); p4dp = p4d_offset_lockless(pgdp, pgd, addr);
if (!pud_present(*pud)) p4d = READ_ONCE(*p4dp);
if (!p4d_present(p4d))
return 0; return 0;
if (pud_leaf(*pud)) { if (p4d_leaf(p4d))
#ifdef pud_page return p4d_leaf_size(p4d);
page = pud_page(*pud);
if (PageHuge(page))
return page_size(compound_head(page));
#endif
return 1ULL << PUD_SHIFT;
}
pmd = pmd_offset(pud, addr); pudp = pud_offset_lockless(p4dp, p4d, addr);
if (!pmd_present(*pmd)) pud = READ_ONCE(*pudp);
if (!pud_present(pud))
return 0; return 0;
if (pmd_leaf(*pmd)) { if (pud_leaf(pud))
#ifdef pmd_page return pud_leaf_size(pud);
page = pmd_page(*pmd);
if (PageHuge(page))
return page_size(compound_head(page));
#endif
return 1ULL << PMD_SHIFT;
}
pte = pte_offset_map(pmd, addr); pmdp = pmd_offset_lockless(pudp, pud, addr);
if (!pte_present(*pte)) { pmd = READ_ONCE(*pmdp);
pte_unmap(pte); if (!pmd_present(pmd))
return 0; return 0;
}
page = pte_page(*pte); if (pmd_leaf(pmd))
if (PageHuge(page)) { return pmd_leaf_size(pmd);
u64 size = page_size(compound_head(page));
pte_unmap(pte);
return size;
}
pte_unmap(pte); ptep = pte_offset_map(&pmd, addr);
return PAGE_SIZE; pte = ptep_get_lockless(ptep);
} if (pte_present(pte))
size = pte_leaf_size(pte);
pte_unmap(ptep);
#endif /* CONFIG_HAVE_FAST_GUP */
#else return size;
static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
{
return 0;
} }
#endif
static u64 perf_get_page_size(unsigned long addr) static u64 perf_get_page_size(unsigned long addr)
{ {
struct mm_struct *mm; struct mm_struct *mm;
...@@ -7109,7 +7082,7 @@ static u64 perf_get_page_size(unsigned long addr) ...@@ -7109,7 +7082,7 @@ static u64 perf_get_page_size(unsigned long addr)
mm = &init_mm; mm = &init_mm;
} }
size = arch_perf_get_page_size(mm, addr); size = perf_get_pgtable_size(mm, addr);
local_irq_restore(flags); local_irq_restore(flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment