Commit 4aaf269c authored by Juergen Gross, committed by Andrew Morton

mm: introduce arch_has_hw_nonleaf_pmd_young()

When running as a Xen PV guest, commit eed9a328 ("mm: x86: add
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation in
pmdp_test_and_clear_young():

 BUG: unable to handle page fault for address: ffff8880083374d0
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0003) - permissions violation
 PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065
 Oops: 0003 [#1] PREEMPT SMP NOPTI
 CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1
 RIP: e030:pmdp_test_and_clear_young+0x25/0x40

This happens because the Xen hypervisor can't emulate direct writes to
page table entries other than PTEs.

This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young(),
similar to arch_has_hw_pte_young(), and testing that instead of
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.

Link: https://lkml.kernel.org/r/20221123064510.16225-1-jgross@suse.com
Fixes: eed9a328 ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG")
Signed-off-by: Juergen Gross <jgross@suse.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Acked-by: Yu Zhao <yuzhao@google.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Acked-by: David Hildenbrand <david@redhat.com>	[core changes]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 6617da8f
...@@ -1439,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) ...@@ -1439,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
return true; return true;
} }
#ifdef CONFIG_XEN_PV
/*
 * x86 override of arch_has_hw_nonleaf_pmd_young(); the same-named macro
 * tells the generic header not to provide its fallback definition.
 *
 * Reports false when the X86_FEATURE_XENPV feature is enabled and true
 * otherwise.  The Xen hypervisor can't emulate direct writes to page table
 * entries other than PTEs, so pmdp_test_and_clear_young() on a non-leaf PMD
 * would trigger a protection violation in a Xen PV guest.
 */
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	if (cpu_feature_enabled(X86_FEATURE_XENPV))
		return false;

	return true;
}
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK #ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte) static inline bool pte_user_accessible_page(pte_t pte)
{ {
......
...@@ -267,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, ...@@ -267,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif #endif
#ifndef arch_has_hw_nonleaf_pmd_young
/*
 * Generic fallback, compiled only when no macro named
 * arch_has_hw_nonleaf_pmd_young has been defined beforehand (which is how an
 * architecture-specific version announces itself).
 *
 * Tells the caller whether the accessed bit in non-leaf PMD entries is
 * supported on the local CPU.  In this fallback the answer depends solely on
 * whether CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG is enabled.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
		return true;

	return false;
}
#endif
#ifndef arch_has_hw_pte_young #ifndef arch_has_hw_pte_young
/* /*
* Return whether the accessed bit is supported on the local CPU. * Return whether the accessed bit is supported on the local CPU.
......
...@@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area ...@@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
goto next; goto next;
if (!pmd_trans_huge(pmd[i])) { if (!pmd_trans_huge(pmd[i])) {
if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && if (arch_has_hw_nonleaf_pmd_young() &&
get_cap(LRU_GEN_NONLEAF_YOUNG)) get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i); pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next; goto next;
...@@ -4085,14 +4085,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, ...@@ -4085,14 +4085,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
#endif #endif
walk->mm_stats[MM_NONLEAF_TOTAL]++; walk->mm_stats[MM_NONLEAF_TOTAL]++;
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG if (arch_has_hw_nonleaf_pmd_young() &&
if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (!pmd_young(val)) if (!pmd_young(val))
continue; continue;
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
} }
#endif
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
continue; continue;
...@@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c ...@@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
caps |= BIT(LRU_GEN_MM_WALK); caps |= BIT(LRU_GEN_MM_WALK);
if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
caps |= BIT(LRU_GEN_NONLEAF_YOUNG); caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment