Commit 7af01450 authored by Thomas Gleixner's avatar Thomas Gleixner

x86/mm/cpa: Prevent large page split when ftrace flips RW on kernel text

ftrace does not use text_poke() for enabling trace functionality. It uses
its own mechanism and flips the whole kernel text to RW and back to RO.

The CPA rework removed a loop based check of 4k pages which tried to
preserve a large page by checking each 4k page whether the change would
actually cover all pages in the large page.

This resulted in endless loops for nothing as in testing it turned out that
it actually never preserved anything. Of course testing missed to include
ftrace, which is the one and only case which benefitted from the 4k loop.

As a consequence enabling function tracing or ftrace based kprobes results
in a full 4k split of the kernel text, which affects iTLB performance.

The kernel RO protection is the only valid case where this can actually
preserve large pages.

All other static protections (RO data, data NX, PCI, BIOS) are truly
static.  So a conflict with those protections which results in a split
should only ever happen when a change of memory next to a protected region
is attempted. But these conflicts are rightfully splitting the large page
to preserve the protected regions. In fact a change to the protected
regions itself is a bug and is warned about.

Add an exception for the static protection check for kernel text RO when
the to be changed region spawns a full large page which allows to preserve
the large mappings. This also prevents the syslog to be spammed about CPA
violations when ftrace is used.

The exception needs to be removed once ftrace switched over to text_poke()
which avoids the whole issue.

Fixes: 585948f4 ("x86/mm/cpa: Avoid the 4k pages check completely")
Reported-by: default avatarSong Liu <songliubraving@fb.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Tested-by: default avatarSong Liu <songliubraving@fb.com>
Reviewed-by: default avatarSong Liu <songliubraving@fb.com>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908282355340.1938@nanos.tec.linutronix.de
parent 42e0e954
...@@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val, ...@@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
*/ */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
unsigned long pfn, unsigned long npg, unsigned long pfn, unsigned long npg,
int warnlvl) unsigned long lpsize, int warnlvl)
{ {
pgprotval_t forbidden, res; pgprotval_t forbidden, res;
unsigned long end; unsigned long end;
...@@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, ...@@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
forbidden = res; forbidden = res;
/*
* Special case to preserve a large page. If the change spawns the
* full large page mapping then there is no point to split it
* up. Happens with ftrace and is going to be removed once ftrace
* switched to text_poke().
*/
if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
res = protect_kernel_text_ro(start, end); res = protect_kernel_text_ro(start, end);
check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
forbidden |= res; forbidden |= res;
}
/* Check the PFN directly */ /* Check the PFN directly */
res = protect_pci_bios(pfn, pfn + npg - 1); res = protect_pci_bios(pfn, pfn + npg - 1);
...@@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, ...@@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
* extra conditional required here. * extra conditional required here.
*/ */
chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
CPA_CONFLICT); psize, CPA_CONFLICT);
if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
/* /*
...@@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, ...@@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
* protection requirement in the large page. * protection requirement in the large page.
*/ */
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
CPA_DETECT); psize, CPA_DETECT);
/* /*
* If there is a conflict, split the large page. * If there is a conflict, split the large page.
...@@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn, ...@@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
if (!cpa->force_static_prot) if (!cpa->force_static_prot)
goto set; goto set;
prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT); /* Hand in lpsize = 0 to enforce the protection mechanism */
prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
if (pgprot_val(prot) == pgprot_val(ref_prot)) if (pgprot_val(prot) == pgprot_val(ref_prot))
goto set; goto set;
...@@ -1503,7 +1512,8 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) ...@@ -1503,7 +1512,8 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
cpa_inc_4k_install(); cpa_inc_4k_install();
new_prot = static_protections(new_prot, address, pfn, 1, /* Hand in lpsize = 0 to enforce the protection mechanism */
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT); CPA_PROTECT);
new_prot = pgprot_clear_protnone_bits(new_prot); new_prot = pgprot_clear_protnone_bits(new_prot);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment