Commit 6cff64b8 authored by Dave Hansen's avatar Dave Hansen Committed by Ingo Molnar

x86/mm: Use INVPCID for __native_flush_tlb_single()

This uses INVPCID to shoot down individual lines of the user mapping
instead of marking the entire user map as invalid. This
could/might/possibly be faster.

This for sure needs tlb_single_page_flush_ceiling to be redetermined;
esp. since INVPCID is _slow_.

A detailed performance analysis is available here:

  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com

[ Peterz: Split out from big combo patch ]
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 21e94459
...@@ -197,6 +197,7 @@ ...@@ -197,6 +197,7 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
......
...@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid) ...@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid)
return asid + 1; return asid + 1;
} }
/*
 * Derive the user-space PCID for @asid: it is the corresponding
 * kernel PCID with the PTI "switch bit" set (only when page-table
 * isolation is compiled in; otherwise it equals the kernel PCID).
 */
static inline u16 user_pcid(u16 asid)
{
	u16 pcid = kern_pcid(asid);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/* Flag this PCID as belonging to the user half of the mm. */
	pcid |= 1 << X86_CR3_PTI_SWITCH_BIT;
#endif

	return pcid;
}
struct pgd_t; struct pgd_t;
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{ {
...@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void) ...@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void)
/* /*
* Using INVPCID is considerably faster than a pair of writes * Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore. * to CR4 sandwiched inside an IRQ flag save/restore.
*
* Note, this works with CR4.PCIDE=0 or 1.
*/ */
invpcid_flush_all(); invpcid_flush_all();
return; return;
...@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr) ...@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
if (!static_cpu_has(X86_FEATURE_PTI)) if (!static_cpu_has(X86_FEATURE_PTI))
return; return;
/*
* Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
* Just use invalidate_user_asid() in case we are called early.
*/
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
invalidate_user_asid(loaded_mm_asid); invalidate_user_asid(loaded_mm_asid);
else
invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
} }
/* /*
......
...@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void) ...@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
static void setup_pcid(void) static void setup_pcid(void)
{ {
#ifdef CONFIG_X86_64 if (!IS_ENABLED(CONFIG_X86_64))
if (boot_cpu_has(X86_FEATURE_PCID)) { return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
if (boot_cpu_has(X86_FEATURE_PGE)) { if (boot_cpu_has(X86_FEATURE_PGE)) {
/* /*
* This can't be cr4_set_bits_and_update_boot() -- * This can't be cr4_set_bits_and_update_boot() -- the
* the trampoline code can't handle CR4.PCIDE and * trampoline code can't handle CR4.PCIDE and it wouldn't
* it wouldn't do any good anyway. Despite the name, * do any good anyway. Despite the name,
* cr4_set_bits_and_update_boot() doesn't actually * cr4_set_bits_and_update_boot() doesn't actually cause
* cause the bits in question to remain set all the * the bits in question to remain set all the way through
* way through the secondary boot asm. * the secondary boot asm.
* *
* Instead, we brute-force it and set CR4.PCIDE * Instead, we brute-force it and set CR4.PCIDE manually in
* manually in start_secondary(). * start_secondary().
*/ */
cr4_set_bits(X86_CR4_PCIDE); cr4_set_bits(X86_CR4_PCIDE);
/*
* INVPCID's single-context modes (2/3) only work if we set
* X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
* on systems that have X86_CR4_PCIDE clear, or that have
* no INVPCID support at all.
*/
if (boot_cpu_has(X86_FEATURE_INVPCID))
setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
} else { } else {
/* /*
* flush_tlb_all(), as currently implemented, won't * flush_tlb_all(), as currently implemented, won't work if
* work if PCID is on but PGE is not. Since that * PCID is on but PGE is not. Since that combination
* combination doesn't exist on real hardware, there's * doesn't exist on real hardware, there's no reason to try
* no reason to try to fully support it, but it's * to fully support it, but it's polite to avoid corrupting
* polite to avoid corrupting data if we're on * data if we're on an improperly configured VM.
* an improperly configured VM.
*/ */
setup_clear_cpu_cap(X86_FEATURE_PCID); setup_clear_cpu_cap(X86_FEATURE_PCID);
} }
}
#endif
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment