Commit 92598ae2 authored by Linus Torvalds

Merge tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Borislav Petkov:

 - Rename a PKRU macro to make more sense when reading the code

 - Update pkeys documentation

 - Avoid reading a contended mm's TLB generation variable when not
   absolutely necessary, and fix a case where arch_tlbbatch_flush()
   doesn't adhere to the generation scheme, thus violating the
   conditions for the above avoidance.

* tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/tlb: Ignore f->new_tlb_gen when zero
  x86/pkeys: Clarify PKRU_AD_KEY macro
  Documentation/protection-keys: Clean up documentation for User Space pkeys
  x86/mm/tlb: Avoid reading mm_tlb_gen when possible
parents 94e37e84 8f1d56f6
@@ -4,31 +4,29 @@
 Memory Protection Keys
 ======================
 
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
-which is found on Intel's Skylake (and later) "Scalable Processor"
-Server CPUs. It will be available in future non-server Intel parts
-and future AMD processors.
-
-For anyone wishing to test or use this feature, it is available in
-Amazon's EC2 C5 instances and is known to work there using an Ubuntu
-17.04 image.
-
-Memory Protection Keys provides a mechanism for enforcing page-based
-protections, but without requiring modification of the page tables
-when an application changes protection domains. It works by
-dedicating 4 previously ignored bits in each page table entry to a
-"protection key", giving 16 possible keys.
+Memory Protection Keys provide a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables when an
+application changes protection domains.
+
+Pkeys Userspace (PKU) is a feature which can be found on:
+        * Intel server CPUs, Skylake and later
+        * Intel client CPUs, Tiger Lake (11th Gen Core) and later
+        * Future AMD CPUs
 
-There is also a new user-accessible register (PKRU) with two separate
-bits (Access Disable and Write Disable) for each key. Being a CPU
-register, PKRU is inherently thread-local, potentially giving each
+Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
+a "protection key", giving 16 possible keys.
+
+Protections for each key are defined with a per-CPU user-accessible register
+(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
+and Write Disable) for each of 16 keys.
+
+Being a CPU register, PKRU is inherently thread-local, potentially giving each
 thread a different set of protections from every other thread.
 
-There are two new instructions (RDPKRU/WRPKRU) for reading and writing
-to the new register. The feature is only available in 64-bit mode,
-even though there is theoretically space in the PAE PTEs. These
-permissions are enforced on data access only and have no effect on
-instruction fetches.
+There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
+register. The feature is only available in 64-bit mode, even though there is
+theoretically space in the PAE PTEs. These permissions are enforced on data
+access only and have no effect on instruction fetches.
 
 Syscalls
 ========
...
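The cleaned-up document above describes the hardware model; as a companion, here is a minimal user-space sketch (not part of this merge) showing the typical allocate/tag/toggle flow with the glibc 2.27+ wrappers pkey_alloc(), pkey_mprotect() and pkey_set(). It assumes an x86_64 CPU with pkeys support and keeps error handling deliberately short.

/* Minimal pkeys usage sketch; illustration only, not kernel or commit code. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = (size_t)sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Allocate a protection key with no restrictions to start with. */
	int pkey = pkey_alloc(0, 0);
	if (pkey < 0) {
		perror("pkey_alloc");
		return 1;
	}

	/* Tag the mapping once; later permission changes touch only PKRU. */
	if (pkey_mprotect(buf, len, PROT_READ | PROT_WRITE, pkey)) {
		perror("pkey_mprotect");
		return 1;
	}

	strcpy(buf, "hello pkeys");	/* allowed: AD/WD bits are clear */

	/* WRPKRU under the hood: deny all access to this key for this thread. */
	pkey_set(pkey, PKEY_DISABLE_ACCESS);
	/* Touching buf here would raise SIGSEGV with si_code SEGV_PKUERR. */

	pkey_set(pkey, 0);		/* allow access again */
	printf("%s\n", buf);

	pkey_free(pkey);
	munmap(buf, len);
	return 0;
}

Note that the pkey_set() calls change only this thread's PKRU register, which is exactly the thread-local property the documentation calls out.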
@@ -16,6 +16,7 @@
 void __flush_tlb_all(void);
 
 #define TLB_FLUSH_ALL			-1UL
+#define TLB_GENERATION_INVALID		0
 
 void cr4_update_irqsoff(unsigned long set, unsigned long clear);
 unsigned long cr4_read_shadow(void);
...
@@ -110,7 +110,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
 	return vma_pkey(vma);
 }
 
-#define PKRU_AD_KEY(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
+#define PKRU_AD_MASK(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
 
 /*
  * Make the default PKRU value (at execve() time) as restrictive
@@ -118,11 +118,14 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
  * in the process's lifetime will not accidentally get access
  * to data which is pkey-protected later on.
  */
-u32	init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
-		      PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
-		      PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
-		      PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
-		      PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
+u32	init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
+			  PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
+			  PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
+			  PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
+			  PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
+			  PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
+			  PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
+			  PKRU_AD_MASK(15);
 
 static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
 				   size_t count, loff_t *ppos)
...
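As a quick check on the renamed macro, the default PKRU value that the pkeys.c hunk above builds from PKRU_AD_MASK(1)..PKRU_AD_MASK(15) can be recomputed in a few lines of ordinary C. The two constants mirror the kernel's PKRU_AD_BIT and PKRU_BITS_PER_PKEY definitions; everything else is illustration only.

/* Recompute init_pkru_value outside the kernel; illustration only. */
#include <stdint.h>
#include <stdio.h>

#define PKRU_AD_BIT		0x1u	/* mirrors the kernel definition */
#define PKRU_BITS_PER_PKEY	2	/* AD + WD bits per key */
#define PKRU_AD_MASK(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))

int main(void)
{
	uint32_t init_pkru = 0;

	/* Access-disable every key except key 0, the default key for mprotect(). */
	for (int pkey = 1; pkey <= 15; pkey++)
		init_pkru |= PKRU_AD_MASK(pkey);

	printf("init_pkru_value = 0x%08x\n", init_pkru);	/* prints 0x55555554 */
	return 0;
}

In other words, the rename is purely cosmetic: each invocation still yields a single Access-Disable bit mask, and OR-ing keys 1 through 15 gives the restrictive execve()-time default of 0x55555554.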
@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
 	const struct flush_tlb_info *f = info;
 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
 	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
 	bool local = smp_processor_id() == f->initiating_cpu;
 	unsigned long nr_invalidate = 0;
+	u64 mm_tlb_gen;
 
 	/* This code cannot presently handle being reentered. */
 	VM_WARN_ON(!irqs_disabled());
@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
 		return;
 	}
 
+	if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
+		     f->new_tlb_gen <= local_tlb_gen)) {
+		/*
+		 * The TLB is already up to date in respect to f->new_tlb_gen.
+		 * While the core might be still behind mm_tlb_gen, checking
+		 * mm_tlb_gen unnecessarily would have negative caching effects
+		 * so avoid it.
+		 */
+		return;
+	}
+
+	/*
+	 * Defer mm_tlb_gen reading as long as possible to avoid cache
+	 * contention.
+	 */
+	mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+
 	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
 		/*
 		 * There's nothing to do: we're already up to date. This can
@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
 		/* Partial flush */
 		unsigned long addr = f->start;
 
+		/* Partial flush cannot have invalid generations */
+		VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
+
+		/* Partial flush must have valid mm */
+		VM_WARN_ON(f->mm == NULL);
+
 		nr_invalidate = (f->end - f->start) >> f->stride_shift;
 
 		while (addr < f->end) {
@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		struct flush_tlb_info *info;
 
 		preempt_disable();
-		info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+		info = get_flush_tlb_info(NULL, start, end, 0, false,
+					  TLB_GENERATION_INVALID);
 
 		on_each_cpu(do_kernel_range_flush, info, 1);
@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 	int cpu = get_cpu();
 
-	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
+	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+				  TLB_GENERATION_INVALID);
 	/*
 	 * flush_tlb_multi() is not optimized for the common case in which only
 	 * a local TLB flush is needed. Optimize this use-case by calling
...
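Taken together, the tlb.c hunks reorder flush_tlb_func() so that the shared mm->context.tlb_gen is read only after the cheap per-request check against the local generation has failed, and they give callers without a tracked mm (flush_tlb_kernel_range(), arch_tlbbatch_flush()) the TLB_GENERATION_INVALID sentinel so they bypass that early-return path. The fragment below is a self-contained toy model of just that decision order; the names mirror the diff, but the types, the counter read and the "flush" itself are simulated, so it is a sketch rather than the actual kernel function.

/* Toy model of the new flush_tlb_func() decision order; not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TLB_GENERATION_INVALID	0	/* same sentinel as the tlbflush.h hunk */

/* Counts how often the expensive, contended mm_tlb_gen read happens. */
static unsigned long mm_tlb_gen_reads;

static uint64_t read_mm_tlb_gen(uint64_t mm_tlb_gen)
{
	mm_tlb_gen_reads++;		/* stands in for atomic64_read() */
	return mm_tlb_gen;
}

/* Returns true if a flush would actually be performed. */
static bool flush_decision(uint64_t new_tlb_gen, uint64_t local_tlb_gen,
			   uint64_t mm_tlb_gen)
{
	/* Cheap check first: the requested generation is already reached locally. */
	if (new_tlb_gen != TLB_GENERATION_INVALID && new_tlb_gen <= local_tlb_gen)
		return false;

	/* Only now consult the shared generation counter. */
	if (local_tlb_gen == read_mm_tlb_gen(mm_tlb_gen))
		return false;		/* already fully up to date */

	return true;			/* partial or full flush needed */
}

int main(void)
{
	/* CPU at gen 5, request only needs gen 4: no flush, no shared read. */
	printf("flush=%d shared_reads=%lu\n",
	       flush_decision(4, 5, 7), mm_tlb_gen_reads);

	/* Request needs gen 6: the shared counter is read and a flush happens. */
	printf("flush=%d shared_reads=%lu\n",
	       flush_decision(6, 5, 7), mm_tlb_gen_reads);
	return 0;
}

Running it shows that a request whose generation is already covered returns without touching the shared counter at all, which is the cache-contention win the pull request summary describes.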