Commit 92598ae2 authored by Linus Torvalds

Merge tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Borislav Petkov:

 - Rename a PKRU macro to make more sense when reading the code

 - Update pkeys documentation

 - Avoid reading a contended mm's TLB generation variable if not
   absolutely necessary, along with fixing a case where
   arch_tlbbatch_flush() doesn't adhere to the generation scheme and
   thus violates the conditions for the above avoidance.

* tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/tlb: Ignore f->new_tlb_gen when zero
  x86/pkeys: Clarify PKRU_AD_KEY macro
  Documentation/protection-keys: Clean up documentation for User Space pkeys
  x86/mm/tlb: Avoid reading mm_tlb_gen when possible
parents 94e37e84 8f1d56f6
......@@ -4,31 +4,29 @@
Memory Protection Keys
======================
Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
which is found on Intel's Skylake (and later) "Scalable Processor"
Server CPUs. It will be available in future non-server Intel parts
and future AMD processors.
For anyone wishing to test or use this feature, it is available in
Amazon's EC2 C5 instances and is known to work there using an Ubuntu
17.04 image.
Memory Protection Keys provides a mechanism for enforcing page-based
protections, but without requiring modification of the page tables
when an application changes protection domains. It works by
dedicating 4 previously ignored bits in each page table entry to a
"protection key", giving 16 possible keys.
There is also a new user-accessible register (PKRU) with two separate
bits (Access Disable and Write Disable) for each key. Being a CPU
register, PKRU is inherently thread-local, potentially giving each
Memory Protection Keys provide a mechanism for enforcing page-based
protections, but without requiring modification of the page tables when an
application changes protection domains.
Pkeys Userspace (PKU) is a feature which can be found on:
* Intel server CPUs, Skylake and later
* Intel client CPUs, Tiger Lake (11th Gen Core) and later
* Future AMD CPUs
Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
a "protection key", giving 16 possible keys.
Protections for each key are defined with a per-CPU user-accessible register
(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
and Write Disable) for each of 16 keys.
Being a CPU register, PKRU is inherently thread-local, potentially giving each
thread a different set of protections from every other thread.
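
The constants below match the layout just described (two bits per key, with the Access Disable bit in the low bit of each pair), but the helper functions and the sample value are purely illustrative, a minimal user-space sketch rather than anything taken from this patch:

#include <stdint.h>
#include <stdio.h>

/* PKRU layout: 2 bits per key, bit 2*pkey = Access Disable (AD),
 * bit 2*pkey + 1 = Write Disable (WD); 16 keys * 2 bits = 32 bits. */
#define PKRU_BITS_PER_PKEY	2
#define PKRU_AD_BIT		0x1u
#define PKRU_WD_BIT		0x2u

/* Illustrative helpers, not a kernel or libc API. */
static int pkey_access_disabled(uint32_t pkru, int pkey)
{
	return !!(pkru & (PKRU_AD_BIT << (pkey * PKRU_BITS_PER_PKEY)));
}

static int pkey_write_disabled(uint32_t pkru, int pkey)
{
	return !!(pkru & (PKRU_WD_BIT << (pkey * PKRU_BITS_PER_PKEY)));
}

int main(void)
{
	uint32_t pkru = 0x55555554u;	/* every key except 0 access-disabled */

	for (int pkey = 0; pkey < 16; pkey++)
		printf("pkey %2d: AD=%d WD=%d\n", pkey,
		       pkey_access_disabled(pkru, pkey),
		       pkey_write_disabled(pkru, pkey));
	return 0;
}
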
There are two new instructions (RDPKRU/WRPKRU) for reading and writing
to the new register. The feature is only available in 64-bit mode,
even though there is theoretically space in the PAE PTEs. These
permissions are enforced on data access only and have no effect on
instruction fetches.
There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
register. The feature is only available in 64-bit mode, even though there is
theoretically space in the PAE PTEs. These permissions are enforced on data
access only and have no effect on instruction fetches.
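
As a hedged illustration of that instruction-level interface (nothing here is part of the patch): RDPKRU expects ECX=0 and returns PKRU in EAX, while WRPKRU writes EAX into PKRU with ECX and EDX zero. The sketch below encodes the instructions as raw bytes for the benefit of older assemblers; in practice the glibc pkey_get()/pkey_set() wrappers are usually preferable.

#include <stdint.h>

/* Minimal user-space sketch of RDPKRU/WRPKRU (opcodes 0f 01 ee / 0f 01 ef). */
static inline uint32_t rdpkru(void)
{
	uint32_t eax, edx;

	/* RDPKRU: ECX must be 0; PKRU is returned in EAX, EDX is cleared. */
	asm volatile(".byte 0x0f, 0x01, 0xee"
		     : "=a" (eax), "=d" (edx)
		     : "c" (0));
	return eax;
}

static inline void wrpkru(uint32_t pkru)
{
	/*
	 * WRPKRU: EAX holds the new PKRU value; ECX and EDX must be 0.
	 * The "memory" clobber keeps memory accesses from being reordered
	 * across the permission change.
	 */
	asm volatile(".byte 0x0f, 0x01, 0xef"
		     : : "a" (pkru), "c" (0), "d" (0) : "memory");
}
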
Syscalls
========
......
......@@ -16,6 +16,7 @@
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
......
......@@ -110,7 +110,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
return vma_pkey(vma);
}
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
#define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
/*
* Make the default PKRU value (at execve() time) as restrictive
......@@ -118,11 +118,14 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
u32 init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
PKRU_AD_MASK(15);
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
......
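
The rename does not change the computed value: with PKRU_AD_BIT == 0x1 and PKRU_BITS_PER_PKEY == 2, PKRU_AD_MASK(pkey) is simply 1 << (2 * pkey), so OR-ing keys 1 through 15 gives an init_pkru_value of 0x55555554, leaving only key 0 fully accessible. A small stand-alone check of that arithmetic, assuming nothing beyond the two constants above:

#include <assert.h>
#include <stdint.h>

#define PKRU_BITS_PER_PKEY	2
#define PKRU_AD_BIT		0x1u
#define PKRU_AD_MASK(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))

int main(void)
{
	uint32_t init_pkru = 0;

	/* Access-disable every key except key 0, as the kernel default does. */
	for (int pkey = 1; pkey < 16; pkey++)
		init_pkru |= PKRU_AD_MASK(pkey);

	assert(init_pkru == 0x55555554u);
	return 0;
}
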
......@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
u64 mm_tlb_gen;
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
......@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
return;
}
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
f->new_tlb_gen <= local_tlb_gen)) {
/*
* The TLB is already up to date in respect to f->new_tlb_gen.
* While the core might be still behind mm_tlb_gen, checking
* mm_tlb_gen unnecessarily would have negative caching effects
* so avoid it.
*/
return;
}
/*
* Defer mm_tlb_gen reading as long as possible to avoid cache
* contention.
*/
mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
if (unlikely(local_tlb_gen == mm_tlb_gen)) {
/*
* There's nothing to do: we're already up to date. This can
......@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
/* Partial flush */
unsigned long addr = f->start;
/* Partial flush cannot have invalid generations */
VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
/* Partial flush must have valid mm */
VM_WARN_ON(f->mm == NULL);
nr_invalidate = (f->end - f->start) >> f->stride_shift;
while (addr < f->end) {
......@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
struct flush_tlb_info *info;
preempt_disable();
info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
info = get_flush_tlb_info(NULL, start, end, 0, false,
TLB_GENERATION_INVALID);
on_each_cpu(do_kernel_range_flush, info, 1);
......@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
int cpu = get_cpu();
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
......
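
The tlb_gen changes above amount to: compare the request's target generation against the CPU's locally cached generation first, and only read the shared, potentially cache-contended mm tlb_gen when that early check cannot decide. A simplified user-space model of the decision flush_tlb_func() now makes (names and structure are illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define TLB_GENERATION_INVALID	0

/* Toy model: one shared generation counter per mm, one cached copy per CPU. */
struct toy_mm  { _Atomic uint64_t tlb_gen; };
struct toy_cpu { uint64_t local_tlb_gen; };

/*
 * Returns true when the flush can be skipped. The point of the patch is the
 * first check: if the request carries a valid target generation that this CPU
 * has already reached, the contended mm->tlb_gen is never touched at all.
 */
static bool can_skip_flush(struct toy_cpu *cpu, struct toy_mm *mm,
			   uint64_t new_tlb_gen)
{
	if (new_tlb_gen != TLB_GENERATION_INVALID &&
	    new_tlb_gen <= cpu->local_tlb_gen)
		return true;

	/* Only now read the shared counter. */
	return cpu->local_tlb_gen == atomic_load(&mm->tlb_gen);
}
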