Commit abb7099d authored by Linus Torvalds

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more x86 pti fixes from Thomas Gleixner:
 "Another small stash of fixes for fallout from the PTI work:

   - Fix the modules vs. KASAN breakage which was caused by making
     MODULES_END depend on the fixmap size. That was done when the cpu
     entry area moved into the fixmap, but now that we have a separate
     map space for that, it causes more issues than it solves.

   - Use the proper cache flush methods for the debugstore buffers as
     they are mapped/unmapped during runtime and not statically mapped
     at boot time like the rest of the cpu entry area.

   - Make the map layout of the cpu_entry_area consistent for 4 and 5
     level paging and fix the KASLR vaddr_end wreckage.

   - Use PER_CPU_EXPORT for a per-CPU variable and, while at it, unbreak
     the nvidia gfx drivers by dropping the GPL export. The subject line
     of the commit says it the other way around, but I noticed that too
     late.

   - Fix the ASM alternative macros so they can be used in the middle of
     an inline asm block.

   - Rename the BUG_CPU_INSECURE flag to BUG_CPU_MELTDOWN so the attack
     vector is properly identified. The Spectre mitigations will come
     with their own bug bits later"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
  x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm
  x86/tlb: Drop the _GPL from the cpu_tlbstate export
  x86/events/intel/ds: Use the proper cache flush method for mapping ds buffers
  x86/kaslr: Fix the vaddr_end mess
  x86/mm: Map cpu_entry_area at the same place on 4/5 level
  x86/mm: Set MODULES_END to 0xffffffffff000000
parents b03acc4c de791821

Documentation/x86/x86_64/mm.txt:

@@ -12,8 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+                                    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@@ -37,13 +38,15 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+                                    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -67,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.

arch/x86/events/intel/ds.c:

@@ -5,6 +5,7 @@
 
 #include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
+#include <asm/tlbflush.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
@@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);
 
 static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
+        unsigned long start = (unsigned long)cea;
         phys_addr_t pa;
         size_t msz = 0;
 
         pa = virt_to_phys(addr);
+
+        preempt_disable();
         for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
                 cea_set_pte(cea, pa, prot);
+
+        /*
+         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+         * all TLB entries for it.
+         */
+        flush_tlb_kernel_range(start, start + size);
+        preempt_enable();
 }
 
 static void ds_clear_cea(void *cea, size_t size)
 {
+        unsigned long start = (unsigned long)cea;
         size_t msz = 0;
 
+        preempt_disable();
         for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
                 cea_set_pte(cea, 0, PAGE_NONE);
+
+        flush_tlb_kernel_range(start, start + size);
+        preempt_enable();
 }
 
 static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)

arch/x86/include/asm/alternative.h:

@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
         ".popsection\n"                                                 \
         ".pushsection .altinstr_replacement, \"ax\"\n"                  \
         ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
-        ".popsection"
+        ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
         OLDINSTR_2(oldinstr, 1, 2)                                      \
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
         ".pushsection .altinstr_replacement, \"ax\"\n"                  \
         ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
         ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
-        ".popsection"
+        ".popsection\n"
 
 /*
  * Alternative instructions for different CPU types or capabilities.
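
The trailing '\n' matters because ALTERNATIVE() expands to a string that callers can paste into a larger asm() statement; adjacent C string literals are concatenated, so without the newline the closing ".popsection" and whatever the caller appends next end up on one assembler line. A minimal hedged illustration of the pattern that broke (the function and the use of X86_FEATURE_LFENCE_RDTSC here are made up for the example, not taken from this series):

static inline unsigned long example_rdtsc(void)
{
        unsigned long low, high;

        /*
         * ALTERNATIVE() now expands to a string ending in ".popsection\n".
         * Before this fix it ended in ".popsection", so the "rdtsc" literal
         * below would have been glued onto the same line, producing an
         * unknown assembler directive such as ".popsectionrdtsc".
         */
        asm volatile(ALTERNATIVE("", "lfence", X86_FEATURE_LFENCE_RDTSC)
                     "rdtsc"
                     : "=a" (low), "=d" (high));
        return (high << 32) | low;
}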

arch/x86/include/asm/cpufeatures.h:

@@ -341,6 +341,6 @@
 #define X86_BUG_SWAPGS_FENCE    X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR         X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400        X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_INSECURE    X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+#define X86_BUG_CPU_MELTDOWN    X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 
 #endif /* _ASM_X86_CPUFEATURES_H */

arch/x86/include/asm/pgtable_64_types.h:

@@ -75,7 +75,13 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
 #define MAXMEM          _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 
 #ifdef CONFIG_X86_5LEVEL
@@ -88,7 +94,7 @@ typedef struct { pteval_t pte; } pte_t;
 # define VMALLOC_SIZE_TB        _AC(32, UL)
 # define __VMALLOC_BASE         _AC(0xffffc90000000000, UL)
 # define __VMEMMAP_BASE         _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY          _AC(-4, UL)
+# define LDT_PGD_ENTRY          _AC(-3, UL)
 # define LDT_BASE_ADDR          (LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
 
@@ -104,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define MODULES_VADDR           (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections ends with the start of the fixmap */
-#define MODULES_END             __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_END             _AC(0xffffffffff000000, UL)
 #define MODULES_LEN             (MODULES_END - MODULES_VADDR)
 
 #define ESPFIX_PGD_ENTRY        _AC(-2, UL)
 #define ESPFIX_BASE_ADDR        (ESPFIX_PGD_ENTRY << P4D_SHIFT)
 
-#define CPU_ENTRY_AREA_PGD      _AC(-3, UL)
+#define CPU_ENTRY_AREA_PGD      _AC(-4, UL)
 #define CPU_ENTRY_AREA_BASE     (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
 
 #define EFI_VA_START            ( -4 * (_AC(1, UL) << 30))
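
As a quick sanity check of the constants above, here is a standalone userspace sketch (not kernel code); it assumes 4-level paging, where both PGDIR_SHIFT and P4D_SHIFT are 39, and takes MODULES_VADDR = 0xffffffffa0000000 from the mm.txt table earlier in this diff. The computed addresses match the documented layout and the 1520 MB module area:

#include <stdio.h>

int main(void)
{
        unsigned long shift = 39;                       /* PGDIR_SHIFT == P4D_SHIFT with 4-level paging */
        unsigned long modules_vaddr = 0xffffffffa0000000UL;
        unsigned long modules_end   = 0xffffffffff000000UL;

        printf("CPU_ENTRY_AREA_BASE = %#lx\n", -4UL << shift);  /* 0xfffffe0000000000 */
        printf("LDT_BASE_ADDR       = %#lx\n", -3UL << shift);  /* 0xfffffe8000000000 */
        printf("ESPFIX_BASE_ADDR    = %#lx\n", -2UL << shift);  /* 0xffffff0000000000 */
        printf("MODULES_LEN         = %lu MB\n",
               (modules_end - modules_vaddr) >> 20);            /* 1520 MB */
        return 0;
}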

arch/x86/kernel/cpu/common.c:

@@ -924,7 +924,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
         if (c->x86_vendor != X86_VENDOR_AMD)
-                setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+                setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 
         fpu__init_system(c);

arch/x86/mm/dump_pagetables.c:

@@ -61,10 +61,10 @@ enum address_markers_idx {
         KASAN_SHADOW_START_NR,
         KASAN_SHADOW_END_NR,
 #endif
+        CPU_ENTRY_AREA_NR,
 #if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
         LDT_NR,
 #endif
-        CPU_ENTRY_AREA_NR,
 #ifdef CONFIG_X86_ESPFIX64
         ESPFIX_START_NR,
 #endif

arch/x86/mm/init.c:

@@ -868,7 +868,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
         .next_asid = 1,
         .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
 };
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
 
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {
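
The functional part of this change is dropping the _GPL restriction so non-GPL modules (the nvidia graphics driver being the trigger) can link against the symbol again; using the per-CPU export macro is simply the proper form for a DEFINE_PER_CPU variable. A hedged sketch of the kind of access a module ends up making, via the inlined TLB helpers (the function name below is made up for the example):

/*
 * Sketch only: reading the CR4 shadow kept in the exported per-CPU
 * cpu_tlbstate. Code like this gets inlined into modules through
 * <asm/tlbflush.h>, which is why the symbol must be exported without
 * the _GPL restriction for non-GPL modules to link.
 */
static unsigned long example_read_cr4_shadow(void)
{
        return this_cpu_read(cpu_tlbstate.cr4);
}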

arch/x86/mm/kaslr.c:

@@ -34,25 +34,14 @@
 #define TB_SHIFT 40
 
 /*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
  *
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
  */
 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
 
 /* Default values */
 unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
         unsigned long remain_entropy;
 
         /*
-         * All these BUILD_BUG_ON checks ensures the memory layout is
-         * consistent with the vaddr_start/vaddr_end variables.
+         * These BUILD_BUG_ON checks ensure the memory layout is consistent
+         * with the vaddr_start/vaddr_end variables. These checks are very
+         * limited....
          */
         BUILD_BUG_ON(vaddr_start >= vaddr_end);
-        BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-                     vaddr_end >= EFI_VA_END);
-        BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
-                      IS_ENABLED(CONFIG_EFI)) &&
-                     vaddr_end >= __START_KERNEL_map);
+        BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
         BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
         if (!kaslr_memory_enabled())

arch/x86/mm/pti.c:

@@ -56,13 +56,13 @@
 
 static void __init pti_print_if_insecure(const char *reason)
 {
-        if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+        if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                 pr_info("%s\n", reason);
 }
 
 static void __init pti_print_if_secure(const char *reason)
 {
-        if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                 pr_info("%s\n", reason);
 }
 
@@ -96,7 +96,7 @@ void __init pti_check_boottime_disable(void)
         }
 
 autosel:
-        if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                 return;
 enable:
         setup_force_cpu_cap(X86_FEATURE_PTI);