Commit 8b5abde1 authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "A laundry list of changes: KASAN improvements/fixes for ptdump, a
  self-test fix, PAT cleanup and wbinvd() avoidance, removal of stale
  code and documentation updates"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/ptdump: Add address marker for KASAN shadow region
  x86/mm/ptdump: Optimize check for W+X mappings for CONFIG_KASAN=y
  x86/mm/pat: Use rb_entry()
  x86/mpx: Re-add MPX to selftests Makefile
  x86/mm: Remove CONFIG_DEBUG_NX_TEST
  x86/mm/cpa: Avoid wbinvd() for PREEMPT
  x86/mm: Improve documentation for low-level device I/O functions
parents a25a1d6c 025205f8
...@@ -120,14 +120,6 @@ config DEBUG_SET_MODULE_RONX
against certain classes of kernel exploits.
If in doubt, say "N".
config DEBUG_NX_TEST
tristate "Testcase for the NX non-executable stack feature"
depends on DEBUG_KERNEL && m
---help---
This option enables a testcase for the CPU NX capability
and the software setup of this feature.
If in doubt, say "N"
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EXPERT
......
...@@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
#define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt
/*
* The default ioremap() behavior is non-cached; if you need something
* else, you probably want one of the following.
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
...@@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* If the area you are trying to map is a PCI BAR you should have a
* look at pci_iomap().
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
/*
* The default ioremap() behavior is non-cached:
*/
static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
{
return ioremap_nocache(offset, size);
...@@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void);
*/
#define xlate_dev_kmem_ptr(p) p
/**
* memset_io Set a range of I/O memory to a constant value
* @addr: The beginning of the I/O-memory range to set
* @val: The value to set the memory to
* @count: The number of bytes to set
*
* Set a range of I/O memory to a given value.
*/
static inline void
memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
{
memset((void __force *)addr, val, count);
}
/**
* memcpy_fromio Copy a block of data from I/O memory
* @dst: The (RAM) destination for the copy
* @src: The (I/O memory) source for the data
* @count: The number of bytes to copy
*
* Copy a block of data from I/O memory.
*/
static inline void
memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
{
memcpy(dst, (const void __force *)src, count);
}
/**
* memcpy_toio Copy a block of data into I/O memory
* @dst: The (I/O memory) destination for the copy
* @src: The (RAM) source for the data
* @count: The number of bytes to copy
*
* Copy a block of data to I/O memory.
*/
static inline void
memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
{
......
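Editor's note: the io.h hunks above add kernel-doc for memset_io()/memcpy_fromio()/memcpy_toio() and move the ioremap() variant declarations next to their documentation. As a rough illustration of how a driver typically uses these helpers, here is a hedged sketch; the device address, length, and function names (EXAMPLE_BAR_PHYS, example_io_copy, etc.) are made up for the example and are not part of the patch.

#include <linux/io.h>
#include <linux/errno.h>
#include <linux/kernel.h>

/* Hypothetical example values -- not taken from the patch. */
#define EXAMPLE_BAR_PHYS 0xfebf0000UL
#define EXAMPLE_BAR_LEN  0x1000UL

static int example_io_copy(void *buf, size_t len)
{
	/* The default ioremap() is uncached, equivalent to ioremap_nocache(). */
	void __iomem *regs = ioremap_nocache(EXAMPLE_BAR_PHYS, EXAMPLE_BAR_LEN);

	if (!regs)
		return -ENOMEM;

	memset_io(regs, 0, EXAMPLE_BAR_LEN);   /* clear the device window */
	memcpy_toio(regs, buf, len);           /* push a RAM buffer out to I/O memory */
	memcpy_fromio(buf, regs, len);         /* read it back into RAM */

	iounmap(regs);
	return 0;
}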
...@@ -101,7 +101,6 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
......
/*
* test_nx.c: functional test for NX functionality
*
* (C) Copyright 2008 Intel Corporation
* Author: Arjan van de Ven <arjan@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/asm.h>
extern int rodata_test_data;
/*
* This file checks 4 things:
* 1) Check if the stack is not executable
* 2) Check if kmalloc memory is not executable
* 3) Check if the .rodata section is not executable
* 4) Check if the .data section of a module is not executable
*
* To do this, the test code tries to execute memory in stack/kmalloc/etc,
* and then checks if the expected trap happens.
*
* Sadly, this implies having a dynamic exception handling table entry.
* ... which can be done (and will make Rusty cry)... but it can only
* be done in a stand-alone module with only 1 entry total.
* (otherwise we'd have to sort and that's just too messy)
*/
/*
* We want to set up an exception handling point on our stack,
* which means a variable value. This function is rather dirty
* and walks the exception table of the module, looking for a magic
* marker and replaces it with a specific function.
*/
static void fudze_exception_table(void *marker, void *new)
{
struct module *mod = THIS_MODULE;
struct exception_table_entry *extable;
/*
* Note: This module has only 1 exception table entry,
* so searching and sorting is not needed. If that changes,
* this would be the place to search and re-sort the exception
* table.
*/
if (mod->num_exentries > 1) {
printk(KERN_ERR "test_nx: too many exception table entries!\n");
printk(KERN_ERR "test_nx: test results are not reliable.\n");
return;
}
extable = (struct exception_table_entry *)mod->extable;
extable[0].insn = (unsigned long)new;
}
/*
* exception tables get their symbols translated so we need
* to use a fake function to put in there, which we can then
* replace at runtime.
*/
void foo_label(void);
/*
* returns 0 for not-executable, negative for executable
*
* Note: we cannot allow this function to be inlined, because
* that would give us more than 1 exception table entry.
* This in turn would break the assumptions above.
*/
static noinline int test_address(void *address)
{
unsigned long result;
/* Set up an exception table entry for our address */
fudze_exception_table(&foo_label, address);
result = 1;
asm volatile(
"foo_label:\n"
"0: call *%[fake_code]\n"
"1:\n"
".section .fixup,\"ax\"\n"
"2: mov %[zero], %[rslt]\n"
" ret\n"
".previous\n"
_ASM_EXTABLE(0b,2b)
: [rslt] "=r" (result)
: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
);
/* change the exception table back for the next round */
fudze_exception_table(address, &foo_label);
if (result)
return -ENODEV;
return 0;
}
static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
static int test_NX(void)
{
int ret = 0;
/* 0xC3 is the opcode for "ret" */
char stackcode[] = {0xC3, 0x90, 0 };
char *heap;
test_data = 0xC3;
printk(KERN_INFO "Testing NX protection\n");
/* Test 1: check if the stack is not executable */
if (test_address(&stackcode)) {
printk(KERN_ERR "test_nx: stack was executable\n");
ret = -ENODEV;
}
/* Test 2: Check if the heap is executable */
heap = kmalloc(64, GFP_KERNEL);
if (!heap)
return -ENOMEM;
heap[0] = 0xC3; /* opcode for "ret" */
if (test_address(heap)) {
printk(KERN_ERR "test_nx: heap was executable\n");
ret = -ENODEV;
}
kfree(heap);
/*
* The following 2 tests currently fail, this needs to get fixed
* Until then, don't run them to avoid too many people getting scared
* by the error message
*/
/* Test 3: Check if the .rodata section is executable */
if (rodata_test_data != 0xC3) {
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
ret = -ENODEV;
} else if (test_address(&rodata_test_data)) {
printk(KERN_ERR "test_nx: .rodata section is executable\n");
ret = -ENODEV;
}
#if 0
/* Test 4: Check if the .data section of a module is executable */
if (test_address(&test_data)) {
printk(KERN_ERR "test_nx: .data section is executable\n");
ret = -ENODEV;
}
#endif
return ret;
}
static void test_exit(void)
{
}
module_init(test_NX);
module_exit(test_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Testcase for the NX infrastructure");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
...@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/kasan.h>
#include <asm/pgtable.h>
/*
...@@ -51,6 +52,10 @@ enum address_markers_idx {
LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
# ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
# endif
...@@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = {
{ 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
{ 0/* VMALLOC_START */, "vmalloc() Area" },
{ 0/* VMEMMAP_START */, "Vmemmap" },
#ifdef CONFIG_KASAN
{ KASAN_SHADOW_START, "KASAN shadow" },
{ KASAN_SHADOW_END, "KASAN shadow end" },
#endif
# ifdef CONFIG_X86_ESPFIX64
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
# endif
...@@ -327,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
#if PTRS_PER_PUD > 1
/*
* This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
* KASAN fills page tables with the same values. Since there is no
* point in checking page table more than once we just skip repeated
* entries. This saves us dozens of seconds during boot.
*/
static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
{
return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
}
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
unsigned long P)
{
int i;
pud_t *start;
pgprotval_t prot;
pud_t *prev_pud = NULL;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
if (!pud_none(*start) &&
!pud_already_checked(prev_pud, start, st->check_wx)) {
if (pud_large(*start) || !pud_present(*start)) {
prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
...@@ -349,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
} else
note_page(m, st, __pgprot(0), 2);
prev_pud = start;
start++;
}
}
......
...@@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
int in_flags, struct page **pages)
{
unsigned int i, level;
#ifdef CONFIG_PREEMPT
/*
* Avoid wbinvd() because it causes latencies on all CPUs,
* regardless of any CPU isolation that may be in effect.
*
* This should be extended for CAT enabled systems independent of
* PREEMPT because wbinvd() does not respect the CAT partitions and
* this is exposed to unprivileged users through the graphics
* subsystem.
*/
unsigned long do_wbinvd = 0;
#else
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
#endif
BUG_ON(irqs_disabled());
......
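Editor's note: the do_wbinvd flag above selects between two cache-flush strategies once page attributes have changed: a global write-back-invalidate that stalls every CPU, or targeted cache-line flushes of only the affected pages. The following is a simplified, hypothetical sketch of that pattern, not the verbatim body of cpa_flush_array() (which also handles the CPA_PAGES_ARRAY flag and TLB flushing); the helper name example_flush_after_cpa() is made up.

#include <linux/mm.h>		/* page_address(), PAGE_SIZE */
#include <linux/types.h>
#include <asm/cacheflush.h>	/* clflush_cache_range() */
#include <asm/smp.h>		/* wbinvd_on_all_cpus() */

/* Hypothetical helper illustrating the do_wbinvd decision. */
static void example_flush_after_cpa(struct page **pages, int numpages,
				    int cache, bool do_wbinvd)
{
	int i;

	if (!cache)
		return;		/* no cacheability change, nothing to flush */

	if (do_wbinvd) {
		/* Cheap for huge ranges, but stalls all CPUs -- the latency
		 * the PREEMPT case above wants to avoid. */
		wbinvd_on_all_cpus();
		return;
	}

	/* Flush only the cache lines whose attributes actually changed. */
	for (i = 0; i < numpages; i++)
		clflush_cache_range(page_address(pages[i]), PAGE_SIZE);
}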
...@@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
{
u64 ret = 0;
if (node) {
struct memtype *data = container_of(node, struct memtype, rb);
struct memtype *data = rb_entry(node, struct memtype, rb);
ret = data->subtree_max_end;
}
return ret;
...@@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
struct memtype *last_lower = NULL;
while (node) {
struct memtype *data = container_of(node, struct memtype, rb);
struct memtype *data = rb_entry(node, struct memtype, rb);
if (get_subtree_max_end(node->rb_left) > start) {
/* Lowest overlap if any must be on left side */
...@@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
node = rb_next(&match->rb);
if (node)
match = container_of(node, struct memtype, rb);
match = rb_entry(node, struct memtype, rb);
else
match = NULL;
}
...@@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root,
node = rb_next(&match->rb);
while (node) {
match = container_of(node, struct memtype, rb);
match = rb_entry(node, struct memtype, rb);
if (match->start >= end) /* Checked all possible matches */
goto success;
...@@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
struct rb_node *parent = NULL;
while (*node) {
struct memtype *data = container_of(*node, struct memtype, rb);
struct memtype *data = rb_entry(*node, struct memtype, rb);
parent = *node;
if (data->subtree_max_end < newdata->end)
...@@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
}
if (node) { /* pos == i */
struct memtype *this = container_of(node, struct memtype, rb);
struct memtype *this = rb_entry(node, struct memtype, rb);
*out = *this;
return 0;
} else {
......
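Editor's note: the rb_entry() conversions above are purely mechanical. rb_entry() is defined in <linux/rbtree.h> as a thin alias for container_of(), so the generated code is identical; the change just makes the rb_node-to-entry conversion explicit. A minimal illustration, using a made-up struct rather than the real struct memtype:

#include <linux/rbtree.h>	/* #define rb_entry(ptr, type, member) container_of(ptr, type, member) */
#include <linux/types.h>

struct example_range {		/* hypothetical stand-in for struct memtype */
	u64 start;
	u64 end;
	struct rb_node rb;
};

static struct example_range *node_to_range(struct rb_node *node)
{
	/* Identical to: container_of(node, struct example_range, rb) */
	return rb_entry(node, struct example_range, rb);
}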
...@@ -5,7 +5,7 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
check_initial_reg_state sigreturn ldt_gdt iopl \
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
protection_keys test_vdso
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
......