Commit 8f147727 authored by Linus Torvalds

Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 irq updates from Ingo Molnar:
 "Here are the main changes in this tree:

   - Introduce x86-64 IRQ/exception/debug stack guard pages to detect
     stack overflows immediately and deterministically.

   - Clean up over a decade's worth of accumulated cruft.

  The outcome of this should be more clear-cut faults/crashes when any
  of the low level x86 CPU stacks overflow, instead of silent memory
  corruption and sporadic failures much later on"

* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/irq: Fix outdated comments
  x86/irq/64: Remove stack overflow debug code
  x86/irq/64: Remap the IRQ stack with guard pages
  x86/irq/64: Split the IRQ stack into its own pages
  x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
  x86/irq/32: Handle irq stack allocation failure proper
  x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
  x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
  x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
  x86/irq/32: Make irq stack a character array
  x86/irq/32: Define IRQ_STACK_SIZE
  x86/dumpstack/64: Speedup in_exception_stack()
  x86/exceptions: Split debug IST stack
  x86/exceptions: Enable IST guard pages
  x86/exceptions: Disconnect IST index and stack order
  x86/cpu: Remove orig_ist array
  x86/cpu: Prepare TSS.IST setup for guard pages
  x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
  x86/irq/64: Use cpu entry area instead of orig_ist
  x86/traps: Use cpu_entry_area instead of orig_ist
  ...
parents 53f8b081 2c464543
......@@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt.
The currently assigned IST stacks are :-
* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
Used for interrupt 8 - Double Fault Exception (#DF).
......@@ -68,7 +68,7 @@ The currently assigned IST stacks are :-
Using a separate stack allows the kernel to recover from it well enough
in many cases to still output an oops.
* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
Used for non-maskable interrupts (NMI).
......@@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
middle of switching stacks. Using IST for NMI events avoids making
assumptions about the previous state of the kernel stack.
* DEBUG_STACK. DEBUG_STKSZ
* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
Used for hardware debug interrupts (interrupt 1) and for software
debug interrupts (INT3).
......@@ -86,7 +86,12 @@ The currently assigned IST stacks are :-
avoids making assumptions about the previous state of the kernel
stack.
* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
To handle nested #DB correctly there exist two instances of DB stacks. On
#DB entry the IST stackpointer for #DB is switched to the second instance
so a nested #DB starts from a clean stack. The nested #DB switches
the IST stackpointer to a guard hole to catch triple nesting.
* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
Used for interrupt 18 - Machine Check Exception (#MC).
......
......@@ -14,6 +14,7 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
......@@ -138,7 +139,6 @@ config X86
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
......
......@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
#ifdef CONFIG_RETPOLINE
......@@ -430,8 +430,8 @@ END(irq_entries_start)
* it before we actually move ourselves to the IRQ stack.
*/
movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
movq PER_CPU_VAR(irq_stack_ptr), %rsp
movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
#ifdef CONFIG_DEBUG_ENTRY
/*
......@@ -840,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
/**
* idtentry - Generate an IDT entry stub
......@@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
......@@ -924,13 +924,13 @@ ENTRY(\sym)
.endif
.if \shift_ist != -1
subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
subq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
call \do_sym
.if \shift_ist != -1
addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
addq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
/* these procedures expect "no swapgs" flag in ebx */
......@@ -1128,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
......
......@@ -7,6 +7,64 @@
#include <asm/processor.h>
#include <asm/intel_ds.h>
#ifdef CONFIG_X86_64
/*
 * ESTACKS_MEMBERS - emit the member list shared by the exception stack structs
 * @guardsize:    size in bytes of every *_stack_guard member and of
 *                IST_top_guard (0 yields zero-sized guard members)
 * @db2_holesize: size in bytes of the DB2_stack member
 *
 * Using one macro for every struct that lists these members enforces the
 * same member ordering and the same stack sizes (EXCEPTION_STKSZ) in all of
 * them.
 *
 * The last member deliberately carries no line continuation, so the macro
 * ends there no matter what text follows the definition.
 */
#define ESTACKS_MEMBERS(guardsize, db2_holesize)	\
	char	DF_stack_guard[guardsize];		\
	char	DF_stack[EXCEPTION_STKSZ];		\
	char	NMI_stack_guard[guardsize];		\
	char	NMI_stack[EXCEPTION_STKSZ];		\
	char	DB2_stack_guard[guardsize];		\
	char	DB2_stack[db2_holesize];		\
	char	DB1_stack_guard[guardsize];		\
	char	DB1_stack[EXCEPTION_STKSZ];		\
	char	DB_stack_guard[guardsize];		\
	char	DB_stack[EXCEPTION_STKSZ];		\
	char	MCE_stack_guard[guardsize];		\
	char	MCE_stack[EXCEPTION_STKSZ];		\
	char	IST_top_guard[guardsize];
/* The exception stacks' physical storage. No guard pages required */
struct exception_stacks {
ESTACKS_MEMBERS(0, 0)
};
/* The effective cpu entry area mapping with guard pages. */
struct cea_exception_stacks {
ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
};
/*
 * The exception stack ordering in [cea_]exception_stacks
 *
 * The enumerators are listed in the same order as the <name>_stack members
 * emitted by ESTACKS_MEMBERS(): DF, NMI, DB2, DB1, DB, MCE.
 * N_EXCEPTION_STACKS comes last and therefore equals the number of stacks.
 */
enum exception_stack_ordering {
ESTACK_DF,
ESTACK_NMI,
ESTACK_DB2,
ESTACK_DB1,
ESTACK_DB,
ESTACK_MCE,
N_EXCEPTION_STACKS
};
/*
 * Helpers for struct cea_exception_stacks. @st is the bare stack name (the
 * part before "_stack", e.g. DF or DB1); it is token-pasted to form the
 * member name. @ceastp is a pointer to a struct cea_exception_stacks.
 */

/* Size in bytes of the @st stack member. */
#define CEA_ESTACK_SIZE(st) \
sizeof(((struct cea_exception_stacks *)0)->st## _stack)
/* Address of the first byte of the @st stack, as an unsigned long. */
#define CEA_ESTACK_BOT(ceastp, st) \
((unsigned long)&(ceastp)->st## _stack)
/* Address one past the last byte of the @st stack (bottom + size). */
#define CEA_ESTACK_TOP(ceastp, st) \
(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
/* Byte offset of the @st stack from the start of the struct. */
#define CEA_ESTACK_OFFS(st) \
offsetof(struct cea_exception_stacks, st## _stack)
/* Size of struct cea_exception_stacks expressed in pages. */
#define CEA_ESTACK_PAGES \
(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
#endif
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
......@@ -32,12 +90,9 @@ struct cpu_entry_area {
#ifdef CONFIG_X86_64
/*
* Exception stacks used for IST entries.
*
* In the future, this should have a separate slot for each stack
* with guard pages between them.
* Exception stacks used for IST entries with guard pages.
*/
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
struct cea_exception_stacks estacks;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
......@@ -57,6 +112,7 @@ struct cpu_entry_area {
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
......@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
#define __this_cpu_ist_top_va(name) \
CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
#endif
......@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
{
__this_cpu_dec(debug_stack_usage);
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
......
......@@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
return ((irq == 2) ? 9 : irq);
}
#ifdef CONFIG_X86_32
extern void irq_ctx_init(int cpu);
#else
# define irq_ctx_init(cpu) do { } while (0)
#endif
extern int irq_init_percpu_irqstack(unsigned int cpu);
#define __ARCH_HAS_DO_SOFTIRQ
......
......@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
* Vectors 129 ... LOCAL_TIMER_VECTOR-1 : device interrupts
* Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
......
......@@ -22,11 +22,9 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 0
#define DEBUG_STACK 0
#define MCE_STACK 0
#define N_EXCEPTION_STACKS 1
#define IRQ_STACK_SIZE THREAD_SIZE
#define N_EXCEPTION_STACKS 1
#ifdef CONFIG_X86_PAE
/*
......
......@@ -14,22 +14,20 @@
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 2
#define DEBUG_STACK 3
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
/*
* The index for the tss.ist[] array. The hardware limit is 7 entries.
*/
#define IST_INDEX_DF 0
#define IST_INDEX_NMI 1
#define IST_INDEX_DB 2
#define IST_INDEX_MCE 3
/*
* Set __PAGE_OFFSET to the most negative possible address +
......
......@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
#define __KERNEL_TSS_LIMIT \
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
/* Per CPU interrupt stacks */
struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#else
......@@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
/*
* Save the original ist values for checking stack pointers during debugging
*/
struct orig_ist {
unsigned long ist[7];
};
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
union irq_stack_union {
char irq_stack[IRQ_STACK_SIZE];
struct fixed_percpu_data {
/*
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*/
struct {
char gs_base[40];
unsigned long stack_canary;
};
char gs_base[40];
unsigned long stack_canary;
};
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
......@@ -427,15 +421,8 @@ struct stack_canary {
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
/*
* per-CPU IRQ handling stacks
*/
struct irq_stack {
u32 stack[THREAD_SIZE/sizeof(u32)];
} __aligned(THREAD_SIZE);
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
/* Per CPU softirq stack pointer */
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* X86_64 */
extern unsigned int fpu_kernel_xstate_size;
......
......@@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
......
......@@ -13,7 +13,7 @@
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* irq_stack_union which contains stack_canary at offset 40. Userland
* fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
......@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
u64 tsc;
#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
#endif
/*
* We both use the random pool and the current TSC as a source
......@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
current->stack_canary = canary;
#ifdef CONFIG_X86_64
this_cpu_write(irq_stack_union.stack_canary, canary);
this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
#endif
......
......@@ -9,6 +9,8 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>
enum stack_type {
......
......@@ -68,10 +68,12 @@ int main(void)
#undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
offsetof(struct cea_exception_stacks, DB1_stack));
BLANK();
#ifdef CONFIG_STACKPROTECTOR
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
......
......@@ -507,19 +507,6 @@ void load_percpu_segment(int cpu)
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
#ifdef CONFIG_X86_64
/*
* Special IST stacks which the CPU switches to when it calls
* an IST-marked descriptor entry. Up to 7 stacks (hardware
* limit), all of them are 4K, except the debug stack which
* is 8K.
*/
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
#endif
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
{
......@@ -1511,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg)
__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
/*
* The following percpu variables are hot. Align current_task to
......@@ -1523,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
......@@ -1562,23 +1547,7 @@ void syscall_init(void)
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}
/*
* Copies of the original ist values from the tss are only accessed during
* debugging, no special alignment required.
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);
int is_debug_stack(unsigned long addr)
{
return __this_cpu_read(debug_stack_usage) ||
(addr <= __this_cpu_read(debug_stack_addr) &&
addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
}
NOKPROBE_SYMBOL(is_debug_stack);
DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
......@@ -1690,17 +1659,14 @@ static void setup_getcpu(int cpu)
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
* A lot of state is already set up in PDA init for 64 bit
*/
#ifdef CONFIG_X86_64
void cpu_init(void)
{
struct orig_ist *oist;
int cpu = raw_smp_processor_id();
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
int cpu = raw_smp_processor_id();
int i;
wait_for_master_cpu(cpu);
......@@ -1715,7 +1681,6 @@ void cpu_init(void)
load_ucode_ap();
t = &per_cpu(cpu_tss_rw, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (this_cpu_read(numa_node) == 0 &&
......@@ -1753,16 +1718,11 @@ void cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
if (!oist->ist[0]) {
char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
if (v == DEBUG_STACK-1)
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
}
if (!t->x86_tss.ist[0]) {
t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
......
......@@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack <= begin || stack > end)
if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
......@@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack <= begin || stack > end)
if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
......
......@@ -16,23 +16,21 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
static char *exception_stack_names[N_EXCEPTION_STACKS] = {
[ DOUBLEFAULT_STACK-1 ] = "#DF",
[ NMI_STACK-1 ] = "NMI",
[ DEBUG_STACK-1 ] = "#DB",
[ MCE_STACK-1 ] = "#MC",
};
static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
static const char * const exception_stack_names[] = {
[ ESTACK_DF ] = "#DF",
[ ESTACK_NMI ] = "NMI",
[ ESTACK_DB2 ] = "#DB2",
[ ESTACK_DB1 ] = "#DB1",
[ ESTACK_DB ] = "#DB",
[ ESTACK_MCE ] = "#MC",
};
const char *stack_type_name(enum stack_type type)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
if (type == STACK_TYPE_IRQ)
return "IRQ";
......@@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type)
return NULL;
}
/**
* struct estack_pages - Page descriptor for exception stacks
* @offs: Offset from the start of the exception stack area
* @size: Size of the exception stack
* @type: Type to store in the stack_info struct
*/
struct estack_pages {
u32 offs;
u16 size;
u16 type;
};
/*
 * EPAGERANGE - range initializer for one exception stack's page descriptors
 * @st: bare stack name (DF, NMI, DB1, DB, MCE)
 *
 * Expands to a designated range initializer covering every page index from
 * the page holding the first byte of the @st stack to the page holding its
 * last byte. Each covered entry gets the stack's offset and size within
 * struct cea_exception_stacks and the type STACK_TYPE_EXCEPTION + ESTACK_<st>.
 */
#define EPAGERANGE(st) \
[PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \
PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \
.offs = CEA_ESTACK_OFFS(st), \
.size = CEA_ESTACK_SIZE(st), \
.type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, }
/*
* Array of exception stack page descriptors. If the stack is larger than
* PAGE_SIZE, all pages covering a particular stack will have the same
* info. The guard pages including the not mapped DB2 stack are zeroed
* out.
*/
static const
struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
EPAGERANGE(DF),
EPAGERANGE(NMI),
EPAGERANGE(DB1),
EPAGERANGE(DB),
EPAGERANGE(MCE),
};
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin, *end;
unsigned long begin, end, stk = (unsigned long)stack;
const struct estack_pages *ep;
struct pt_regs *regs;
unsigned k;
unsigned int k;
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
begin = end - (exception_stack_sizes[k] / sizeof(long));
regs = (struct pt_regs *)end - 1;
if (stack <= begin || stack >= end)
continue;
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
end = begin + sizeof(struct cea_exception_stacks);
/* Bail if @stack is outside the exception stack area. */
if (stk < begin || stk >= end)
return false;
info->type = STACK_TYPE_EXCEPTION + k;
info->begin = begin;
info->end = end;
info->next_sp = (unsigned long *)regs->sp;
/* Calc page offset from start of exception stacks */
k = (stk - begin) >> PAGE_SHIFT;
/* Lookup the page descriptor */
ep = &estack_pages[k];
/* Guard page? */
if (!ep->size)
return false;
return true;
}
begin += (unsigned long)ep->offs;
end = begin + (unsigned long)ep->size;
regs = (struct pt_regs *)end - 1;
return false;
info->type = ep->type;
info->begin = (unsigned long *)begin;
info->end = (unsigned long *)end;
info->next_sp = (unsigned long *)regs->sp;
return true;
}
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack <= begin || stack > end)
if (stack < begin || stack >= end)
return false;
info->type = STACK_TYPE_IRQ;
......
......@@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
.quad INIT_PER_CPU_VAR(fixed_percpu_data)
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
......
......@@ -41,13 +41,12 @@ struct idt_data {
#define SYSG(_vector, _addr) \
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
/* Interrupt gate with interrupt stack */
/*
* Interrupt gate with interrupt stack. The _ist index is the index in
* the tss.ist[] array, but for the descriptor it needs to start at 1.
*/
#define ISTG(_vector, _addr, _ist) \
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
/* System interrupt gate with interrupt stack */
#define SISTG(_vector, _addr, _ist) \
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
/* Task gate */
#define TSKG(_vector, _gdt) \
......@@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* cpu_init() when the TSS has been initialized.
*/
static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
#endif
};
......
......@@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { }
#endif
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
static void call_on_stack(void *func, void *stack)
{
......@@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
irqstk = __this_cpu_read(hardirq_stack);
irqstk = __this_cpu_read(hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
......@@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
}
/*
* allocate per-cpu stacks for hardirq and for softirq processing
* Allocate per-cpu stacks for hardirq and softirq processing
*/
void irq_ctx_init(int cpu)
int irq_init_percpu_irqstack(unsigned int cpu)
{
struct irq_stack *irqstk;
if (per_cpu(hardirq_stack, cpu))
return;
int node = cpu_to_node(cpu);
struct page *ph, *ps;
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP,
THREAD_SIZE_ORDER));
per_cpu(hardirq_stack, cpu) = irqstk;
if (per_cpu(hardirq_stack_ptr, cpu))
return 0;
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP,
THREAD_SIZE_ORDER));
per_cpu(softirq_stack, cpu) = irqstk;
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
if (!ph)
return -ENOMEM;
ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
if (!ps) {
__free_pages(ph, THREAD_SIZE_ORDER);
return -ENOMEM;
}
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
return 0;
}
void do_softirq_own_stack(void)
......@@ -135,7 +136,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
irqstk = __this_cpu_read(softirq_stack);
irqstk = __this_cpu_read(softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
......
......@@ -18,63 +18,64 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/sched/task_stack.h>
#include <asm/cpu_entry_area.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
int sysctl_panic_on_stackoverflow;
DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
DECLARE_INIT_PER_CPU(irq_stack_backing_store);
/*
* Probabilistic stack overflow check:
*
* Only check the stack in process context, because everything else
* runs on the big interrupt stacks. Checking reliably is too expensive,
* so we just check from interrupts.
*/
static inline void stack_overflow_check(struct pt_regs *regs)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
#define STACK_TOP_MARGIN 128
struct orig_ist *oist;
u64 irq_stack_top, irq_stack_bottom;
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
if (IS_ERR_OR_NULL(desc))
return false;
if (user_mode(regs))
return;
generic_handle_irq_desc(desc);
return true;
}
if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;
#ifdef CONFIG_VMAP_STACK
/*
* VMAP the backing store with guard pages
*/
static int map_irq_stack(unsigned int cpu)
{
char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
void *va;
int i;
irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
STACK_TOP_MARGIN;
irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));
oist = this_cpu_ptr(&orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
}
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
estack_top, estack_bottom, (void *)regs->ip);
va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
if (!va)
return -ENOMEM;
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
#endif
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
return 0;
}
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
#else
/*
* If VMAP stacks are disabled due to KASAN, just use the per cpu
* backing store without guard pages.
*/
static int map_irq_stack(unsigned int cpu)
{
stack_overflow_check(regs);
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
if (IS_ERR_OR_NULL(desc))
return false;
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
return 0;
}
#endif
generic_handle_irq_desc(desc);
return true;
/*
 * irq_init_percpu_irqstack - set up the hard IRQ stack pointer of a CPU
 * @cpu: CPU whose hardirq_stack_ptr is to be initialized
 *
 * Does nothing when the per-CPU hardirq_stack_ptr is already non-NULL.
 * Otherwise the work is delegated to map_irq_stack().
 *
 * Return: 0 if the pointer was already set, else the result of
 * map_irq_stack().
 */
int irq_init_percpu_irqstack(unsigned int cpu)
{
	return per_cpu(hardirq_stack_ptr, cpu) ? 0 : map_irq_stack(cpu);
}
......@@ -91,6 +91,8 @@ void __init init_IRQ(void)
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
BUG_ON(irq_init_percpu_irqstack(smp_processor_id()));
x86_init.irqs.intr_init();
}
......@@ -104,6 +106,4 @@ void __init native_init_IRQ(void)
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
irq_ctx_init(smp_processor_id());
}
......@@ -21,13 +21,14 @@
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/sched/clock.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
#include <linux/atomic.h>
#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
......@@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
* switch back to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
/*
 * is_debug_stack - check whether @addr counts as being on the #DB stacks
 * @addr: address to classify
 *
 * Return: true if this CPU's debug_stack_usage counter is non-zero, or if
 * @addr lies in [bottom of DB1 stack, top of DB stack) of this CPU's
 * cea_exception_stacks; false otherwise.
 */
static bool notrace is_debug_stack(unsigned long addr)
{
	struct cea_exception_stacks *estacks;
	unsigned long lowest, highest;

	/* A non-zero usage count short-circuits the address range check. */
	if (__this_cpu_read(debug_stack_usage))
		return true;

	estacks = __this_cpu_read(cea_exception_stacks);
	lowest = CEA_ESTACK_BOT(estacks, DB1);
	highest = CEA_ESTACK_TOP(estacks, DB);

	/*
	 * One range test spans DB1, the guard page between DB1 and DB, and
	 * DB itself, which saves a second comparison pair. Including the
	 * guard page is harmless: @addr can never point into it.
	 */
	if (addr < lowest)
		return false;

	return addr < highest;
}
NOKPROBE_SYMBOL(is_debug_stack);
#endif
dotraplinkage notrace void
......
......@@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_cpu_to_logical_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
......
......@@ -935,20 +935,27 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
return boot_error;
}
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
/*
 * Common preparation done for @cpu before it is started, with @idle as
 * the task recorded as its current task.
 *
 * Returns 0 on success, or the non-zero value returned by
 * irq_init_percpu_irqstack() when the interrupt stack setup fails. In the
 * failure case the 32-bit/64-bit specific setup below is not reached.
 */
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
/* Initialize the interrupt stack(s) */
ret = irq_init_percpu_irqstack(cpu);
if (ret)
return ret;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
/* 64-bit: record the per-CPU area offset of @cpu in initial_gs. */
initial_gs = per_cpu_offset(cpu);
#endif
return 0;
}
/*
......@@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
common_cpu_up(cpu, tidle);
err = common_cpu_up(cpu, tidle);
if (err)
return err;
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
if (err) {
......
......@@ -403,7 +403,8 @@ SECTIONS
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);
/*
* Build-time check on the image size:
......@@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
. = ASSERT((irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
. = ASSERT((fixed_percpu_data == 0),
"fixed_percpu_data is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
......
......@@ -13,8 +13,8 @@
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
struct cpu_entry_area *get_cpu_entry_area(int cpu)
......@@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
static void __init percpu_setup_debug_store(int cpu)
static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
int npages;
unsigned int npages;
void *cea;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
......@@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
#endif
}
#ifdef CONFIG_X86_64
/*
 * Map the exception stack called <name> into the cpu entry area.
 *
 * This macro expands in the caller's scope and uses the caller's local
 * variables directly: 'estacks' (the per-CPU backing store), 'cea' (the
 * cpu entry area) and 'npages' (scratch, overwritten on every use).
 * The page count is derived from the size of estacks-><name>_stack and
 * the pages are mapped with PAGE_KERNEL via cea_map_percpu_pages().
 */
#define cea_map_stack(name) do { \
npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
cea_map_percpu_pages(cea->estacks.name## _stack, \
estacks->name## _stack, npages, PAGE_KERNEL); \
} while (0)
/*
 * Publish and map the exception stacks of @cpu in its cpu entry area.
 *
 * Stores the address of the entry area's 'estacks' member in the per-CPU
 * cea_exception_stacks pointer of @cpu, then maps the DF, NMI, DB1, DB
 * and MCE stacks one by one with cea_map_stack().
 */
static void __init percpu_setup_exception_stacks(unsigned int cpu)
{
/*
 * The names 'estacks', 'cea' and 'npages' are referenced by the
 * cea_map_stack() macro and therefore must not be renamed.
 */
struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
unsigned int npages;
/* Build-time check: the backing store must be a whole number of pages. */
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
/*
 * The exceptions stack mappings in the per cpu area are protected
 * by guard pages so each stack must be mapped separately. DB2 is
 * not mapped; it just exists to catch triple nesting of #DB.
 */
cea_map_stack(DF);
cea_map_stack(NMI);
cea_map_stack(DB1);
cea_map_stack(DB);
cea_map_stack(MCE);
}
#else
static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
#endif
/* Setup the fixmap mappings only once per-processor */
static void __init setup_cpu_entry_area(int cpu)
static void __init setup_cpu_entry_area(unsigned int cpu)
{
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
pgprot_t gdt_prot = PAGE_KERNEL_RO;
......@@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
pgprot_t tss_prot = PAGE_KERNEL;
#endif
cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
gdt_prot);
cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
cea_map_percpu_pages(&cea->entry_stack_page,
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);
......@@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
&per_cpu(cpu_tss_rw, cpu),
cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
#ifdef CONFIG_X86_32
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
per_cpu(cpu_entry_area, cpu) = cea;
#endif
#ifdef CONFIG_X86_64
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
BUILD_BUG_ON(sizeof(exception_stacks) !=
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
&per_cpu(exception_stacks, cpu),
sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
#endif
percpu_setup_exception_stacks(cpu);
percpu_setup_debug_store(cpu);
}
......
......@@ -28,6 +28,7 @@
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
#include <asm/cpu_entry_area.h> /* exception stack */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
......@@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
......
......@@ -754,7 +754,7 @@ static void percpu_init(void)
* __per_cpu_load
*
* The "gold" linker incorrectly associates:
* init_per_cpu__irq_stack_union
* init_per_cpu__fixed_percpu_data
* init_per_cpu__gdt_page
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
......
......@@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
common_cpu_up(cpu, idle);
rc = common_cpu_up(cpu, idle);
if (rc)
return rc;
xen_setup_runstate_info(cpu);
......
......@@ -40,13 +40,13 @@ ENTRY(startup_xen)
#ifdef CONFIG_X86_64
/* Set up %gs.
*
* The base of %gs always points to the bottom of the irqstack
* union. If the stack protector canary is enabled, it is
* located at %gs:40. Note that, on SMP, the boot cpu uses
* init data section till per cpu areas are set up.
* The base of %gs always points to fixed_percpu_data. If the
* stack protector canary is enabled, it is located at %gs:40.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
#endif
......
......@@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void)
#ifdef CONFIG_X86
if (xen_pv_domain()) {
irq_ctx_init(smp_processor_id());
if (xen_initial_domain())
pci_xen_initial_domain();
}
......
......@@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
}
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record a small call trace inside the object at @addr of cache @cachep.
 *
 * Layout written, starting obj_offset(cachep) bytes into the object:
 *   0x12345678, @caller, the current CPU id, zero or more stack words
 *   that kernel_text_address() accepts, and finally 0x87654321.
 *
 * Nothing is written if the object is smaller than five unsigned longs.
 *
 * NOTE(review): the hunk header for this region shrinks the section
 * (53 -> 17 lines), so this function is presumably on the removed side of
 * the diff - confirm before relying on it.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
unsigned long caller)
{
int size = cachep->object_size;
/* Skip to the object payload using a byte-granular offset. */
addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
/* Need room for both markers, caller, CPU id and at least one entry. */
if (size < 5 * sizeof(unsigned long))
return;
*addr++ = 0x12345678;
*addr++ = caller;
*addr++ = smp_processor_id();
size -= 3 * sizeof(unsigned long);
{
/* Walk the stack upwards, starting at the slot holding @caller. */
unsigned long *sptr = &caller;
unsigned long svalue;
while (!kstack_end(sptr)) {
svalue = *sptr++;
/* Keep only words that look like kernel text addresses. */
if (kernel_text_address(svalue)) {
*addr++ = svalue;
size -= sizeof(unsigned long);
/* Stop while one slot is still left for the end marker. */
if (size <= sizeof(unsigned long))
break;
}
}
}
*addr++ = 0x87654321;
}
static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
int map, unsigned long caller)
static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
{
if (!is_debug_pagealloc_cache(cachep))
return;
if (caller)
store_stackinfo(cachep, objp, caller);
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
}
#else
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
int map, unsigned long caller) {}
int map) {}
#endif
......@@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);
slab_kernel_map(cachep, objp, 1, 0);
slab_kernel_map(cachep, objp, 1);
}
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
......@@ -2433,7 +2397,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
slab_kernel_map(cachep, objp, 0, 0);
slab_kernel_map(cachep, objp, 0);
}
}
#endif
......@@ -2812,7 +2776,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
slab_kernel_map(cachep, objp, 0, caller);
slab_kernel_map(cachep, objp, 0);
}
return objp;
}
......@@ -3076,7 +3040,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
return objp;
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);
slab_kernel_map(cachep, objp, 1, 0);
slab_kernel_map(cachep, objp, 1);
poison_obj(cachep, objp, POISON_INUSE);
}
if (cachep->flags & SLAB_STORE_USER)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment