Commit a7fcf28d authored by Andy Lutomirski, committed by Ingo Molnar

x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32

I broke 32-bit kernels.  The implementation of sp0 was correct
as far as I can tell, but sp0 was much weirder on x86_32 than I
realized.  It has the following issues:

 - Init's sp0 is inconsistent with everything else's: non-init tasks
   are offset by 8 bytes.  (I have no idea why, and the comment is unhelpful.)

 - vm86 does crazy things to sp0.

Fix it up by replacing this_cpu_sp0() with
current_top_of_stack() and using a new percpu variable to track
the top of the stack on x86_32.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 75182b16 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()")
Link: http://lkml.kernel.org/r/d09dbe270883433776e0cbee3c7079433349e96d.1425692936.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent b27559a4
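
At a glance, the change boils down to the following (a condensed sketch paraphrasing the hunks below, not a verbatim excerpt):

/* Sketch of the end state: a per-CPU stack-top variable on 32-bit... */
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#endif

/* ...and a helper that replaces this_cpu_sp0() for all C consumers. */
static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
	/* On 64-bit, sp0 reliably points at the top of the task stack. */
	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
	/* On 32-bit, vm86 fiddles with sp0, so track the stack top separately. */
	return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}

/* Consumers such as current_thread_info() then derive everything from it: */
static inline struct thread_info *current_thread_info(void)
{
	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}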
@@ -284,6 +284,10 @@ struct tss_struct {
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
+
 /*
  * Save the original ist values for checking stack pointers during debugging
  */
@@ -564,9 +568,14 @@ static inline void native_swapgs(void)
 #endif
 }
 
-static inline unsigned long this_cpu_sp0(void)
+static inline unsigned long current_top_of_stack(void)
 {
+#ifdef CONFIG_X86_64
 	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+	/* sp0 on x86_32 is special in and around vm86 mode. */
+	return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -158,9 +158,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_sp0() - THREAD_SIZE);
-	return ti;
+	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -1130,8 +1130,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ -1226,6 +1226,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -306,13 +306,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	arch_end_context_switch(next_p);
 
 	/*
-	 * Reload esp0.  This changes current_thread_info().
+	 * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+	 * current_thread_info().
 	 */
 	load_sp0(tss, next);
 	this_cpu_write(kernel_stack,
 		       (unsigned long)task_stack_page(next_p) +
 		       THREAD_SIZE - KERNEL_STACK_OFFSET);
+	this_cpu_write(cpu_current_top_of_stack,
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
@@ -806,6 +806,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
+	per_cpu(cpu_current_top_of_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
 	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
@@ -174,8 +174,8 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON((unsigned long)(this_cpu_sp0() - current_stack_pointer()) >=
-	       THREAD_SIZE);
+	BUG_ON((unsigned long)(current_top_of_stack() -
+			       current_stack_pointer()) >= THREAD_SIZE);
 
 	preempt_count_sub(HARDIRQ_OFFSET);
 }