Commit 198d208d, authored by Steven Rostedt, committed by H. Peter Anvin

x86: Keep thread_info on thread stack in x86_32

x86_64 uses a per_cpu variable kernel_stack to always point to
the thread stack of current. This is where the thread_info is stored
and is accessed from this location even when the irq or exception stack
is in use. This removes the complexity of having to maintain the
thread info on the stack when interrupts are running and having to
copy the preempt_count and other fields to the interrupt stack.

x86_32 uses the old method of copying the thread_info from the thread
stack to the exception stack just before executing the exception.

Having two different methods requires #ifdefs, and the x86_32 way
is also a bit of a pain to maintain. By converting x86_32 to the same
method as x86_64, we can remove #ifdefs, clean up the x86_32 code
a little, and remove the overhead of the copy.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20110806012354.263834829@goodmis.org
Link: http://lkml.kernel.org/r/20140206144321.852942014@goodmis.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 0788aa6a
...@@ -449,6 +449,15 @@ struct stack_canary { ...@@ -449,6 +449,15 @@ struct stack_canary {
}; };
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif #endif
/*
 * per-CPU IRQ handling stacks
 *
 * struct irq_stack is a plain array of u32 words that is THREAD_SIZE bytes
 * long and aligned to THREAD_SIZE.
 *
 * hardirq_stack and softirq_stack are per-CPU pointers to such a stack,
 * one for hard interrupts and one for soft interrupts.
 */
struct irq_stack {
u32 stack[THREAD_SIZE/sizeof(u32)];
} __aligned(THREAD_SIZE);
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */ #endif /* X86_64 */
extern unsigned int xstate_size; extern unsigned int xstate_size;
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/percpu.h>
#include <asm/types.h> #include <asm/types.h>
/* /*
...@@ -34,12 +35,6 @@ struct thread_info { ...@@ -34,12 +35,6 @@ struct thread_info {
void __user *sysenter_return; void __user *sysenter_return;
unsigned int sig_on_uaccess_error:1; unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */ unsigned int uaccess_err:1; /* uaccess failed */
#ifdef CONFIG_X86_32
unsigned long previous_esp; /* ESP of the previous stack in
case of nested (IRQ) stacks
(Moved to end, to be removed soon)
*/
#endif
}; };
#define INIT_THREAD_INFO(tsk) \ #define INIT_THREAD_INFO(tsk) \
...@@ -153,9 +148,9 @@ struct thread_info { ...@@ -153,9 +148,9 @@ struct thread_info {
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#ifdef CONFIG_X86_32
#define STACK_WARN (THREAD_SIZE/8) #define STACK_WARN (THREAD_SIZE/8)
#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
/* /*
* macros/functions for gaining access to the thread information structure * macros/functions for gaining access to the thread information structure
* *
...@@ -163,38 +158,6 @@ struct thread_info { ...@@ -163,38 +158,6 @@ struct thread_info {
*/ */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#define current_stack_pointer ({ \
unsigned long sp; \
asm("mov %%esp,%0" : "=g" (sp)); \
sp; \
})
/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)
(current_stack_pointer & ~(THREAD_SIZE - 1));
}
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \
movl $-THREAD_SIZE, reg; \
andl %esp, reg
#endif
#else /* X86_32 */
#include <asm/percpu.h>
#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
DECLARE_PER_CPU(unsigned long, kernel_stack); DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void) static inline struct thread_info *current_thread_info(void)
...@@ -209,8 +172,8 @@ static inline struct thread_info *current_thread_info(void) ...@@ -209,8 +172,8 @@ static inline struct thread_info *current_thread_info(void)
/* how to get the thread information struct from ASM */ /* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \ #define GET_THREAD_INFO(reg) \
movq PER_CPU_VAR(kernel_stack),reg ; \ _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
/* /*
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
...@@ -220,8 +183,6 @@ static inline struct thread_info *current_thread_info(void) ...@@ -220,8 +183,6 @@ static inline struct thread_info *current_thread_info(void)
#endif #endif
#endif /* !X86_32 */
/* /*
* Thread-synchronous status. * Thread-synchronous status.
* *
......
...@@ -1078,6 +1078,10 @@ static __init int setup_disablecpuid(char *arg) ...@@ -1078,6 +1078,10 @@ static __init int setup_disablecpuid(char *arg)
} }
__setup("clearcpuid=", setup_disablecpuid); __setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
...@@ -1094,10 +1098,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = ...@@ -1094,10 +1098,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task; &init_task;
EXPORT_PER_CPU_SYMBOL(current_task); EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
......
...@@ -16,11 +16,33 @@ ...@@ -16,11 +16,33 @@
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
/*
 * Test whether @p points into the THREAD_SIZE-byte region that starts at @irq.
 *
 * Returns the address one past the end of that region (irq + THREAD_SIZE)
 * when @p lies inside it, or NULL when it does not.
 */
static void *is_irq_stack(void *p, void *irq)
{
	void *stack_end = irq + THREAD_SIZE;

	if (p >= irq && p < stack_end)
		return stack_end;

	return NULL;
}
/*
 * Check @stack against the hardirq stack of @cpu.
 *
 * Returns whatever is_irq_stack() reports for that stack: the end of the
 * hardirq stack if @stack points into it, NULL otherwise.
 */
static void *is_hardirq_stack(unsigned long *stack, int cpu)
{
	return is_irq_stack(stack, per_cpu(hardirq_stack, cpu));
}
/*
 * Check @stack against the softirq stack of @cpu.
 *
 * Returns whatever is_irq_stack() reports for that stack: the end of the
 * softirq stack if @stack points into it, NULL otherwise.
 */
static void *is_softirq_stack(unsigned long *stack, int cpu)
{
	return is_irq_stack(stack, per_cpu(softirq_stack, cpu));
}
void dump_trace(struct task_struct *task, struct pt_regs *regs, void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data) const struct stacktrace_ops *ops, void *data)
{ {
const unsigned cpu = get_cpu();
int graph = 0; int graph = 0;
u32 *prev_esp; u32 *prev_esp;
...@@ -40,18 +62,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ...@@ -40,18 +62,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
for (;;) { for (;;) {
struct thread_info *context; struct thread_info *context;
void *end_stack;
end_stack = is_hardirq_stack(stack, cpu);
if (!end_stack)
end_stack = is_softirq_stack(stack, cpu);
context = (struct thread_info *) context = task_thread_info(task);
((unsigned long)stack & (~(THREAD_SIZE - 1))); bp = ops->walk_stack(context, stack, bp, ops, data,
bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); end_stack, &graph);
/* Stop if not on irq stack */ /* Stop if not on irq stack */
if (task_stack_page(task) == context) if (!end_stack)
break; break;
/* The previous esp is just above the context */ /* The previous esp is saved on the bottom of the stack */
prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - prev_esp = (u32 *)(end_stack - THREAD_SIZE);
sizeof(long));
stack = (unsigned long *)*prev_esp; stack = (unsigned long *)*prev_esp;
if (!stack) if (!stack)
break; break;
...@@ -60,6 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ...@@ -60,6 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
break; break;
touch_nmi_watchdog(); touch_nmi_watchdog();
} }
put_cpu();
} }
EXPORT_SYMBOL(dump_trace); EXPORT_SYMBOL(dump_trace);
......
...@@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; } ...@@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { } static inline void print_stack_overflow(void) { }
#endif #endif
/* DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
* per-CPU IRQ handling contexts (thread information and stack) DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
*/
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
} __attribute__((aligned(THREAD_SIZE)));
static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
static void call_on_stack(void *func, void *stack) static void call_on_stack(void *func, void *stack)
{ {
...@@ -77,14 +69,22 @@ static void call_on_stack(void *func, void *stack) ...@@ -77,14 +69,22 @@ static void call_on_stack(void *func, void *stack)
: "memory", "cc", "edx", "ecx", "eax"); : "memory", "cc", "edx", "ecx", "eax");
} }
/* how to get the current stack pointer from C */
register unsigned long current_stack_pointer asm("esp") __used;
/*
 * Return the current stack pointer rounded down to a THREAD_SIZE boundary,
 * i.e. with its low (THREAD_SIZE - 1) bits cleared.
 */
static inline void *current_stack(void)
{
	const unsigned long mask = ~(THREAD_SIZE - 1);

	return (void *)(current_stack_pointer & mask);
}
static inline int static inline int
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
{ {
union irq_ctx *curctx, *irqctx; struct irq_stack *curstk, *irqstk;
u32 *isp, *prev_esp, arg1, arg2; u32 *isp, *prev_esp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info(); curstk = (struct irq_stack *) current_stack();
irqctx = __this_cpu_read(hardirq_ctx); irqstk = __this_cpu_read(hardirq_stack);
/* /*
* this is where we switch to the IRQ stack. However, if we are * this is where we switch to the IRQ stack. However, if we are
...@@ -92,15 +92,13 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) ...@@ -92,15 +92,13 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
* handler) we can't do that and just have to keep using the * handler) we can't do that and just have to keep using the
* current stack (which is the irq stack already after all) * current stack (which is the irq stack already after all)
*/ */
if (unlikely(curctx == irqctx)) if (unlikely(curstk == irqstk))
return 0; return 0;
/* build the stack frame on the IRQ stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
irqctx->tinfo.task = curctx->tinfo.task; /* Save the next esp at the bottom of the stack */
/* Save the next esp after thread_info */ prev_esp = (u32 *)irqstk;
prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) -
sizeof(long));
*prev_esp = current_stack_pointer; *prev_esp = current_stack_pointer;
if (unlikely(overflow)) if (unlikely(overflow))
...@@ -121,49 +119,39 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) ...@@ -121,49 +119,39 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
*/ */
void irq_ctx_init(int cpu) void irq_ctx_init(int cpu)
{ {
union irq_ctx *irqctx; struct irq_stack *irqstk;
if (per_cpu(hardirq_ctx, cpu)) if (per_cpu(hardirq_stack, cpu))
return; return;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP, THREADINFO_GFP,
THREAD_SIZE_ORDER)); THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); per_cpu(hardirq_stack, cpu) = irqstk;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(hardirq_ctx, cpu) = irqctx; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP, THREADINFO_GFP,
THREAD_SIZE_ORDER)); THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); per_cpu(softirq_stack, cpu) = irqstk;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(softirq_ctx, cpu) = irqctx;
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
} }
void do_softirq_own_stack(void) void do_softirq_own_stack(void)
{ {
struct thread_info *curctx; struct thread_info *curstk;
union irq_ctx *irqctx; struct irq_stack *irqstk;
u32 *isp, *prev_esp; u32 *isp, *prev_esp;
curctx = current_thread_info(); curstk = current_stack();
irqctx = __this_cpu_read(softirq_ctx); irqstk = __this_cpu_read(softirq_stack);
irqctx->tinfo.task = curctx->task;
/* build the stack frame on the softirq stack */ /* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
/* Push the previous esp onto the stack */ /* Push the previous esp onto the stack */
prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - prev_esp = (u32 *)irqstk;
sizeof(long));
*prev_esp = current_stack_pointer; *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp); call_on_stack(__do_softirq, isp);
......
...@@ -314,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -314,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/ */
arch_end_context_switch(next_p); arch_end_context_switch(next_p);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
/* /*
* Restore %gs if needed (which is common) * Restore %gs if needed (which is common)
*/ */
......
...@@ -189,7 +189,7 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) ...@@ -189,7 +189,7 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
if (context == (sp & ~(THREAD_SIZE - 1))) if (context == (sp & ~(THREAD_SIZE - 1)))
return sp; return sp;
prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); prev_esp = (u32 *)(context);
if (prev_esp) if (prev_esp)
return (unsigned long)prev_esp; return (unsigned long)prev_esp;
......
...@@ -758,10 +758,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) ...@@ -758,10 +758,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
#else #else
clear_tsk_thread_flag(idle, TIF_FORK); clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu); initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) = per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) - (unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE; KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary; initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp; stack_start = idle->thread.sp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment