Commit 951c2a51 authored by Thomas Gleixner

x86/irq/64: Adjust the per CPU irq stack pointer by 8

The per CPU hardirq_stack_ptr contains the pointer to the irq stack in the
form that it is ready to be assigned to [ER]SP so that the first push ends
up on the top entry of the stack.

But the stack switching on 64 bit has the following rules:

    1) Store the current stack pointer (RSP) in the top most stack entry
       to allow the unwinder to link back to the previous stack

    2) Set RSP to the top most stack entry

    3) Invoke functions on the irq stack

    4) Pop RSP from the top most stack entry (stored in #1) so it's back
       to the original stack.

That requires all stack switching code to decrement the stored pointer by 8
in order to be able to store the current RSP and then set RSP to that
location. That's a pointless exercise.

Do the -8 adjustment right when storing the pointer and make the data type
a void pointer to avoid confusion vs. the struct irq_stack data type, which
on 64-bit is only used to declare the backing store. Move the definition
next to the inuse flag so they likely end up in the same cache
line. Sticking them into a struct to enforce it is a separate change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210210002512.354260928@linutronix.de
parent e7f89001
...@@ -23,7 +23,7 @@ static __always_inline void __run_on_irqstack(void (*func)(void)) ...@@ -23,7 +23,7 @@ static __always_inline void __run_on_irqstack(void (*func)(void))
void *tos = __this_cpu_read(hardirq_stack_ptr); void *tos = __this_cpu_read(hardirq_stack_ptr);
__this_cpu_write(hardirq_stack_inuse, true); __this_cpu_write(hardirq_stack_inuse, true);
asm_call_on_stack(tos - 8, func, NULL); asm_call_on_stack(tos, func, NULL);
__this_cpu_write(hardirq_stack_inuse, false); __this_cpu_write(hardirq_stack_inuse, false);
} }
...@@ -34,7 +34,7 @@ __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs), ...@@ -34,7 +34,7 @@ __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
void *tos = __this_cpu_read(hardirq_stack_ptr); void *tos = __this_cpu_read(hardirq_stack_ptr);
__this_cpu_write(hardirq_stack_inuse, true); __this_cpu_write(hardirq_stack_inuse, true);
asm_call_sysvec_on_stack(tos - 8, func, regs); asm_call_sysvec_on_stack(tos, func, regs);
__this_cpu_write(hardirq_stack_inuse, false); __this_cpu_write(hardirq_stack_inuse, false);
} }
...@@ -45,7 +45,7 @@ __run_irq_on_irqstack(void (*func)(struct irq_desc *desc), ...@@ -45,7 +45,7 @@ __run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
void *tos = __this_cpu_read(hardirq_stack_ptr); void *tos = __this_cpu_read(hardirq_stack_ptr);
__this_cpu_write(hardirq_stack_inuse, true); __this_cpu_write(hardirq_stack_inuse, true);
asm_call_irq_on_stack(tos - 8, func, desc); asm_call_irq_on_stack(tos, func, desc);
__this_cpu_write(hardirq_stack_inuse, false); __this_cpu_write(hardirq_stack_inuse, false);
} }
......
...@@ -426,8 +426,6 @@ struct irq_stack { ...@@ -426,8 +426,6 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE]; char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE); } __aligned(IRQ_STACK_SIZE);
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#else #else
...@@ -454,6 +452,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) ...@@ -454,6 +452,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
} }
DECLARE_PER_CPU(void *, hardirq_stack_ptr);
DECLARE_PER_CPU(bool, hardirq_stack_inuse); DECLARE_PER_CPU(bool, hardirq_stack_inuse);
extern asmlinkage void ignore_sysret(void); extern asmlinkage void ignore_sysret(void);
...@@ -473,9 +472,9 @@ struct stack_canary { ...@@ -473,9 +472,9 @@ struct stack_canary {
}; };
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif #endif
/* Per CPU softirq stack pointer */ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* X86_64 */ #endif /* !X86_64 */
extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_kernel_xstate_size;
extern unsigned int fpu_user_xstate_size; extern unsigned int fpu_user_xstate_size;
......
...@@ -1739,7 +1739,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = ...@@ -1739,7 +1739,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task; &init_task;
EXPORT_PER_CPU_SYMBOL(current_task); EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DEFINE_PER_CPU(void *, hardirq_stack_ptr);
DEFINE_PER_CPU(bool, hardirq_stack_inuse); DEFINE_PER_CPU(bool, hardirq_stack_inuse);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
......
...@@ -128,12 +128,21 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac ...@@ -128,12 +128,21 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{ {
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); unsigned long *begin;
/* /*
* This is a software stack, so 'end' can be a valid stack pointer. * @end points directly to the top most stack entry to avoid a -8
* It just means the stack is empty. * adjustment in the stack switch hotpath. Adjust it back before
* calculating @begin.
*/
end++;
begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* Due to the switching logic RSP can never be == @end because the
* final operation is 'popq %rsp' which means after that RSP points
* to the original stack and not to @end.
*/ */
if (stack < begin || stack >= end) if (stack < begin || stack >= end)
return false; return false;
...@@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info ...@@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info
info->end = end; info->end = end;
/* /*
* The next stack pointer is the first thing pushed by the entry code * The next stack pointer is stored at the top of the irq stack
* after switching to the irq stack. * before switching to the irq stack. Actual stack entries are all
* below that.
*/ */
info->next_sp = (unsigned long *)*(end - 1); info->next_sp = (unsigned long *)*(end - 1);
......
...@@ -48,7 +48,8 @@ static int map_irq_stack(unsigned int cpu) ...@@ -48,7 +48,8 @@ static int map_irq_stack(unsigned int cpu)
if (!va) if (!va)
return -ENOMEM; return -ENOMEM;
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; /* Store actual TOS to avoid adjustment in the hotpath */
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0; return 0;
} }
#else #else
...@@ -60,7 +61,8 @@ static int map_irq_stack(unsigned int cpu) ...@@ -60,7 +61,8 @@ static int map_irq_stack(unsigned int cpu)
{ {
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; /* Store actual TOS to avoid adjustment in the hotpath */
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0; return 0;
} }
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment