Commit c02433dd, authored by Mark Rutland, committed by Catalin Marinas

arm64: split thread_info from task stack

This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.

Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.

This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.

Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).

Both secondary entry and idle are updated to stash the sp and task
pointer separately.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 1b7e2296
...@@ -109,6 +109,7 @@ config ARM64 ...@@ -109,6 +109,7 @@ config ARM64
select POWER_SUPPLY select POWER_SUPPLY
select SPARSE_IRQ select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
help help
ARM 64-bit (AArch64) Linux support. ARM 64-bit (AArch64) Linux support.
......
generic-y += bugs.h generic-y += bugs.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h generic-y += cputime.h
generic-y += current.h
generic-y += delay.h generic-y += delay.h
generic-y += div64.h generic-y += div64.h
generic-y += dma.h generic-y += dma.h
......
#ifndef __ASM_CURRENT_H
#define __ASM_CURRENT_H
#include <linux/compiler.h>
#include <asm/sysreg.h>
#ifndef __ASSEMBLY__
struct task_struct;
/*
 * Return a pointer to the current task's task_struct.
 *
 * The value is whatever the sp_el0 system register holds at the time of
 * the call, reinterpreted as a struct task_struct pointer. No validation
 * is performed here.
 *
 * NOTE(review): this relies on the rest of the kernel keeping the running
 * task's task_struct pointer in sp_el0 while executing kernel code --
 * confirm against the context-switch and exception-entry paths.
 */
static __always_inline struct task_struct *get_current(void)
{
return (struct task_struct *)read_sysreg(sp_el0);
}
#define current get_current()
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CURRENT_H */
...@@ -82,6 +82,7 @@ asmlinkage void secondary_start_kernel(void); ...@@ -82,6 +82,7 @@ asmlinkage void secondary_start_kernel(void);
*/ */
struct secondary_data { struct secondary_data {
void *stack; void *stack;
struct task_struct *task;
long status; long status;
}; };
......
...@@ -47,41 +47,17 @@ typedef unsigned long mm_segment_t; ...@@ -47,41 +47,17 @@ typedef unsigned long mm_segment_t;
struct thread_info { struct thread_info {
unsigned long flags; /* low level flags */ unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */ mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
int preempt_count; /* 0 => preemptable, <0 => bug */ int preempt_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
}; };
#define INIT_THREAD_INFO(tsk) \ #define INIT_THREAD_INFO(tsk) \
{ \ { \
.task = &tsk, \
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \ .preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \ .addr_limit = KERNEL_DS, \
} }
#define init_stack (init_thread_union.stack) #define init_stack (init_thread_union.stack)
/*
* how to get the thread information struct from C
*/
static inline struct thread_info *current_thread_info(void) __attribute_const__;
/*
* struct thread_info can be accessed directly via sp_el0.
*
* We don't use read_sysreg() as we want the compiler to cache the value where
* possible.
*/
static inline struct thread_info *current_thread_info(void)
{
unsigned long sp_el0;
asm ("mrs %0, sp_el0" : "=r" (sp_el0));
return (struct thread_info *)sp_el0;
}
#define thread_saved_pc(tsk) \ #define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc)) ((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \ #define thread_saved_sp(tsk) \
......
...@@ -36,9 +36,10 @@ int main(void) ...@@ -36,9 +36,10 @@ int main(void)
{ {
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK(); BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
BLANK(); BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK(); BLANK();
...@@ -121,6 +122,7 @@ int main(void) ...@@ -121,6 +122,7 @@ int main(void)
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK(); BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK(); BLANK();
#ifdef CONFIG_KVM_ARM_HOST #ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
......
...@@ -90,9 +90,8 @@ ...@@ -90,9 +90,8 @@
.if \el == 0 .if \el == 0
mrs x21, sp_el0 mrs x21, sp_el0
mov tsk, sp ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling. disable_step_tsk x19, x20 // exceptions when scheduling.
mov x29, xzr // fp pointed to user-space mov x29, xzr // fp pointed to user-space
...@@ -100,10 +99,10 @@ ...@@ -100,10 +99,10 @@
add x21, sp, #S_FRAME_SIZE add x21, sp, #S_FRAME_SIZE
get_thread_info tsk get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
ldr x20, [tsk, #TI_ADDR_LIMIT] ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64 mov x20, #TASK_SIZE_64
str x20, [tsk, #TI_ADDR_LIMIT] str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */ .endif /* \el == 0 */
mrs x22, elr_el1 mrs x22, elr_el1
...@@ -139,7 +138,7 @@ ...@@ -139,7 +138,7 @@
.if \el != 0 .if \el != 0
/* Restore the task's original addr_limit. */ /* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT] ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
str x20, [tsk, #TI_ADDR_LIMIT] str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */ /* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif .endif
...@@ -192,13 +191,14 @@ alternative_else_nop_endif ...@@ -192,13 +191,14 @@ alternative_else_nop_endif
mov x19, sp // preserve the original sp mov x19, sp // preserve the original sp
/* /*
* Compare sp with the current thread_info, if the top * Compare sp with the base of the task stack.
* ~(THREAD_SIZE - 1) bits match, we are on a task stack, and * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
* should switch to the irq stack. * and should switch to the irq stack.
*/ */
and x25, x19, #~(THREAD_SIZE - 1) ldr x25, [tsk, TSK_STACK]
cmp x25, tsk eor x25, x25, x19
b.ne 9998f and x25, x25, #~(THREAD_SIZE - 1)
cbnz x25, 9998f
adr_this_cpu x25, irq_stack, x26 adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP mov x26, #IRQ_STACK_START_SP
...@@ -427,9 +427,9 @@ el1_irq: ...@@ -427,9 +427,9 @@ el1_irq:
irq_handler irq_handler
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT
ldr w24, [tsk, #TI_PREEMPT] // get preempt count ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
cbnz w24, 1f // preempt count != 0 cbnz w24, 1f // preempt count != 0
ldr x0, [tsk, #TI_FLAGS] // get flags ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt bl el1_preempt
1: 1:
...@@ -444,7 +444,7 @@ ENDPROC(el1_irq) ...@@ -444,7 +444,7 @@ ENDPROC(el1_irq)
el1_preempt: el1_preempt:
mov x24, lr mov x24, lr
1: bl preempt_schedule_irq // irq en/disable is done inside 1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24 ret x24
#endif #endif
...@@ -674,8 +674,7 @@ ENTRY(cpu_switch_to) ...@@ -674,8 +674,7 @@ ENTRY(cpu_switch_to)
ldp x29, x9, [x8], #16 ldp x29, x9, [x8], #16
ldr lr, [x8] ldr lr, [x8]
mov sp, x9 mov sp, x9
and x9, x9, #~(THREAD_SIZE - 1) msr sp_el0, x1
msr sp_el0, x9
ret ret
ENDPROC(cpu_switch_to) ENDPROC(cpu_switch_to)
...@@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to) ...@@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to)
ret_fast_syscall: ret_fast_syscall:
disable_irq // disable interrupts disable_irq // disable interrupts
str x0, [sp, #S_X0] // returned x0 str x0, [sp, #S_X0] // returned x0
ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK and x2, x1, #_TIF_SYSCALL_WORK
cbnz x2, ret_fast_syscall_trace cbnz x2, ret_fast_syscall_trace
and x2, x1, #_TIF_WORK_MASK and x2, x1, #_TIF_WORK_MASK
...@@ -706,14 +705,14 @@ work_pending: ...@@ -706,14 +705,14 @@ work_pending:
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace bl trace_hardirqs_on // enabled while in userspace
#endif #endif
ldr x1, [tsk, #TI_FLAGS] // re-check for single-step ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user b finish_ret_to_user
/* /*
* "slow" syscall return path. * "slow" syscall return path.
*/ */
ret_to_user: ret_to_user:
disable_irq // disable interrupts disable_irq // disable interrupts
ldr x1, [tsk, #TI_FLAGS] ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending cbnz x2, work_pending
finish_ret_to_user: finish_ret_to_user:
...@@ -746,7 +745,7 @@ el0_svc_naked: // compat entry point ...@@ -746,7 +745,7 @@ el0_svc_naked: // compat entry point
enable_dbg_and_irq enable_dbg_and_irq
ct_user_exit 1 ct_user_exit 1
ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
tst x16, #_TIF_SYSCALL_WORK tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace b.ne __sys_trace
cmp scno, sc_nr // check upper syscall limit cmp scno, sc_nr // check upper syscall limit
......
...@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables) ...@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
__primary_switched: __primary_switched:
adrp x4, init_thread_union adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE add sp, x4, #THREAD_SIZE
msr sp_el0, x4 // Save thread_info adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
adr_l x8, vectors // load VBAR_EL1 with virtual adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address msr vbar_el1, x8 // vector table address
...@@ -699,10 +700,10 @@ __secondary_switched: ...@@ -699,10 +700,10 @@ __secondary_switched:
isb isb
adr_l x0, secondary_data adr_l x0, secondary_data
ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0 mov sp, x1
and x0, x0, #~(THREAD_SIZE - 1) ldr x2, [x0, #CPU_BOOT_TASK]
msr sp_el0, x0 // save thread_info msr sp_el0, x2
mov x29, #0 mov x29, #0
b secondary_start_kernel b secondary_start_kernel
ENDPROC(__secondary_switched) ENDPROC(__secondary_switched)
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <linux/personality.h> #include <linux/personality.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <trace/events/power.h> #include <trace/events/power.h>
#include <linux/percpu.h>
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/compat.h> #include <asm/compat.h>
...@@ -321,6 +322,20 @@ void uao_thread_switch(struct task_struct *next) ...@@ -321,6 +322,20 @@ void uao_thread_switch(struct task_struct *next)
} }
} }
/*
* We store our current task in sp_el0, which is clobbered by userspace. Keep a
* shadow copy so that we can restore this upon entry from userspace.
*
* This is *only* for exception entry from EL0, and is not valid until we
* __switch_to() a user task.
*/
DEFINE_PER_CPU(struct task_struct *, __entry_task);
/*
 * Record @next in this CPU's __entry_task per-cpu variable.
 *
 * @next: the task being switched to.
 *
 * NOTE(review): __this_cpu_write() is used here, so callers presumably
 * run with preemption disabled -- confirm at the call site.
 */
static void entry_task_switch(struct task_struct *next)
{
__this_cpu_write(__entry_task, next);
}
/* /*
* Thread switching. * Thread switching.
*/ */
...@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev, ...@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next); tls_thread_switch(next);
hw_breakpoint_thread_switch(next); hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next); contextidr_thread_switch(next);
entry_task_switch(next);
uao_thread_switch(next); uao_thread_switch(next);
/* /*
......
...@@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) ...@@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* We need to tell the secondary core where to find its stack and the * We need to tell the secondary core where to find its stack and the
* page tables. * page tables.
*/ */
secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF); update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data)); __flush_dcache_area(&secondary_data, sizeof(secondary_data));
...@@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) ...@@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret); pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
} }
secondary_data.task = NULL;
secondary_data.stack = NULL; secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status); status = READ_ONCE(secondary_data.status);
if (ret && status) { if (ret && status) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment