Commit 082af5ec authored by Will Deacon

Merge branch 'for-next/scs' into for-next/core

Support for Clang's Shadow Call Stack in the kernel
(Sami Tolvanen and Will Deacon)
* for-next/scs:
  arm64: entry-ftrace.S: Update comment to indicate that x18 is live
  scs: Move DEFINE_SCS macro into core code
  scs: Remove references to asm/scs.h from core code
  scs: Move scs_overflow_check() out of architecture code
  arm64: scs: Use 'scs_sp' register alias for x18
  scs: Move accounting into alloc/free functions
  arm64: scs: Store absolute SCS stack pointer value in thread_info
  efi/libstub: Disable Shadow Call Stack
  arm64: scs: Add shadow stacks for SDEI
  arm64: Implement Shadow Call Stack
  arm64: Disable SCS for hypervisor code
  arm64: vdso: Disable Shadow Call Stack
  arm64: efi: Restore register x18 if it was corrupted
  arm64: Preserve register x18 when CPU is suspended
  arm64: Reserve register x18 from general allocation with SCS
  scs: Disable when function graph tracing is enabled
  scs: Add support for stack usage debugging
  scs: Add page accounting for shadow call stack allocations
  scs: Add support for Clang's Shadow Call Stack (SCS)
parents c350717e 258c3d62
Makefile:
@@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

ifdef CONFIG_SHADOW_CALL_STACK
CC_FLAGS_SCS	:= -fsanitize=shadow-call-stack
KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif

# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
......
arch/Kconfig:
@@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG
	  about 20% of all kernel functions, which increases the kernel code
	  size by about 2%.

config ARCH_SUPPORTS_SHADOW_CALL_STACK
	bool
	help
	  An architecture should select this if it supports Clang's Shadow
	  Call Stack and implements runtime support for shadow stack
	  switching.

config SHADOW_CALL_STACK
	bool "Clang Shadow Call Stack"
	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
	help
	  This option enables Clang's Shadow Call Stack, which uses a
	  shadow stack to protect function return addresses from being
	  overwritten by an attacker. More information can be found in
	  Clang's documentation:

	    https://clang.llvm.org/docs/ShadowCallStack.html

	  Note that security guarantees in the kernel differ from the
	  ones documented for user space. The kernel must store addresses
	  of shadow stacks in memory, which means an attacker capable of
	  reading and writing arbitrary memory may be able to locate them
	  and hijack control flow by modifying the stacks.

config HAVE_ARCH_WITHIN_STACK_FRAMES
	bool
	help
......
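A note for readers: the mechanism the SHADOW_CALL_STACK help text describes can be modelled in ordinary C. The sketch below is conceptual only, not Clang's actual output (the real instrumentation is emitted at code-generation time and keeps the shadow stack pointer in the reserved x18 register); shadow_stack, scs_sp and instrumented are hypothetical names:

	/*
	 * Conceptual model of SCS instrumentation; illustration only,
	 * nothing here is kernel or compiler source.
	 */
	#include <stdint.h>

	static uintptr_t shadow_stack[128];		/* a 1 KiB shadow stack */
	static uintptr_t *scs_sp = shadow_stack;	/* models register x18 */

	void instrumented(void)
	{
		/* prologue: stash the return address on the shadow stack */
		*scs_sp++ = (uintptr_t)__builtin_return_address(0);

		/* ... function body; overflows of the normal stack
		 * cannot reach the saved copy ... */

		/* epilogue: pop; real codegen reloads x30 from here */
		scs_sp--;
	}

An overflow of a local buffer can still clobber the return address on the regular stack, but the copy the function actually returns through lives in a separate region that ordinary stack writes never reach.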
arch/arm64/Kconfig:
@@ -68,6 +68,7 @@ config ARM64
	select ARCH_USE_QUEUED_SPINLOCKS
	select ARCH_USE_SYM_ANNOTATIONS
	select ARCH_SUPPORTS_MEMORY_FAILURE
	select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
	select ARCH_SUPPORTS_ATOMIC_RMW
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
	select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1026,6 +1027,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
	def_bool y if PGTABLE_LEVELS > 2

# Supported by clang >= 7.0
config CC_HAVE_SHADOW_CALL_STACK
	def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)

config SECCOMP
	bool "Enable seccomp to safely compute untrusted bytecode"
	---help---
......
arch/arm64/Makefile:
@@ -87,6 +87,10 @@ endif
KBUILD_CFLAGS += $(branch-prot-flags-y)

ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
KBUILD_CFLAGS	+= -ffixed-x18
endif

ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS	+= -mbig-endian
CHECKFLAGS	+= -D__AARCH64EB__
......
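-ffixed-x18 removes x18 from the compiler's allocation pool so the SCS runtime owns it outright. Once a register is reserved, GNU C can name it directly with a global register variable; the snippet below is only an illustration of that toolchain feature (GCC supports it generally, Clang's support is more limited), and scs_current_sp is a made-up helper; the kernel itself touches x18 from assembly:

	/* Illustration only: requires an AArch64 compiler with -ffixed-x18. */
	register unsigned long *x18_scs_sp asm("x18");

	static inline unsigned long *scs_current_sp(void)
	{
		return x18_scs_sp;	/* reads the live shadow stack pointer */
	}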
arch/arm64/include/asm/kvm_hyp.h:
@@ -12,7 +12,7 @@
#include <asm/alternative.h>
#include <asm/sysreg.h>

#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh)	\
	({	\
......
arch/arm64/include/asm/scs.h (new file):
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SCS_H
#define _ASM_SCS_H

#ifdef __ASSEMBLY__

#include <asm/asm-offsets.h>

#ifdef CONFIG_SHADOW_CALL_STACK
	scs_sp	.req	x18

	.macro scs_load tsk, tmp
	ldr	scs_sp, [\tsk, #TSK_TI_SCS_SP]
	.endm

	.macro scs_save tsk, tmp
	str	scs_sp, [\tsk, #TSK_TI_SCS_SP]
	.endm
#else
	.macro scs_load tsk, tmp
	.endm

	.macro scs_save tsk, tmp
	.endm
#endif /* CONFIG_SHADOW_CALL_STACK */

#endif /* __ASSEMBLY__ */

#endif /* _ASM_SCS_H */
arch/arm64/include/asm/suspend.h:
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
......
arch/arm64/include/asm/thread_info.h:
@@ -41,6 +41,10 @@ struct thread_info {
#endif
		} preempt;
	};
#ifdef CONFIG_SHADOW_CALL_STACK
	void			*scs_base;
	void			*scs_sp;
#endif
};

#define thread_saved_pc(tsk)	\
@@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk);
				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
				 _TIF_SYSCALL_EMU)

#ifdef CONFIG_SHADOW_CALL_STACK
#define INIT_SCS	\
	.scs_base	= init_shadow_call_stack,	\
	.scs_sp		= init_shadow_call_stack,
#else
#define INIT_SCS
#endif

#define INIT_THREAD_INFO(tsk)	\
{	\
	.flags		= _TIF_FOREIGN_FPSTATE,	\
	.preempt_count	= INIT_PREEMPT_COUNT,	\
	.addr_limit	= KERNEL_DS,	\
	INIT_SCS	\
}

#endif /* __ASM_THREAD_INFO_H */
arch/arm64/kernel/Makefile:
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o
obj-$(CONFIG_ARM64_SSBD)	+= ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH)	+= pointer_auth.o
obj-$(CONFIG_SHADOW_CALL_STACK)	+= scs.o

obj-y				+= vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO)	+= vdso32/
......
arch/arm64/kernel/asm-offsets.c:
@@ -33,6 +33,10 @@ int main(void)
  DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
  DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
#ifdef CONFIG_SHADOW_CALL_STACK
  DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
  DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp));
#endif
  DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
......
arch/arm64/kernel/efi-rt-wrapper.S:
@@ -34,5 +34,14 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
	ldp	x29, x30, [sp], #32
	b.ne	0f
	ret
0:
	/*
	 * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
	 * shadow stack pointer, which we need to restore before returning to
	 * potentially instrumented code. This is safe because the wrapper is
	 * called with preemption disabled and a separate shadow stack is used
	 * for interrupts.
	 */
	mov	x18, x2
	b	efi_handle_corrupted_x18	// tail call
SYM_FUNC_END(__efi_rt_asm_wrapper)
arch/arm64/kernel/entry-ftrace.S:
@@ -23,8 +23,9 @@
 *
 * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
 *
 * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
 * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
 * clobber.
 *
 * We save the callsite's context into a pt_regs before invoking any ftrace
 * callbacks. So that we can get a sensible backtrace, we create a stack record
......
arch/arm64/kernel/entry.S:
@@ -23,6 +23,7 @@
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/scs.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -179,6 +180,8 @@ alternative_cb_end
	apply_ssbd 1, x22, x23
	ptrauth_keys_install_kernel tsk, x20, x22, x23
	scs_load tsk, x20
	.else
	add	x21, sp, #S_FRAME_SIZE
	get_current_task tsk
@@ -343,6 +346,8 @@ alternative_else_nop_endif
	msr	cntkctl_el1, x1
4:
#endif
	scs_save tsk, x0

	/* No kernel C function calls after this as user keys are set. */
	ptrauth_keys_install_user tsk, x0, x1, x2
@@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
	.macro	irq_stack_entry
	mov	x19, sp			// preserve the original sp
#ifdef CONFIG_SHADOW_CALL_STACK
	mov	x24, scs_sp		// preserve the original shadow stack
#endif

	/*
	 * Compare sp with the base of the task stack.
@@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
	/* switch to the irq stack */
	mov	sp, x26

#ifdef CONFIG_SHADOW_CALL_STACK
	/* also switch to the irq shadow stack */
	adr_this_cpu scs_sp, irq_shadow_call_stack, x26
#endif

9998:
	.endm
	/*
	 * The callee-saved regs (x19-x29) should be preserved between
	 * irq_stack_entry and irq_stack_exit, but note that kernel_entry
	 * uses x20-x23 to store data for later use.
	 */
	.macro	irq_stack_exit
	mov	sp, x19
#ifdef CONFIG_SHADOW_CALL_STACK
	mov	scs_sp, x24
#endif
	.endm

/* GPRs used by entry code */
@@ -902,6 +920,8 @@ SYM_FUNC_START(cpu_switch_to)
	mov	sp, x9
	msr	sp_el0, x1
	ptrauth_keys_install_kernel x1, x8, x9, x10
	scs_save x0, x8
	scs_load x1, x8
	ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -1030,13 +1050,16 @@ SYM_CODE_START(__sdei_asm_handler)
	mov	x19, x1

#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
	ldrb	w4, [x19, #SDEI_EVENT_PRIORITY]
#endif

#ifdef CONFIG_VMAP_STACK
	/*
	 * entry.S may have been using sp as a scratch register, find whether
	 * this is a normal or critical event and switch to the appropriate
	 * stack for this CPU.
	 */
	cbnz	w4, 1f
	ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
	b	2f
@@ -1046,6 +1069,15 @@ SYM_CODE_START(__sdei_asm_handler)
	mov	sp, x5
#endif

#ifdef CONFIG_SHADOW_CALL_STACK
/* Use a separate shadow call stack for normal and critical events */
cbnz w4, 3f
adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6
b 4f
3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6
4:
#endif
	/*
	 * We may have interrupted userspace, or a guest, or exit-from or
	 * return-to either of these. We can't trust sp_el0, restore it.
......
arch/arm64/kernel/head.S:
@@ -28,6 +28,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -433,6 +434,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
	stp	xzr, x30, [sp, #-16]!
	mov	x29, sp

#ifdef CONFIG_SHADOW_CALL_STACK
	adr_l	scs_sp, init_shadow_call_stack	// Set shadow call stack
#endif

	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
	ldr_l	x4, kimage_vaddr		// Save the offset between
@@ -745,6 +750,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
	ldr	x2, [x0, #CPU_BOOT_TASK]
	cbz	x2, __secondary_too_slow
	msr	sp_el0, x2
	scs_load x2, x3
	mov	x29, #0
	mov	x30, #0
......
arch/arm64/kernel/scs.c (new file):
// SPDX-License-Identifier: GPL-2.0
/*
 * Shadow Call Stack support.
 *
 * Copyright (C) 2019 Google LLC
 */

#include <linux/percpu.h>
#include <linux/scs.h>

DEFINE_SCS(irq_shadow_call_stack);

#ifdef CONFIG_ARM_SDE_INTERFACE
DEFINE_SCS(sdei_shadow_call_stack_normal);
DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif
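For reference, DEFINE_SCS is introduced in <linux/scs.h> later in this merge; each definition above expands to a static per-CPU array sized SCS_SIZE, roughly:

	/* Approximate expansion of DEFINE_SCS(irq_shadow_call_stack): */
	DEFINE_PER_CPU(unsigned long [SCS_SIZE / sizeof(long)],
		       irq_shadow_call_stack);

entry.S then points scs_sp (x18) at the current CPU's copy with adr_this_cpu when switching to the IRQ or SDEI stacks.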
arch/arm64/kernel/vdso/Makefile:
@@ -29,7 +29,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING

CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS	+= $(DISABLE_LTO)
KASAN_SANITIZE	:= n
UBSAN_SANITIZE	:= n
......
arch/arm64/mm/proc.S:
@@ -58,6 +58,8 @@
 * cpu_do_suspend - save CPU registers context
 *
 * x0: virtual address of context pointer
 *
 * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
 */
SYM_FUNC_START(cpu_do_suspend)
	mrs	x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
	stp	x8, x9, [x0, #48]
	stp	x10, x11, [x0, #64]
	stp	x12, x13, [x0, #80]
	/*
	 * Save x18 as it may be used as a platform register, e.g. by shadow
	 * call stack.
	 */
	str	x18, [x0, #96]
	ret
SYM_FUNC_END(cpu_do_suspend)
@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
	ldp	x9, x10, [x0, #48]
	ldp	x11, x12, [x0, #64]
	ldp	x13, x14, [x0, #80]
	/*
	 * Restore x18, as it may be used as a platform register, and clear
	 * the buffer to minimize the risk of exposure when used for shadow
	 * call stack.
	 */
	ldr	x18, [x0, #96]
	str	xzr, [x0, #96]
	msr	tpidr_el0, x2
	msr	tpidrro_el0, x3
	msr	contextidr_el1, x4
......
drivers/base/node.c:
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
		       "Node %d AnonPages: %8lu kB\n"
		       "Node %d Shmem: %8lu kB\n"
		       "Node %d KernelStack: %8lu kB\n"
#ifdef CONFIG_SHADOW_CALL_STACK
		       "Node %d ShadowCallStack:%8lu kB\n"
#endif
		       "Node %d PageTables: %8lu kB\n"
		       "Node %d NFS_Unstable: %8lu kB\n"
		       "Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
		       nid, K(i.sharedram),
		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
		       nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
#endif
		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
......
drivers/firmware/efi/libstub/Makefile:
@@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
				   $(call cc-option,-fno-stack-protector) \
				   -D__DISABLE_EXPORTS

# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

GCOV_PROFILE	:= n
KASAN_SANITIZE	:= n
UBSAN_SANITIZE	:= n
......
fs/proc/meminfo.c:
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
	show_val_kb(m, "SUnreclaim: ", sunreclaim);
	seq_printf(m, "KernelStack: %8lu kB\n",
		   global_zone_page_state(NR_KERNEL_STACK_KB));
#ifdef CONFIG_SHADOW_CALL_STACK
	seq_printf(m, "ShadowCallStack:%8lu kB\n",
		   global_zone_page_state(NR_KERNEL_SCS_KB));
#endif
	show_val_kb(m, "PageTables: ",
		    global_zone_page_state(NR_PAGETABLE));
......
include/linux/compiler-clang.h:
@@ -42,3 +42,7 @@
 * compilers, like ICC.
 */
#define barrier() __asm__ __volatile__("" : : : "memory")

#if __has_feature(shadow_call_stack)
# define __noscs	__attribute__((__no_sanitize__("shadow-call-stack")))
#endif
include/linux/compiler_types.h:
@@ -193,6 +193,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

#ifndef __noscs
# define __noscs
#endif

#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
......
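Together these two hunks make __noscs available everywhere: under Clang with SCS enabled it expands to the no_sanitize attribute, elsewhere to nothing. A hypothetical use (the function name below is made up; the real user in this merge is the __hyp_text wrapper earlier):

	/* Opt a single function out of shadow call stack instrumentation. */
	static void __noscs early_stub(void)
	{
		/* may run before x18 points at a valid shadow stack */
	}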
include/linux/mmzone.h:
@@ -156,6 +156,9 @@ enum zone_stat_item {
	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
	NR_PAGETABLE,		/* used for pagetables */
	NR_KERNEL_STACK_KB,	/* measured in KiB */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	NR_KERNEL_SCS_KB,	/* measured in KiB */
#endif
	/* Second 128 byte cacheline */
	NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
......
include/linux/scs.h (new file):
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Shadow Call Stack support.
 *
 * Copyright (C) 2019 Google LLC
 */

#ifndef _LINUX_SCS_H
#define _LINUX_SCS_H

#include <linux/gfp.h>
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/sizes.h>

#ifdef CONFIG_SHADOW_CALL_STACK

/*
 * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
 * architecture) provided ~40% safety margin on stack usage while keeping
 * memory allocation overhead reasonable.
 */
#define SCS_SIZE		SZ_1K
#define GFP_SCS			(GFP_KERNEL | __GFP_ZERO)

/* An illegal pointer value to mark the end of the shadow stack. */
#define SCS_END_MAGIC		(0x5f6UL + POISON_POINTER_DELTA)

/* Allocate a static per-CPU shadow stack */
#define DEFINE_SCS(name)	\
	DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

#define task_scs(tsk)		(task_thread_info(tsk)->scs_base)
#define task_scs_sp(tsk)	(task_thread_info(tsk)->scs_sp)

void scs_init(void);
int scs_prepare(struct task_struct *tsk, int node);
void scs_release(struct task_struct *tsk);

static inline void scs_task_reset(struct task_struct *tsk)
{
	/*
	 * Reset the shadow stack to the base address in case the task
	 * is reused.
	 */
	task_scs_sp(tsk) = task_scs(tsk);
}

static inline unsigned long *__scs_magic(void *s)
{
	return (unsigned long *)(s + SCS_SIZE) - 1;
}

static inline bool task_scs_end_corrupted(struct task_struct *tsk)
{
	unsigned long *magic = __scs_magic(task_scs(tsk));
	unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);

	return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
}

#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init(void) {}
static inline void scs_task_reset(struct task_struct *tsk) {}
static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
static inline void scs_release(struct task_struct *tsk) {}
static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; }

#endif /* CONFIG_SHADOW_CALL_STACK */

#endif /* _LINUX_SCS_H */
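The corruption check works because the topmost slot of every shadow stack permanently holds SCS_END_MAGIC: an overflow must overwrite that slot before it can escape the region. A standalone user-space sketch of the same arithmetic (illustration only, not kernel code; POISON_POINTER_DELTA is omitted):

	#include <assert.h>
	#include <stdlib.h>

	#define SCS_SIZE	1024UL
	#define SCS_END_MAGIC	0x5f6UL

	/* Last slot of the region, mirroring __scs_magic() above. */
	static unsigned long *scs_magic(void *s)
	{
		return (unsigned long *)((char *)s + SCS_SIZE) - 1;
	}

	int main(void)
	{
		unsigned long *stack = calloc(1, SCS_SIZE);

		*scs_magic(stack) = SCS_END_MAGIC;

		/* Simulate pushing return addresses; slot 127 is the magic. */
		for (int i = 0; i < 127; i++)
			stack[i] = 0xdeadbeef;

		/* Still intact: usage stayed below SCS_SIZE - sizeof(long). */
		assert(*scs_magic(stack) == SCS_END_MAGIC);
		free(stack);
		return 0;
	}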
init/init_task.c:
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
#include <linux/scs.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -50,6 +51,13 @@ static struct sighand_struct init_sighand = {
	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
};

#ifdef CONFIG_SHADOW_CALL_STACK
unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
		__init_task_data = {
	[(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
};
#endif

/*
 * Set up the first task table, touch at your own risk!. Base=0,
 * limit=0x1fffff (=2MB)
......
kernel/Makefile:
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/
......
kernel/fork.c:
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
#include <linux/scs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
	scs_release(tsk);

#ifndef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
			  NULL, free_vm_stack_cache);
#endif

	scs_init();

	lockdep_init_task(&init_task);
	uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
	if (err)
		goto free_stack;

	err = scs_prepare(tsk, node);
	if (err)
		goto free_stack;

#ifdef CONFIG_SECCOMP
	/*
	 * We must handle setting up seccomp filters once we're under
......
kernel/sched/core.c:
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -3877,6 +3878,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
#ifdef CONFIG_SCHED_STACK_END_CHECK
	if (task_stack_end_corrupted(prev))
		panic("corrupted stack end detected inside scheduler\n");

	if (task_scs_end_corrupted(prev))
		panic("corrupted shadow stack detected inside scheduler\n");
#endif

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
@@ -6040,6 +6044,7 @@ void init_idle(struct task_struct *idle, int cpu)
	idle->se.exec_start = sched_clock();
	idle->flags |= PF_IDLE;

	scs_task_reset(idle);
	kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
......
kernel/scs.c (new file):
// SPDX-License-Identifier: GPL-2.0
/*
 * Shadow Call Stack support.
 *
 * Copyright (C) 2019 Google LLC
 */

#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmstat.h>

static struct kmem_cache *scs_cache;

static void __scs_account(void *s, int account)
{
	struct page *scs_page = virt_to_page(s);

	mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
			    account * (SCS_SIZE / SZ_1K));
}

static void *scs_alloc(int node)
{
	void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);

	if (!s)
		return NULL;

	*__scs_magic(s) = SCS_END_MAGIC;

	/*
	 * Poison the allocation to catch unintentional accesses to
	 * the shadow stack when KASAN is enabled.
	 */
	kasan_poison_object_data(scs_cache, s);
	__scs_account(s, 1);
	return s;
}

static void scs_free(void *s)
{
	__scs_account(s, -1);
	kasan_unpoison_object_data(scs_cache, s);
	kmem_cache_free(scs_cache, s);
}

void __init scs_init(void)
{
	scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
}

int scs_prepare(struct task_struct *tsk, int node)
{
	void *s = scs_alloc(node);

	if (!s)
		return -ENOMEM;

	task_scs(tsk) = task_scs_sp(tsk) = s;
	return 0;
}
static void scs_check_usage(struct task_struct *tsk)
{
	static unsigned long highest;

	unsigned long *p, prev, curr = highest, used = 0;

	if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
		return;

	for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) {
		if (!READ_ONCE_NOCHECK(*p))
			break;
		used += sizeof(*p);
	}

	while (used > curr) {
		prev = cmpxchg_relaxed(&highest, curr, used);

		if (prev == curr) {
			pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
				tsk->comm, task_pid_nr(tsk), used);
			break;
		}

		curr = prev;
	}
}

void scs_release(struct task_struct *tsk)
{
	void *s = task_scs(tsk);

	if (!s)
		return;

	WARN(task_scs_end_corrupted(tsk),
	     "corrupted shadow stack detected when freeing task\n");
	scs_check_usage(tsk);
	scs_free(s);
}
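The loop in scs_check_usage is a lock-free "racy maximum": cmpxchg_relaxed either publishes this task's usage or returns a newer value stored by another CPU, in which case the loop re-evaluates against that value. The same pattern in portable C11, as a standalone illustration (update_highest is a made-up name):

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic unsigned long highest;

	static void update_highest(unsigned long used)
	{
		unsigned long curr = atomic_load_explicit(&highest,
							  memory_order_relaxed);

		while (used > curr) {
			/* On failure, curr is refreshed with the stored value. */
			if (atomic_compare_exchange_weak_explicit(
					&highest, &curr, used,
					memory_order_relaxed,
					memory_order_relaxed)) {
				printf("new highest usage: %lu bytes\n", used);
				break;
			}
		}
	}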
mm/page_alloc.c:
@@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
			" managed:%lukB"
			" mlocked:%lukB"
			" kernel_stack:%lukB"
#ifdef CONFIG_SHADOW_CALL_STACK
			" shadow_call_stack:%lukB"
#endif
			" pagetables:%lukB"
			" bounce:%lukB"
			" free_pcp:%lukB"
@@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
			K(zone_managed_pages(zone)),
			K(zone_page_state(zone, NR_MLOCK)),
			zone_page_state(zone, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
			zone_page_state(zone, NR_KERNEL_SCS_KB),
#endif
			K(zone_page_state(zone, NR_PAGETABLE)),
			K(zone_page_state(zone, NR_BOUNCE)),
			K(free_pcp),
......
mm/vmstat.c:
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
	"nr_mlock",
	"nr_page_table_pages",
	"nr_kernel_stack",
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	"nr_shadow_call_stack",
#endif
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
......