Commit 0982c8d8 authored by Marc Zyngier

Merge branch kvm-arm64/nvhe-stacktrace into kvmarm-master/next

* kvm-arm64/nvhe-stacktrace: (27 commits)
  : .
  : Add an overflow stack to the nVHE EL2 code, allowing
  : the implementation of an unwinder, courtesy of
  : Kalesh Singh. From the cover letter (slightly edited):
  :
  : "nVHE has two modes of operation: protected (pKVM) and unprotected
  : (conventional nVHE). Depending on the mode, a slightly different approach
  : is used to dump the hypervisor stacktrace but the core unwinding logic
  : remains the same.
  :
  : * Protected nVHE (pKVM) stacktraces:
  :
  : In protected nVHE mode, the host cannot directly access hypervisor memory.
  :
  : The hypervisor stack unwinding happens in EL2 and is made accessible to
  : the host via a shared buffer. Symbolizing and printing the stacktrace
  : addresses is delegated to the host and happens in EL1.
  :
  : * Non-protected (Conventional) nVHE stacktraces:
  :
  : In non-protected mode, the host is able to directly access the hypervisor
  : stack pages.
  :
  : The hypervisor stack unwinding and dumping of the stacktrace is performed
  : by the host in EL1, as this avoids the memory overhead of setting up
  : shared buffers between the host and hypervisor."
  :
  : Additional patches from Oliver Upton and Marc Zyngier, tidying up
  : the initial series.
  : .
  arm64: Update 'unwinder howto'
  KVM: arm64: Don't open code ARRAY_SIZE()
  KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c
  KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions
  KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit
  KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around
  KVM: arm64: Introduce pkvm_dump_backtrace()
  KVM: arm64: Implement protected nVHE hyp stack unwinder
  KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace
  KVM: arm64: Stub implementation of pKVM HYP stack unwinder
  KVM: arm64: Allocate shared pKVM hyp stacktrace buffers
  KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig
  KVM: arm64: Introduce hyp_dump_backtrace()
  KVM: arm64: Implement non-protected nVHE hyp stack unwinder
  KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace
  KVM: arm64: Stub implementation of non-protected nVHE HYP stack unwinder
  KVM: arm64: On stack overflow switch to hyp overflow_stack
  arm64: stacktrace: Add description of stacktrace/common.h
  arm64: stacktrace: Factor out common unwind()
  arm64: stacktrace: Handle frame pointer from different address spaces
  ...
Signed-off-by: Marc Zyngier <maz@kernel.org>
parents ae98a4a9 a4c750e2
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
	unsigned long vtcr;
};
/*
* Used by the host in EL1 to dump the nVHE hypervisor backtrace on
* hyp_panic() in non-protected mode.
*
* @stack_base: hyp VA of the hyp_stack base.
* @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
* @fp: hyp FP where the backtrace begins.
* @pc: hyp PC where the backtrace begins.
*/
struct kvm_nvhe_stacktrace_info {
unsigned long stack_base;
unsigned long overflow_stack_base;
unsigned long fp;
unsigned long pc;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
	({ \
...
@@ -113,6 +113,14 @@
#define OVERFLOW_STACK_SIZE	SZ_4K
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
 * save the unwound stacktrace; plus an additional entry to delimit the
* end.
*/
#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
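For concreteness, a worked instance of this bound, assuming the common 4 KiB page size (other page sizes change the PAGE_SIZE term):

/*
 * Example, with PAGE_SIZE == SZ_4K (assumed configuration):
 *
 *   NVHE_STACKTRACE_SIZE = (4096 + 4096) / 2 + sizeof(long)
 *                        = 4104 bytes
 *
 * That is 513 eight-byte slots: one per possible 16-byte [x29, x30]
 * frame record across the combined 8 KiB of stack (512), plus the
 * trailing delimiter entry.
 */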
/*
 * Alignment of kernel segments (e.g. .text, .data).
 *
...
@@ -8,52 +8,20 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/types.h>
#include <linux/llist.h>

#include <asm/memory.h>
-#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>

-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	__NR_STACK_TYPES
-};
-
-struct stack_info {
-	unsigned long low;
-	unsigned long high;
-	enum stack_type type;
-};
#include <asm/stacktrace/common.h>

extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
			   const char *loglvl);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

-static inline bool on_stack(unsigned long sp, unsigned long size,
-			    unsigned long low, unsigned long high,
-			    enum stack_type type, struct stack_info *info)
-{
-	if (!low)
-		return false;
-
-	if (sp < low || sp + size < sp || sp + size > high)
-		return false;
-
-	if (info) {
-		info->low = low;
-		info->high = high;
-		info->type = type;
-	}
-	return true;
-}
-
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
				struct stack_info *info)
{
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
		struct stack_info *info) { return false; }
#endif

-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	if (on_task_stack(tsk, sp, size, info))
-		return true;
-	if (tsk != current || preemptible())
-		return false;
-	if (on_irq_stack(sp, size, info))
-		return true;
-	if (on_overflow_stack(sp, size, info))
-		return true;
-	if (on_sdei_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
#endif /* __ASM_STACKTRACE_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common arm64 stack unwinder code.
*
* To implement a new arm64 stack unwinder:
* 1) Include this header
*
* 2) Call into unwind_next_common() from your top level unwind
* function, passing it the validation and translation callbacks
 * (though the latter can be NULL if no translation is required).
*
* See: arch/arm64/kernel/stacktrace.c for the reference implementation.
*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
STACK_TYPE_HYP,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
 * @stacks_done: Stacks which have been entirely unwound, which it is no
 * longer valid to unwind to.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
*
* @task: The task being unwound.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
struct task_struct *task;
};
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline void unwind_init_common(struct unwind_state *state,
struct task_struct *task)
{
state->task = task;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
}
/*
* stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
* a kernel address.
*
* @fp: the frame pointer to be updated to its kernel address.
* @type: the stack type associated with frame pointer @fp
*
 * Returns true on success and @fp is updated to the corresponding
* kernel virtual address; otherwise returns false.
*/
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
enum stack_type type);
/*
* on_accessible_stack_fn() - Check whether a stack range is on any
* of the possible stacks.
*
* @tsk: task whose stack is being unwound
* @sp: stack address being checked
* @size: size of the stack range being checked
* @info: stack unwinding context
*/
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info);
static inline int unwind_next_common(struct unwind_state *state,
struct stack_info *info,
on_accessible_stack_fn accessible,
stack_trace_translate_fp_fn translate_fp)
{
unsigned long fp = state->fp, kern_fp = fp;
struct task_struct *tsk = state->task;
if (fp & 0x7)
return -EINVAL;
if (!accessible(tsk, fp, 16, info))
return -EINVAL;
if (test_bit(info->type, state->stacks_done))
return -EINVAL;
/*
* If fp is not from the current address space perform the necessary
* translation before dereferencing it to get the next fp.
*/
if (translate_fp && !translate_fp(&kern_fp, info->type))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
* HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info->type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
__set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
state->prev_fp = fp;
state->prev_type = info->type;
return 0;
}
#endif /* __ASM_STACKTRACE_COMMON_H */
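To make the 'unwinder howto' at the top of this header concrete, here is a minimal sketch of a third unwinder built on this API. The my_* names and the single hypothetical stack region are illustrative only, not part of this series:

#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <asm/stacktrace/common.h>

/* Hypothetical stack bounds; a real unwinder would use its own region. */
static unsigned long my_stack_base;
#define MY_STACK_SIZE	SZ_4K

static bool my_on_accessible_stack(const struct task_struct *tsk,
				   unsigned long sp, unsigned long size,
				   struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	/* Single stack: [my_stack_base, my_stack_base + MY_STACK_SIZE) */
	return on_stack(sp, size, my_stack_base,
			my_stack_base + MY_STACK_SIZE, STACK_TYPE_TASK, info);
}

static void my_dump_stacktrace(unsigned long fp, unsigned long pc)
{
	struct unwind_state state;
	struct stack_info info;

	unwind_init_common(&state, NULL);
	state.fp = fp;
	state.pc = pc;

	/* FPs here are already kernel VAs, so translate_fp can be NULL. */
	do {
		pr_info("[<%016lx>]\n", state.pc);
	} while (!unwind_next_common(&state, &info, my_on_accessible_stack,
				     NULL));
}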
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
 * to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
 * the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H
#include <asm/stacktrace/common.h>
/*
* kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
*
* @state : unwind_state to initialize
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*/
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
unwind_init_common(state, NULL);
state->fp = fp;
state->pc = pc;
}
#ifndef __KVM_NVHE_HYPERVISOR__
/*
* Conventional (non-protected) nVHE HYP stack unwinder
*
* In non-protected mode, the unwinding is done from kernel proper context
* (by the host in EL1).
*/
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
#endif /* __KVM_NVHE_HYPERVISOR__ */
#endif /* __ASM_STACKTRACE_NVHE_H */
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o	 = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o	+= -fno-stack-protector
# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
# can significantly impact performance. Avoid instrumenting the stack trace
# collection code to minimize this impact.
KASAN_SANITIZE_stacktrace.o := n
# It's not safe to invoke KCOV when portions of the kernel environment aren't
# available or are out-of-sync with HW state. Since `noinstr` doesn't always
# inhibit KCOV instrumentation, disable it for the entire compilation unit.
...
@@ -7,72 +7,90 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/ftrace.h>
-#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>

#include <asm/irq.h>
#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>

-/*
- * A snapshot of a frame record or fp/lr register values, along with some
- * accounting information necessary for robust unwinding.
- *
- * @fp: The fp value in the frame record (or the real fp)
- * @pc: The lr value in the frame record (or the real lr)
- *
- * @stacks_done: Stacks which have been entirely unwound, which it is no
- * longer valid to unwind to.
- *
- * @prev_fp: The fp that pointed to this frame record, or a synthetic value
- * of 0. This is used to ensure that within a stack, each
- * subsequent frame record is at an increasing address.
- * @prev_type: The type of stack this frame record was on, or a synthetic
- * value of STACK_TYPE_UNKNOWN. This is used to detect a
- * transition from one stack to another.
- *
- * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
- * associated with the most recently encountered replacement lr
- * value.
- */
-struct unwind_state {
-	unsigned long fp;
-	unsigned long pc;
-	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
-	unsigned long prev_fp;
-	enum stack_type prev_type;
-#ifdef CONFIG_KRETPROBES
-	struct llist_node *kr_cur;
-#endif
-};
-
-static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
-				unsigned long pc)
-{
-	state->fp = fp;
-	state->pc = pc;
-#ifdef CONFIG_KRETPROBES
-	state->kr_cur = NULL;
-#endif
-
-	/*
-	 * Prime the first unwind.
-	 *
-	 * In unwind_next() we'll check that the FP points to a valid stack,
-	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
-	 * treated as a transition to whichever stack that happens to be. The
-	 * prev_fp value won't be used, but we set it to 0 such that it is
-	 * definitely not an accessible stack address.
-	 */
-	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
-	state->prev_fp = 0;
-	state->prev_type = STACK_TYPE_UNKNOWN;
-}
-NOKPROBE_SYMBOL(unwind_init);
-
/*
 * Start an unwind from a pt_regs.
 *
 * The unwind will begin at the PC within the regs.
 *
 * The regs must be on a stack currently owned by the calling task.
 */
static inline void unwind_init_from_regs(struct unwind_state *state,
					 struct pt_regs *regs)
{
	unwind_init_common(state, current);

	state->fp = regs->regs[29];
	state->pc = regs->pc;
}

/*
 * Start an unwind from a caller.
 *
 * The unwind will begin at the caller of whichever function this is inlined
 * into.
 *
 * The function which invokes this must be noinline.
 */
static __always_inline void unwind_init_from_caller(struct unwind_state *state)
{
	unwind_init_common(state, current);

	state->fp = (unsigned long)__builtin_frame_address(1);
	state->pc = (unsigned long)__builtin_return_address(0);
}

/*
 * Start an unwind from a blocked task.
 *
 * The unwind will begin at the blocked task's saved PC (i.e. the caller of
 * cpu_switch_to()).
 *
 * The caller should ensure the task is blocked in cpu_switch_to() for the
 * duration of the unwind, or the unwind will be bogus. It is never valid to
 * call this for the current task.
 */
static inline void unwind_init_from_task(struct unwind_state *state,
					 struct task_struct *task)
{
	unwind_init_common(state, task);

	state->fp = thread_saved_fp(task);
	state->pc = thread_saved_pc(task);
}

/*
 * We can only safely access per-cpu stacks from current in a non-preemptible
 * context.
 */
static bool on_accessible_stack(const struct task_struct *tsk,
				unsigned long sp, unsigned long size,
				struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	if (on_task_stack(tsk, sp, size, info))
		return true;
	if (tsk != current || preemptible())
		return false;
	if (on_irq_stack(sp, size, info))
		return true;
	if (on_overflow_stack(sp, size, info))
		return true;
	if (on_sdei_stack(sp, size, info))
		return true;

	return false;
}

/*
 * Unwind from one frame record (A) to the next frame record (B).
@@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init);
 * records (e.g. a cycle), determined based on the location and fp value of A
 * and the location (but not the fp value) of B.
 */
-static int notrace unwind_next(struct task_struct *tsk,
-			       struct unwind_state *state)
static int notrace unwind_next(struct unwind_state *state)
{
	struct task_struct *tsk = state->task;
	unsigned long fp = state->fp;
	struct stack_info info;
	int err;

	/* Final frame; nothing to unwind */
	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
		return -ENOENT;

-	if (fp & 0x7)
-		return -EINVAL;
-
-	if (!on_accessible_stack(tsk, fp, 16, &info))
-		return -EINVAL;
-
-	if (test_bit(info.type, state->stacks_done))
-		return -EINVAL;
-
-	/*
-	 * As stacks grow downward, any valid record on the same stack must be
-	 * at a strictly higher address than the prior record.
-	 *
-	 * Stacks can nest in several valid orders, e.g.
-	 *
-	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
-	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
-	 *
-	 * ... but the nesting itself is strict. Once we transition from one
-	 * stack to another, it's never valid to unwind back to that first
-	 * stack.
-	 */
-	if (info.type == state->prev_type) {
-		if (fp <= state->prev_fp)
-			return -EINVAL;
-	} else {
-		set_bit(state->prev_type, state->stacks_done);
-	}
-
-	/*
-	 * Record this frame record's values and location. The prev_fp and
-	 * prev_type are only meaningful to the next unwind_next() invocation.
-	 */
-	state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
-	state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
-	state->prev_fp = fp;
-	state->prev_type = info.type;
	err = unwind_next_common(state, &info, on_accessible_stack, NULL);
	if (err)
		return err;

	state->pc = ptrauth_strip_insn_pac(state->pc);
@@ -157,8 +142,7 @@ static int notrace unwind_next(struct unwind_state *state)
}
NOKPROBE_SYMBOL(unwind_next);

-static void notrace unwind(struct task_struct *tsk,
-			   struct unwind_state *state,
static void notrace unwind(struct unwind_state *state,
			   stack_trace_consume_fn consume_entry, void *cookie)
{
	while (1) {
@@ -166,7 +150,7 @@
		if (!consume_entry(cookie, state->pc))
			break;

-		ret = unwind_next(tsk, state);
		ret = unwind_next(state);
		if (ret < 0)
			break;
	}
@@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
{
	struct unwind_state state;

-	if (regs)
-		unwind_init(&state, regs->regs[29], regs->pc);
-	else if (task == current)
-		unwind_init(&state,
-			    (unsigned long)__builtin_frame_address(1),
-			    (unsigned long)__builtin_return_address(0));
-	else
-		unwind_init(&state, thread_saved_fp(task),
-			    thread_saved_pc(task));
	if (regs) {
		if (task != current)
			return;
		unwind_init_from_regs(&state, regs);
	} else if (task == current) {
		unwind_init_from_caller(&state);
	} else {
		unwind_init_from_task(&state, task);
	}

-	unwind(task, &state, consume_entry, cookie);
	unwind(&state, consume_entry, cookie);
}
@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG
	  If unsure, say N.
config PROTECTED_NVHE_STACKTRACE
bool "Protected KVM hypervisor stacktraces"
depends on NVHE_EL2_DEBUG
default n
help
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
If using protected nVHE mode, but cannot afford the associated
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
say N.
If unsure, or not using protected nVHE (pKVM), say N.
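For reference, since the new option depends on NVHE_EL2_DEBUG, a .config fragment enabling it would look something like:

CONFIG_NVHE_EL2_DEBUG=y
CONFIG_PROTECTED_NVHE_STACKTRACE=y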
endif # VIRTUALIZATION endif # VIRTUALIZATION
@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
	 inject_fault.o va_layout.o handle_exit.o \
-	 guest.o debug.o reset.o sys_regs.o \
	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
	 arch_timer.o trng.o vmid.o \
	 vgic/vgic.o vgic/vgic-init.o \
...
@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

-static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
...
@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>

#include <kvm/arm_hypercalls.h>
@@ -353,6 +354,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
			(void *)(panic_addr + kaslr_offset()));
	}

	/* Dump the nVHE hypervisor backtrace */
	kvm_nvhe_dump_backtrace(hyp_offset);

	/*
	 * Hyp has panicked and we're going to handle that by panicking the
	 * kernel. The kernel offset will be revealed in the panic so we're
...
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
-	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
...
@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
	b	hyp_panic

.L__hyp_sp_overflow\@:
-	/*
-	 * Reset SP to the top of the stack, to allow handling the hyp_panic.
-	 * This corrupts the stack but is ok, since we won't be attempting
-	 * any unwinding here.
-	 */
-	ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
-	mov	sp, x0
	/* Switch to the overflow stack */
	adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0

	b	hyp_panic_bad_stack
	ASM_BUG()
...
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM nVHE hypervisor stack tracing support.
*
* Copyright (C) 2022 Google LLC
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/memory.h>
#include <asm/percpu.h>
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
/*
* hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the information needed by the host to unwind the non-protected
* nVHE hypervisor stack in EL1.
*/
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
#include <asm/stacktrace/nvhe.h>
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
unsigned long low = high - PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack, NULL);
}
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry,
void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
*
* @arg : index of the entry in the stacktrace buffer
* @where : the program counter corresponding to the stack frame
*
* Save the return address of a stack frame to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
{
unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
int *idx = (int *)arg;
/*
* Need 2 free slots: 1 for current entry and 1 for the
* delimiter.
*/
if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
return false;
stacktrace[*idx] = where;
stacktrace[++*idx] = 0UL;
return true;
}
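As an illustration of the layout this maintains (return addresses hypothetical), the per-CPU buffer after three successfully saved frames would be:

/*
 *   idx:        0          1          2          3
 *   pkvm_stacktrace[] = { pc_frame0, pc_frame1, pc_frame2, 0UL, ... }
 *
 * The 0UL delimiter is rewritten in place on every call, so the buffer
 * the host reads from EL1 is always null-terminated.
 */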
/*
* pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
 * Save the unwound stack addresses to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
struct unwind_state state;
int idx = 0;
kvm_nvhe_unwind_init(&state, fp, pc);
unwind(&state, pkvm_save_backtrace_entry, &idx);
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Saves the information needed by the host to dump the nVHE hypervisor
* backtrace.
*/
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
{
if (is_protected_kvm_enabled())
pkvm_save_backtrace(fp, pc);
else
hyp_prepare_backtrace(fp, pc);
}
@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);

extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);

static void __activate_traps(struct kvm_vcpu *vcpu)
{
	u64 val;
@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
		__sysreg_restore_state_nvhe(host_ctxt);
	}

	/* Prepare to dump kvm nvhe hyp stacktrace */
	kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
				   _THIS_IP_);

	__hyp_do_panic(host_ctxt, spsr, elr, par);
	unreachable();
}
...
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
 * to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
 * the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/stacktrace/nvhe.h>
/*
 * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to kernel VAs
*
* The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
* allow for guard pages below the stack. Consequently, the fixed offset address
* translation macros won't work here.
*
* The kernel VA is calculated as an offset from the kernel VA of the hypervisor
* stack base.
*
* Returns true on success and updates @addr to its corresponding kernel VA;
* otherwise returns false.
*/
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
enum stack_type type)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
unsigned long hyp_base, kern_base, hyp_offset;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
switch (type) {
case STACK_TYPE_HYP:
kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
hyp_base = (unsigned long)stacktrace_info->stack_base;
break;
case STACK_TYPE_OVERFLOW:
kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
break;
default:
return false;
}
hyp_offset = *addr - hyp_base;
*addr = kern_base + hyp_offset;
return true;
}
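A worked example of the translation, with made-up addresses:

/*
 *   hyp_base   = 0xffffffc008010000   (hyp VA of the stack base)
 *   kern_base  = 0xffff800008c40000   (kernel VA of the same stack)
 *   *addr (in) = 0xffffffc008010f70   (hyp FP to translate)
 *
 *   hyp_offset  = *addr - hyp_base  = 0xf70
 *   *addr (out) = kern_base + 0xf70 = 0xffff800008c40f70
 */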
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->stack_base;
unsigned long high = low + PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack,
kvm_nvhe_stack_kern_va);
}
static void unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
*
* @arg : the hypervisor offset, used for address translation
* @where : the program counter corresponding to the stack frame
*/
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
unsigned long hyp_offset = (unsigned long)arg;
/* Mask tags and convert to kern addr */
where = (where & va_mask) + hyp_offset;
kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
return true;
}
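For instance, assuming 48-bit VAs (vabits_actual == 48):

/*
 *   va_mask = GENMASK_ULL(47, 0) = 0x0000ffffffffffff
 *
 * ANDing with va_mask strips the upper (tag/kernel) bits of the hyp VA;
 * adding hyp_offset then rebases the address so %pB can symbolize it
 * against the kernel's symbol table.
 */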
static void kvm_nvhe_dump_backtrace_start(void)
{
kvm_err("nVHE call trace:\n");
}
static void kvm_nvhe_dump_backtrace_end(void)
{
kvm_err("---[ end nVHE call trace ]---\n");
}
/*
* hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* The host can directly access HYP stack pages in non-protected
* mode, so the unwinding is done directly from EL1. This removes
* the need for shared buffers between host and hypervisor for
* the stacktrace.
*/
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
struct unwind_state state;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
kvm_nvhe_dump_backtrace_start();
unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
kvm_nvhe_dump_backtrace_end();
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
pkvm_stacktrace);
/*
* pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* Dumping of the pKVM HYP backtrace is done by reading the
* stack addresses from the shared stacktrace buffer, since the
* host cannot directly access hypervisor memory in protected
* mode.
*/
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
unsigned long *stacktrace
= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
int i;
kvm_nvhe_dump_backtrace_start();
/* The saved stacktrace is terminated by a null entry */
for (i = 0;
i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
i++)
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
kvm_nvhe_dump_backtrace_end();
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*/
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
if (is_protected_kvm_enabled())
pkvm_dump_backtrace(hyp_offset);
else
hyp_dump_backtrace(hyp_offset);
}