Commit c4edb2ba authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvmarm-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 5.20:

- Unwinder implementations for both nVHE modes (classic and
  protected), complete with an overflow stack

- Rework of the sysreg access from userspace, with a complete
  rewrite of the vgic-v3 view to allign with the rest of the
  infrastructure

- Disagregation of the vcpu flags in separate sets to better track
  their use model.

- A fix for the GICv2-on-v3 selftest

- A small set of cosmetic fixes
parents 63f4b210 0982c8d8
......@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
unsigned long vtcr;
};
/*
* Used by the host in EL1 to dump the nVHE hypervisor backtrace on
* hyp_panic() in non-protected mode.
*
* @stack_base: hyp VA of the hyp_stack base.
* @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
* @fp: hyp FP where the backtrace begins.
* @pc: hyp PC where the backtrace begins.
*/
struct kvm_nvhe_stacktrace_info {
unsigned long stack_base;
unsigned long overflow_stack_base;
unsigned long fp;
unsigned long pc;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
({ \
......
......@@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
vcpu_set_flag(vcpu, INCREMENT_PC);
}
#define kvm_pend_exception(v, e) \
do { \
WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
vcpu_set_flag((v), PENDING_EXCEPTION); \
vcpu_set_flag((v), e); \
} while (0)
static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
{
return test_bit(feature, vcpu->arch.features);
......
......@@ -325,8 +325,30 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
/* Miscellaneous vcpu state flags */
u64 flags;
/* Ownership of the FP regs */
enum {
FP_STATE_FREE,
FP_STATE_HOST_OWNED,
FP_STATE_GUEST_OWNED,
} fp_state;
/* Configuration flags, set once and for all before the vcpu can run */
u8 cflags;
/* Input flags to the hypervisor code, potentially cleared after use */
u8 iflags;
/* State flags for kernel bookkeeping, unused by the hypervisor code */
u8 sflags;
/*
* Don't run the guest (internal implementation need).
*
* Contrary to the flags above, this is set/cleared outside of
* a vcpu context, and thus cannot be mixed with the flags
* themselves (or the flag accesses need to be made atomic).
*/
bool pause;
/*
* We maintain more than a single set of debug registers to support
......@@ -376,9 +398,6 @@ struct kvm_vcpu_arch {
/* vcpu power state */
struct kvm_mp_state mp_state;
/* Don't run the guest (internal implementation need) */
bool pause;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
......@@ -392,10 +411,6 @@ struct kvm_vcpu_arch {
/* Additional reset state */
struct vcpu_reset_state reset_state;
/* True when deferrable sysregs are loaded on the physical CPU,
* see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
bool sysregs_loaded_on_cpu;
/* Guest PV state */
struct {
u64 last_steal;
......@@ -403,6 +418,124 @@ struct kvm_vcpu_arch {
} steal;
};
/*
* Each 'flag' is composed of a comma-separated triplet:
*
* - the flag-set it belongs to in the vcpu->arch structure
* - the value for that flag
* - the mask for that flag
*
* __vcpu_single_flag() builds such a triplet for a single-bit flag.
* unpack_vcpu_flag() extract the flag value from the triplet for
* direct use outside of the flag accessors.
*/
#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
#define __unpack_flag(_set, _f, _m) _f
#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
#define __build_check_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *_fset; \
\
/* Check that the flags fit in the mask */ \
BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
/* Check that the flags fit in the type */ \
BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
} while (0)
#define __vcpu_get_flag(v, flagset, f, m) \
({ \
__build_check_flag(v, flagset, f, m); \
\
v->arch.flagset & (m); \
})
#define __vcpu_set_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
\
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
if (HWEIGHT(m) > 1) \
*fset &= ~(m); \
*fset |= (f); \
} while (0)
#define __vcpu_clear_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
\
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
*fset &= ~(m); \
} while (0)
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
/* SVE exposed to guest */
#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
/*
* PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
* be set together with an exception...
*/
#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
/* Helpers to encode exceptions with minimum fuss */
#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
/*
* When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
* values:
*
* For AArch32 EL1:
*/
#define EXCEPT_AA32_UND __vcpu_except_flags(0)
#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
/* For AArch64: */
#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
/* For AArch64 with NV (one day): */
#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
/* Guest debug is live */
#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
/* Save SPE context if active */
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
/* SME enabled for EL0 */
#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
/* Physical CPU not in supported_cpus */
#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
/* WFIT instruction trapped */
#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
/* vcpu system registers loaded on physical CPU */
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
sve_ffr_offset((vcpu)->arch.sve_max_vl))
......@@ -423,70 +556,31 @@ struct kvm_vcpu_arch {
__size_ret; \
})
/* vcpu_arch flags field values: */
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
/*
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
* set together with an exception...
*/
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
/*
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
* take the following values:
*
* For AArch32 EL1:
*/
#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
/* For AArch64: */
#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
vcpu_get_flag(vcpu, GUEST_HAS_SVE))
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
(vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
#define vcpu_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
......@@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
......@@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
......
......@@ -113,6 +113,14 @@
#define OVERFLOW_STACK_SIZE SZ_4K
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
* save the unwinded stacktrace; plus an additional entry to delimit the
* end.
*/
#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
/*
* Alignment of kernel segments (e.g. .text, .data).
*
......
......@@ -8,52 +8,20 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/types.h>
#include <linux/llist.h>
#include <asm/memory.h>
#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
#include <asm/stacktrace/common.h>
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
......@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static inline bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
}
#endif /* __ASM_STACKTRACE_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common arm64 stack unwinder code.
*
* To implement a new arm64 stack unwinder:
* 1) Include this header
*
* 2) Call into unwind_next_common() from your top level unwind
* function, passing it the validation and translation callbacks
* (though the later can be NULL if no translation is required).
*
* See: arch/arm64/kernel/stacktrace.c for the reference implementation.
*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
STACK_TYPE_HYP,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
* @stacks_done: Stacks which have been entirely unwound, for which it is no
* longer valid to unwind to.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
*
* @task: The task being unwound.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
struct task_struct *task;
};
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline void unwind_init_common(struct unwind_state *state,
struct task_struct *task)
{
state->task = task;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
}
/*
* stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
* a kernel address.
*
* @fp: the frame pointer to be updated to its kernel address.
* @type: the stack type associated with frame pointer @fp
*
* Returns true and success and @fp is updated to the corresponding
* kernel virtual address; otherwise returns false.
*/
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
enum stack_type type);
/*
* on_accessible_stack_fn() - Check whether a stack range is on any
* of the possible stacks.
*
* @tsk: task whose stack is being unwound
* @sp: stack address being checked
* @size: size of the stack range being checked
* @info: stack unwinding context
*/
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info);
static inline int unwind_next_common(struct unwind_state *state,
struct stack_info *info,
on_accessible_stack_fn accessible,
stack_trace_translate_fp_fn translate_fp)
{
unsigned long fp = state->fp, kern_fp = fp;
struct task_struct *tsk = state->task;
if (fp & 0x7)
return -EINVAL;
if (!accessible(tsk, fp, 16, info))
return -EINVAL;
if (test_bit(info->type, state->stacks_done))
return -EINVAL;
/*
* If fp is not from the current address space perform the necessary
* translation before dereferencing it to get the next fp.
*/
if (translate_fp && !translate_fp(&kern_fp, info->type))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
* HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info->type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
__set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
state->prev_fp = fp;
state->prev_type = info->type;
return 0;
}
#endif /* __ASM_STACKTRACE_COMMON_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
* to allocate shared buffers for the host to read the unwinded
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
* the HYP memory. The stack is unwinded in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H
#include <asm/stacktrace/common.h>
/*
* kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
*
* @state : unwind_state to initialize
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*/
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
unwind_init_common(state, NULL);
state->fp = fp;
state->pc = pc;
}
#ifndef __KVM_NVHE_HYPERVISOR__
/*
* Conventional (non-protected) nVHE HYP stack unwinder
*
* In non-protected mode, the unwinding is done from kernel proper context
* (by the host in EL1).
*/
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
#endif /* __KVM_NVHE_HYPERVISOR__ */
#endif /* __ASM_STACKTRACE_NVHE_H */
......@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o += -fno-stack-protector
# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
# can significantly impact performance. Avoid instrumenting the stack trace
# collection code to minimize this impact.
KASAN_SANITIZE_stacktrace.o := n
# It's not safe to invoke KCOV when portions of the kernel environment aren't
# available or are out-of-sync with HW state. Since `noinstr` doesn't always
# inhibit KCOV instrumentation, disable it for the entire compilation unit.
......
......@@ -7,72 +7,90 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/irq.h>
#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
* Start an unwind from a pt_regs.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
* The unwind will begin at the PC within the regs.
*
* @stacks_done: Stacks which have been entirely unwound, for which it is no
* longer valid to unwind to.
* The regs must be on a stack currently owned by the calling task.
*/
static inline void unwind_init_from_regs(struct unwind_state *state,
struct pt_regs *regs)
{
unwind_init_common(state, current);
state->fp = regs->regs[29];
state->pc = regs->pc;
}
/*
* Start an unwind from a caller.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
* The unwind will begin at the caller of whichever function this is inlined
* into.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
* The function which invokes this must be noinline.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
};
static __always_inline void unwind_init_from_caller(struct unwind_state *state)
{
unwind_init_common(state, current);
state->fp = (unsigned long)__builtin_frame_address(1);
state->pc = (unsigned long)__builtin_return_address(0);
}
static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
unsigned long pc)
/*
* Start an unwind from a blocked task.
*
* The unwind will begin at the blocked tasks saved PC (i.e. the caller of
* cpu_switch_to()).
*
* The caller should ensure the task is blocked in cpu_switch_to() for the
* duration of the unwind, or the unwind will be bogus. It is never valid to
* call this for the current task.
*/
static inline void unwind_init_from_task(struct unwind_state *state,
struct task_struct *task)
{
state->fp = fp;
state->pc = pc;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
unwind_init_common(state, task);
state->fp = thread_saved_fp(task);
state->pc = thread_saved_pc(task);
}
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
}
NOKPROBE_SYMBOL(unwind_init);
/*
* Unwind from one frame record (A) to the next frame record (B).
......@@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init);
* records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B.
*/
static int notrace unwind_next(struct task_struct *tsk,
struct unwind_state *state)
static int notrace unwind_next(struct unwind_state *state)
{
struct task_struct *tsk = state->task;
unsigned long fp = state->fp;
struct stack_info info;
int err;
/* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT;
if (fp & 0x7)
return -EINVAL;
if (!on_accessible_stack(tsk, fp, 16, &info))
return -EINVAL;
if (test_bit(info.type, state->stacks_done))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info.type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
state->prev_fp = fp;
state->prev_type = info.type;
err = unwind_next_common(state, &info, on_accessible_stack, NULL);
if (err)
return err;
state->pc = ptrauth_strip_insn_pac(state->pc);
......@@ -157,8 +142,7 @@ static int notrace unwind_next(struct task_struct *tsk,
}
NOKPROBE_SYMBOL(unwind_next);
static void notrace unwind(struct task_struct *tsk,
struct unwind_state *state,
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
......@@ -166,7 +150,7 @@ static void notrace unwind(struct task_struct *tsk,
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(tsk, state);
ret = unwind_next(state);
if (ret < 0)
break;
}
......@@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
{
struct unwind_state state;
if (regs)
unwind_init(&state, regs->regs[29], regs->pc);
else if (task == current)
unwind_init(&state,
(unsigned long)__builtin_frame_address(1),
(unsigned long)__builtin_return_address(0));
else
unwind_init(&state, thread_saved_fp(task),
thread_saved_pc(task));
unwind(task, &state, consume_entry, cookie);
if (regs) {
if (task != current)
return;
unwind_init_from_regs(&state, regs);
} else if (task == current) {
unwind_init_from_caller(&state);
} else {
unwind_init_from_task(&state, task);
}
unwind(&state, consume_entry, cookie);
}
......@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG
If unsure, say N.
config PROTECTED_NVHE_STACKTRACE
bool "Protected KVM hypervisor stacktraces"
depends on NVHE_EL2_DEBUG
default n
help
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
If using protected nVHE mode, but cannot afford the associated
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
say N.
If unsure, or not using protected nVHE (pKVM), say N.
endif # VIRTUALIZATION
......@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \
......
......@@ -242,7 +242,7 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
(vcpu->arch.flags & KVM_ARM64_WFIT));
vcpu_get_flag(vcpu, IN_WFIT));
}
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
......
......@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
......@@ -330,6 +330,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/*
* Default value for the FP state, will be overloaded at load
* time if we support FP (pretty likely)
*/
vcpu->arch.fp_state = FP_STATE_FREE;
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
......@@ -659,7 +665,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu->arch.flags &= ~KVM_ARM64_WFIT;
vcpu_clear_flag(vcpu, IN_WFIT);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
......@@ -1015,8 +1021,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* the vcpu state. Note that this relies on __kvm_adjust_pc()
* being preempt-safe on VHE.
*/
if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_INCREMENT_PC)))
if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) ||
vcpu_get_flag(vcpu, INCREMENT_PC)))
kvm_call_hyp(__kvm_adjust_pc, vcpu);
vcpu_put(vcpu);
......@@ -1414,18 +1420,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
unsigned long dev_id, type;
dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
KVM_ARM_DEVICE_ID_SHIFT;
type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
KVM_ARM_DEVICE_TYPE_SHIFT;
switch (dev_id) {
switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) {
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr);
default:
return -ENODEV;
}
......
......@@ -104,11 +104,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
* Trap debug register access when one of the following is true:
* - Userspace is using the hardware to debug the guest
* (KVM_GUESTDBG_USE_HW is set).
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
* - The guest is not using debug (DEBUG_DIRTY clear).
* - The guest has enabled the OS Lock (debug exceptions are blocked).
*/
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) ||
!vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
kvm_vcpu_os_lock_enabled(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
......@@ -147,8 +147,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
* debug related registers.
*
* Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
* flag on vcpu->arch.flags). Since the guest must not interfere
* the guest is not actively using them (see the DEBUG_DIRTY
* flag on vcpu->arch.iflags). Since the guest must not interfere
* with the hardware state when debugging the guest, we must ensure that
* trapping is enabled whenever we are debugging the guest using the
* debug registers.
......@@ -205,9 +205,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*
* We simply switch the debug_ptr to point to our new
* external_debug_state which has been populated by the
* debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
* mechanism ensures the registers are updated on the
* world switch.
* debug ioctl. The existing DEBUG_DIRTY mechanism ensures
* the registers are updated on the world switch.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
......@@ -216,7 +215,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
vcpu_set_flag(vcpu, DEBUG_DIRTY);
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0],
......@@ -246,7 +245,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
/* If KDE or MDE are set, perform a full save/restore cycle. */
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
vcpu_set_flag(vcpu, DEBUG_DIRTY);
/* Write mdcr_el2 changes since vcpu_load on VHE systems */
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
......@@ -298,16 +297,16 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
*/
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE;
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
/* Check if we have TRBE implemented and available at the host */
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE;
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
{
vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE |
KVM_ARM64_DEBUG_STATE_SAVE_TRBE);
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
......@@ -77,12 +77,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
BUG_ON(!current->mm);
BUG_ON(test_thread_flag(TIF_SVE));
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
if (!system_supports_fpsimd())
return;
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
/*
* We don't currently support SME guests but if we leave
......@@ -94,29 +96,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
vcpu_set_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg_s(SYS_SVCR) &
(SVCR_SM_MASK | SVCR_ZA_MASK)) {
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
vcpu->arch.fp_state = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
}
}
}
/*
* Called just before entering the guest once we are no longer
* preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM
* mirror of the flag used by the hypervisor.
* Called just before entering the guest once we are no longer preemptable
* and interrupts are disabled. If we have managed to run anything using
* FP while we were preemptible (such as off the back of an interrupt),
* then neither the host nor the guest own the FP hardware (and it was the
* responsibility of the code that used FP to save the existing state).
*/
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
else
vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
vcpu->arch.fp_state = FP_STATE_FREE;
}
/*
......@@ -130,7 +131,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
/*
* Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to.
......@@ -163,7 +164,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
*/
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED)
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
sysreg_clear_set(CPACR_EL1, 0,
CPACR_EL1_SMEN_EL0EN |
CPACR_EL1_SMEN_EL1EN);
......@@ -173,7 +174,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
CPACR_EL1_SMEN_EL1EN);
}
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
......@@ -192,7 +193,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* for EL0. To avoid spurious traps, restore the trap state
* seen by kvm_arch_vcpu_load_fp():
*/
if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
else
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
......
......@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>
#include <kvm/arm_hypercalls.h>
......@@ -120,7 +121,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else {
if (esr & ESR_ELx_WFx_ISS_WFxT)
vcpu->arch.flags |= KVM_ARM64_WFIT;
vcpu_set_flag(vcpu, IN_WFIT);
kvm_vcpu_wfi(vcpu);
}
......@@ -347,12 +348,15 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
else
kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
(void *)panic_addr);
(void *)(panic_addr + kaslr_offset()));
} else {
kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
(void *)panic_addr);
(void *)(panic_addr + kaslr_offset()));
}
/* Dump the nVHE hypervisor backtrace */
kvm_nvhe_dump_backtrace(hyp_offset);
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're
......
......@@ -303,14 +303,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
if (vcpu_el1_is_32bit(vcpu)) {
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
case KVM_ARM64_EXCEPT_AA32_UND:
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case unpack_vcpu_flag(EXCEPT_AA32_UND):
enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
break;
case KVM_ARM64_EXCEPT_AA32_IABT:
case unpack_vcpu_flag(EXCEPT_AA32_IABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
break;
case KVM_ARM64_EXCEPT_AA32_DABT:
case unpack_vcpu_flag(EXCEPT_AA32_DABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
break;
default:
......@@ -318,9 +318,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
break;
}
} else {
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_EXCEPT_AA64_EL1):
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
default:
......@@ -340,12 +339,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
*/
void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
if (vcpu_get_flag(vcpu, PENDING_EXCEPTION)) {
kvm_inject_exception(vcpu);
vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_MASK);
} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
vcpu_clear_flag(vcpu, EXCEPT_MASK);
} else if (vcpu_get_flag(vcpu, INCREMENT_PC)) {
kvm_skip_instr(vcpu);
vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
vcpu_clear_flag(vcpu, INCREMENT_PC);
}
}
......@@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
......@@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
......@@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
__debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt);
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
vcpu_clear_flag(vcpu, DEBUG_DIRTY);
}
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
......@@ -37,22 +37,10 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
/* Check whether the FP regs are owned by the guest */
static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
{
/*
* When the system doesn't support FP/SIMD, we cannot rely on
* the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
* abort on the very first access to FP and thus we should never
* see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
}
/* Save the 32-bit only FPSIMD system register state */
......@@ -191,10 +179,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
/* Write out the host state if it's in the registers */
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
/* Restore the guest state */
if (sve_guest)
......@@ -206,7 +192,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
return true;
}
......
......@@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}
......@@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}
......
......@@ -12,13 +12,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-y += $(lib-objs)
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
hyp-obj-y += $(lib-objs)
##
## Build rules for compiling nVHE hyp code
......@@ -26,9 +26,9 @@ obj-y += $(lib-objs)
## file containing all nVHE hyp code and data.
##
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
obj-y := kvm_nvhe.o
extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
......
......@@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
/* Disable and flush Self-Hosted Trace generation */
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
}
......@@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
}
......
......@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
b hyp_panic
.L__hyp_sp_overflow\@:
/*
* Reset SP to the top of the stack, to allow handling the hyp_panic.
* This corrupts the stack but is ok, since we won't be attempting
* any unwinding here.
*/
ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
mov sp, x0
/* Switch to the overflow stack */
adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
b hyp_panic_bad_stack
ASM_BUG()
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM nVHE hypervisor stack tracing support.
*
* Copyright (C) 2022 Google LLC
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/memory.h>
#include <asm/percpu.h>
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
/*
* hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the information needed by the host to unwind the non-protected
* nVHE hypervisor stack in EL1.
*/
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
#include <asm/stacktrace/nvhe.h>
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
unsigned long low = high - PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack, NULL);
}
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry,
void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
*
* @arg : index of the entry in the stacktrace buffer
* @where : the program counter corresponding to the stack frame
*
* Save the return address of a stack frame to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
{
unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
int *idx = (int *)arg;
/*
* Need 2 free slots: 1 for current entry and 1 for the
* delimiter.
*/
if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
return false;
stacktrace[*idx] = where;
stacktrace[++*idx] = 0UL;
return true;
}
/*
* pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the unwinded stack addresses to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
struct unwind_state state;
int idx = 0;
kvm_nvhe_unwind_init(&state, fp, pc);
unwind(&state, pkvm_save_backtrace_entry, &idx);
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Saves the information needed by the host to dump the nVHE hypervisor
* backtrace.
*/
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
{
if (is_protected_kvm_enabled())
pkvm_save_backtrace(fp, pc);
else
hyp_prepare_backtrace(fp, pc);
}
......@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
......@@ -43,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = vcpu->arch.cptr_el2;
val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
if (!update_fp_enabled(vcpu)) {
if (!guest_owns_fp_regs(vcpu)) {
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
__activate_traps_fpsimd32(vcpu);
}
......@@ -123,7 +125,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
}
cptr = CPTR_EL2_DEFAULT;
if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
cptr |= CPTR_EL2_TZ;
if (cpus_have_final_cap(ARM64_SME))
cptr &= ~CPTR_EL2_TSM;
......@@ -335,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
......@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
/* Prepare to dump kvm nvhe hyp stacktrace */
kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
_THIS_IP_);
__hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
}
......@@ -386,5 +392,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void)
asmlinkage void kvm_unexpected_el2_exception(void)
{
return __kvm_unexpected_el2_exception();
__kvm_unexpected_el2_exception();
}
......@@ -38,9 +38,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
__kvm_adjust_pc(vcpu);
......
......@@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
if (update_fp_enabled(vcpu)) {
if (guest_owns_fp_regs(vcpu)) {
if (vcpu_has_sve(vcpu))
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else {
......@@ -175,7 +175,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
......@@ -249,5 +249,5 @@ void __noreturn hyp_panic(void)
asmlinkage void kvm_unexpected_el2_exception(void)
{
return __kvm_unexpected_el2_exception();
__kvm_unexpected_el2_exception();
}
......@@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_user_state(guest_ctxt);
__sysreg_restore_el1_state(guest_ctxt);
vcpu->arch.sysregs_loaded_on_cpu = true;
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
activate_traps_vhe_load(vcpu);
}
......@@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
vcpu->arch.sysregs_loaded_on_cpu = false;
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
......@@ -20,9 +20,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
......@@ -52,9 +50,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
{
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
/*
* Build an unknown exception, depending on the instruction
......@@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
static void inject_undef32(struct kvm_vcpu *vcpu)
{
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA32_UND);
}
/*
......@@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
far = vcpu_read_sys_reg(vcpu, FAR_EL1);
if (is_pabt) {
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA32_IABT);
far &= GENMASK(31, 0);
far |= (u64)addr << 32;
vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
} else { /* !iabt */
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
KVM_ARM64_PENDING_EXCEPTION);
kvm_pend_exception(vcpu, EXCEPT_AA32_DABT);
far &= GENMASK(63, 32);
far |= addr;
vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
......
......@@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
vcpu_set_flag(vcpu, GUEST_HAS_SVE);
return 0;
}
......@@ -120,7 +120,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
}
vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
return 0;
}
......@@ -177,7 +177,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
!system_has_full_ptr_auth())
return -EINVAL;
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
return 0;
}
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
* to allocate shared buffers for the host to read the unwinded
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
* the HYP memory. The stack is unwinded in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/stacktrace/nvhe.h>
/*
* kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs
*
* The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
* allow for guard pages below the stack. Consequently, the fixed offset address
* translation macros won't work here.
*
* The kernel VA is calculated as an offset from the kernel VA of the hypervisor
* stack base.
*
* Returns true on success and updates @addr to its corresponding kernel VA;
* otherwise returns false.
*/
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
enum stack_type type)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
unsigned long hyp_base, kern_base, hyp_offset;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
switch (type) {
case STACK_TYPE_HYP:
kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
hyp_base = (unsigned long)stacktrace_info->stack_base;
break;
case STACK_TYPE_OVERFLOW:
kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
break;
default:
return false;
}
hyp_offset = *addr - hyp_base;
*addr = kern_base + hyp_offset;
return true;
}
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->stack_base;
unsigned long high = low + PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack,
kvm_nvhe_stack_kern_va);
}
static void unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
*
* @arg : the hypervisor offset, used for address translation
* @where : the program counter corresponding to the stack frame
*/
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
unsigned long hyp_offset = (unsigned long)arg;
/* Mask tags and convert to kern addr */
where = (where & va_mask) + hyp_offset;
kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
return true;
}
static void kvm_nvhe_dump_backtrace_start(void)
{
kvm_err("nVHE call trace:\n");
}
static void kvm_nvhe_dump_backtrace_end(void)
{
kvm_err("---[ end nVHE call trace ]---\n");
}
/*
* hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* The host can directly access HYP stack pages in non-protected
* mode, so the unwinding is done directly from EL1. This removes
* the need for shared buffers between host and hypervisor for
* the stacktrace.
*/
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
struct unwind_state state;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
kvm_nvhe_dump_backtrace_start();
unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
kvm_nvhe_dump_backtrace_end();
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
pkvm_stacktrace);
/*
* pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* Dumping of the pKVM HYP backtrace is done by reading the
* stack addresses from the shared stacktrace buffer, since the
* host cannot directly access hypervisor memory in protected
* mode.
*/
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
unsigned long *stacktrace
= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
int i;
kvm_nvhe_dump_backtrace_start();
/* The saved stacktrace is terminated by a null entry */
for (i = 0;
i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
i++)
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
kvm_nvhe_dump_backtrace_end();
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*/
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
if (is_protected_kvm_enabled())
pkvm_dump_backtrace(hyp_offset);
else
hyp_dump_backtrace(hyp_offset);
}
This diff is collapsed.
......@@ -75,9 +75,9 @@ struct sys_reg_desc {
/* Custom get/set_user functions, fallback to generic if NULL */
int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr);
u64 *val);
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr);
u64 val);
/* Return mask of REG_* runtime visibility overrides */
unsigned int (*visibility)(const struct kvm_vcpu *vcpu,
......@@ -190,10 +190,16 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
const struct sys_reg_desc *find_reg_by_id(u64 id,
struct sys_reg_params *params,
const struct sys_reg_desc table[],
unsigned int num);
const struct sys_reg_desc *get_reg_by_id(u64 id,
const struct sys_reg_desc table[],
unsigned int num);
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
const struct sys_reg_desc table[], unsigned int num);
int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
const struct sys_reg_desc table[], unsigned int num);
#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
......
This diff is collapsed.
This diff is collapsed.
......@@ -986,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
iodev.base_addr = 0;
break;
}
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
u64 reg, id;
id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
}
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
return vgic_v3_has_cpu_sysregs_attr(vcpu, attr);
default:
return -ENXIO;
}
......@@ -1158,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
}
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u64 *val)
u32 intid, u32 *val)
{
if (intid % 32)
return -EINVAL;
......
......@@ -775,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
}
}
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
{
int i;
u64 val = 0;
u32 val = 0;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
for (i = 0; i < 32; i++) {
......@@ -798,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
}
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u64 val)
const u32 val)
{
int i;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
......
......@@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
bool is_write, int offset, u32 *val);
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u64 val);
const u32 val);
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
......
......@@ -245,12 +245,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u64 id, u64 *val);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
u64 *reg);
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr, bool is_write);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u64 *val);
u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type);
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
......
......@@ -364,7 +364,7 @@ struct vgic_cpu {
extern struct static_key_false vgic_v2_cpuif_trap;
extern struct static_key_false vgic_v3_cpuif_trap;
int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type);
......
......@@ -663,7 +663,7 @@ int test_kvm_device(uint32_t gic_dev_type)
if (!__kvm_test_create_device(v.vm, other)) {
ret = __kvm_test_create_device(v.vm, other);
TEST_ASSERT(ret && errno == EINVAL,
TEST_ASSERT(ret && (errno == EINVAL || errno == EEXIST),
"create GIC device while other version exists");
}
......@@ -691,6 +691,7 @@ int main(int ac, char **av)
{
int ret;
int pa_bits;
int cnt_impl = 0;
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;
......@@ -699,13 +700,19 @@ int main(int ac, char **av)
if (!ret) {
pr_info("Running GIC_v3 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
return 0;
cnt_impl++;
}
ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
__TEST_REQUIRE(!ret, "No GICv2 nor GICv3 support");
if (!ret) {
pr_info("Running GIC_v2 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
cnt_impl++;
}
pr_info("Running GIC_v2 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
if (!cnt_impl) {
print_skip("No GICv2 nor GICv3 support");
exit(KSFT_SKIP);
}
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment