Commit c4edb2ba authored by Paolo Bonzini

Merge tag 'kvmarm-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 5.20:

- Unwinder implementations for both nVHE modes (classic and
  protected), complete with an overflow stack

- Rework of the sysreg access from userspace, with a complete
  rewrite of the vgic-v3 view to align with the rest of the
  infrastructure

- Disaggregation of the vcpu flags into separate sets to better track
  their use model (see the brief example after this list)

- A fix for the GICv2-on-v3 selftest

- A small set of cosmetic fixes
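As a brief illustration of the flag rework (both lines appear in the diff below), open-coded bit manipulation of the single vcpu->arch.flags word:

        vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;

is replaced by a typed accessor operating on the relevant flag set:

        vcpu_set_flag(vcpu, INCREMENT_PC);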
parents 63f4b210 0982c8d8
...@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params { ...@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
unsigned long vtcr; unsigned long vtcr;
}; };
/*
* Used by the host in EL1 to dump the nVHE hypervisor backtrace on
* hyp_panic() in non-protected mode.
*
* @stack_base: hyp VA of the hyp_stack base.
* @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
* @fp: hyp FP where the backtrace begins.
* @pc: hyp PC where the backtrace begins.
*/
struct kvm_nvhe_stacktrace_info {
unsigned long stack_base;
unsigned long overflow_stack_base;
unsigned long fp;
unsigned long pc;
};
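A minimal sketch of how the non-protected host side might consume this per-CPU snapshot (assuming the this_cpu_ptr_nvhe_sym() accessor and the kvm_nvhe_unwind_init() helper introduced later in this series):

        struct kvm_nvhe_stacktrace_info *stacktrace_info;
        struct unwind_state state;

        /* Read the snapshot hyp saved for this CPU and seed the unwinder with it */
        stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
        kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);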
/* Translate a kernel address @ptr into its equivalent linear mapping */ /* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \ #define kvm_ksym_ref(ptr) \
({ \ ({ \
......
...@@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, ...@@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
vcpu_set_flag(vcpu, INCREMENT_PC);
} }
#define kvm_pend_exception(v, e) \
do { \
WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
vcpu_set_flag((v), PENDING_EXCEPTION); \
vcpu_set_flag((v), e); \
} while (0)
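A hedged usage sketch (the flag names come from the definitions added later in this diff): an emulation path that wants to skip the trapped instruction calls

        kvm_incr_pc(vcpu);

while an injection path that wants to deliver, say, a synchronous exception to EL1 calls

        kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);

The WARN_ON()s make the two requests mutually exclusive for a given exit, since the target EL/mode bits deliberately overlap the INCREMENT_PC bit.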
static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
{ {
return test_bit(feature, vcpu->arch.features); return test_bit(feature, vcpu->arch.features);
......
...@@ -325,8 +325,30 @@ struct kvm_vcpu_arch { ...@@ -325,8 +325,30 @@ struct kvm_vcpu_arch {
/* Exception Information */ /* Exception Information */
struct kvm_vcpu_fault_info fault; struct kvm_vcpu_fault_info fault;
/* Miscellaneous vcpu state flags */ /* Ownership of the FP regs */
u64 flags; enum {
FP_STATE_FREE,
FP_STATE_HOST_OWNED,
FP_STATE_GUEST_OWNED,
} fp_state;
/* Configuration flags, set once and for all before the vcpu can run */
u8 cflags;
/* Input flags to the hypervisor code, potentially cleared after use */
u8 iflags;
/* State flags for kernel bookkeeping, unused by the hypervisor code */
u8 sflags;
/*
* Don't run the guest (internal implementation need).
*
* Contrary to the flags above, this is set/cleared outside of
* a vcpu context, and thus cannot be mixed with the flags
* themselves (or the flag accesses need to be made atomic).
*/
bool pause;
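For orientation, the fp_state values follow the ownership transitions visible later in this diff (a summary, not new code):

        vcpu->arch.fp_state = FP_STATE_FREE;        /* default, set at vcpu creation       */
        vcpu->arch.fp_state = FP_STATE_HOST_OWNED;  /* kvm_arch_vcpu_load_fp()             */
        vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; /* guest's first FP/SIMD trap in hyp   */
        vcpu->arch.fp_state = FP_STATE_FREE;        /* host FP state no longer resident,   */
                                                    /* e.g. TIF_FOREIGN_FPSTATE is set     */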
/* /*
* We maintain more than a single set of debug registers to support * We maintain more than a single set of debug registers to support
...@@ -376,9 +398,6 @@ struct kvm_vcpu_arch { ...@@ -376,9 +398,6 @@ struct kvm_vcpu_arch {
/* vcpu power state */ /* vcpu power state */
struct kvm_mp_state mp_state; struct kvm_mp_state mp_state;
/* Don't run the guest (internal implementation need) */
bool pause;
/* Cache some mmu pages needed inside spinlock regions */ /* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_cache;
...@@ -392,10 +411,6 @@ struct kvm_vcpu_arch { ...@@ -392,10 +411,6 @@ struct kvm_vcpu_arch {
/* Additional reset state */ /* Additional reset state */
struct vcpu_reset_state reset_state; struct vcpu_reset_state reset_state;
/* True when deferrable sysregs are loaded on the physical CPU,
* see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
bool sysregs_loaded_on_cpu;
/* Guest PV state */ /* Guest PV state */
struct { struct {
u64 last_steal; u64 last_steal;
...@@ -403,6 +418,124 @@ struct kvm_vcpu_arch { ...@@ -403,6 +418,124 @@ struct kvm_vcpu_arch {
} steal; } steal;
}; };
/*
* Each 'flag' is composed of a comma-separated triplet:
*
* - the flag-set it belongs to in the vcpu->arch structure
* - the value for that flag
* - the mask for that flag
*
* __vcpu_single_flag() builds such a triplet for a single-bit flag.
 * unpack_vcpu_flag() extracts the flag value from the triplet for
* direct use outside of the flag accessors.
*/
#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
#define __unpack_flag(_set, _f, _m) _f
#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
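For instance, the exception-injection code later in this diff switches on the raw field value:

        switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
        case unpack_vcpu_flag(EXCEPT_AA32_UND):
                enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
                break;
        /* other cases elided */
        }

which is exactly the "direct use outside of the flag accessors" case that unpack_vcpu_flag() exists for.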
#define __build_check_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *_fset; \
\
/* Check that the flags fit in the mask */ \
BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
/* Check that the flags fit in the type */ \
BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
} while (0)
#define __vcpu_get_flag(v, flagset, f, m) \
({ \
__build_check_flag(v, flagset, f, m); \
\
v->arch.flagset & (m); \
})
#define __vcpu_set_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
\
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
if (HWEIGHT(m) > 1) \
*fset &= ~(m); \
*fset |= (f); \
} while (0)
#define __vcpu_clear_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
\
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
*fset &= ~(m); \
} while (0)
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
/* SVE exposed to guest */
#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
/*
* PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
* be set together with an exception...
*/
#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
/* Helpers to encode exceptions with minimum fuss */
#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
/*
* When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
* values:
*
* For AArch32 EL1:
*/
#define EXCEPT_AA32_UND __vcpu_except_flags(0)
#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
/* For AArch64: */
#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
/* For AArch64 with NV (one day): */
#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
/* Guest debug is live */
#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
/* Save SPE context if active */
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
/* SME enabled for EL0 */
#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
/* Physical CPU not in supported_cpus */
#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
/* WFIT instruction trapped */
#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
/* vcpu system registers loaded on physical CPU */
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
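Expanding the macros by hand for two of the flags above (once the build-time checks compile away):

        vcpu_set_flag(vcpu, INCREMENT_PC)   /* -> vcpu->arch.iflags |= BIT(1)          */
        vcpu_get_flag(vcpu, EXCEPT_MASK)    /* -> vcpu->arch.iflags & GENMASK(3, 1)    */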
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
sve_ffr_offset((vcpu)->arch.sve_max_vl)) sve_ffr_offset((vcpu)->arch.sve_max_vl))
...@@ -423,70 +556,31 @@ struct kvm_vcpu_arch { ...@@ -423,70 +556,31 @@ struct kvm_vcpu_arch {
__size_ret; \ __size_ret; \
}) })
/* vcpu_arch flags field values: */
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
/*
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
* set together with an exception...
*/
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
/*
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
* take the following values:
*
* For AArch32 EL1:
*/
#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
/* For AArch64: */
#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \ KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP) KVM_GUESTDBG_SINGLESTEP)
#define vcpu_has_sve(vcpu) (system_supports_sve() && \ #define vcpu_has_sve(vcpu) (system_supports_sve() && \
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) vcpu_get_flag(vcpu, GUEST_HAS_SVE))
#ifdef CONFIG_ARM64_PTR_AUTH #ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \ #define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
(vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH) vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
#else #else
#define vcpu_has_ptrauth(vcpu) false #define vcpu_has_ptrauth(vcpu) false
#endif #endif
#define vcpu_on_unsupported_cpu(vcpu) \ #define vcpu_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU) vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \ #define vcpu_set_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU) vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \ #define vcpu_clear_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU) vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
...@@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); ...@@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events); struct kvm_vcpu_events *events);
...@@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu); ...@@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \ #define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) \ #define kvm_has_mte(kvm) \
(system_supports_mte() && \ (system_supports_mte() && \
......
...@@ -113,6 +113,14 @@ ...@@ -113,6 +113,14 @@
#define OVERFLOW_STACK_SIZE SZ_4K #define OVERFLOW_STACK_SIZE SZ_4K
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
 * save the unwound stacktrace, plus an additional entry to delimit the
* end.
*/
#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
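To make the sizing explicit: a frame record is at least 16 bytes ([x29, x30]) and the unwinder saves one 8-byte entry (the PC) per frame, so a stack of S bytes yields at most S/2 bytes of trace. Applying that to the combined hyp stack (one page) and overflow stack above, plus one delimiting entry, gives (4096 + 4096) / 2 + 8 = 4104 bytes per CPU with 4KiB pages.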
/* /*
* Alignment of kernel segments (e.g. .text, .data). * Alignment of kernel segments (e.g. .text, .data).
* *
......
...@@ -8,52 +8,20 @@ ...@@ -8,52 +8,20 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/types.h>
#include <linux/llist.h> #include <linux/llist.h>
#include <asm/memory.h> #include <asm/memory.h>
#include <asm/pointer_auth.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/sdei.h> #include <asm/sdei.h>
enum stack_type { #include <asm/stacktrace/common.h>
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl); const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline bool on_irq_stack(unsigned long sp, unsigned long size, static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info) struct stack_info *info)
{ {
...@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size, ...@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; } struct stack_info *info) { return false; }
#endif #endif
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static inline bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
}
#endif /* __ASM_STACKTRACE_H */ #endif /* __ASM_STACKTRACE_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common arm64 stack unwinder code.
*
* To implement a new arm64 stack unwinder:
* 1) Include this header
*
 * 2) Call into unwind_next_common() from your top-level unwind
 * function, passing it the validation and translation callbacks
 * (though the latter can be NULL if no translation is required).
*
* See: arch/arm64/kernel/stacktrace.c for the reference implementation.
*
* Copyright (C) 2012 ARM Ltd.
*/
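A minimal sketch of such a top-level unwind step, mirroring the kernel-side reference implementation further down in this diff (no fp translation, so the translate_fp callback is NULL):

        static int example_unwind_next(struct unwind_state *state)
        {
                struct stack_info info;

                /* Validate state->fp against the accessible stacks and step to the next record */
                return unwind_next_common(state, &info, on_accessible_stack, NULL);
        }

Here on_accessible_stack stands for whatever on_accessible_stack_fn callback the unwinder provides.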
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
STACK_TYPE_HYP,
__NR_STACK_TYPES
};
struct stack_info {
unsigned long low;
unsigned long high;
enum stack_type type;
};
/*
* A snapshot of a frame record or fp/lr register values, along with some
* accounting information necessary for robust unwinding.
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
 * @stacks_done: Stacks which have been entirely unwound, to which it is no
 * longer valid to unwind.
*
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
* of 0. This is used to ensure that within a stack, each
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
*
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
*
* @task: The task being unwound.
*/
struct unwind_state {
unsigned long fp;
unsigned long pc;
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
struct task_struct *task;
};
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
info->low = low;
info->high = high;
info->type = type;
}
return true;
}
static inline void unwind_init_common(struct unwind_state *state,
struct task_struct *task)
{
state->task = task;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
/*
* Prime the first unwind.
*
* In unwind_next() we'll check that the FP points to a valid stack,
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
* treated as a transition to whichever stack that happens to be. The
* prev_fp value won't be used, but we set it to 0 such that it is
* definitely not an accessible stack address.
*/
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
}
/*
* stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
* a kernel address.
*
* @fp: the frame pointer to be updated to its kernel address.
* @type: the stack type associated with frame pointer @fp
*
 * Returns true on success, with @fp updated to the corresponding
 * kernel virtual address; otherwise returns false.
*/
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
enum stack_type type);
/*
* on_accessible_stack_fn() - Check whether a stack range is on any
* of the possible stacks.
*
* @tsk: task whose stack is being unwound
* @sp: stack address being checked
* @size: size of the stack range being checked
* @info: stack unwinding context
*/
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info);
static inline int unwind_next_common(struct unwind_state *state,
struct stack_info *info,
on_accessible_stack_fn accessible,
stack_trace_translate_fp_fn translate_fp)
{
unsigned long fp = state->fp, kern_fp = fp;
struct task_struct *tsk = state->task;
if (fp & 0x7)
return -EINVAL;
if (!accessible(tsk, fp, 16, info))
return -EINVAL;
if (test_bit(info->type, state->stacks_done))
return -EINVAL;
/*
* If fp is not from the current address space perform the necessary
* translation before dereferencing it to get the next fp.
*/
if (translate_fp && !translate_fp(&kern_fp, info->type))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
* HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info->type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
__set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
state->prev_fp = fp;
state->prev_type = info->type;
return 0;
}
#endif /* __ASM_STACKTRACE_COMMON_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
 * to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
 * the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H
#include <asm/stacktrace/common.h>
/*
* kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
*
* @state : unwind_state to initialize
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*/
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
unwind_init_common(state, NULL);
state->fp = fp;
state->pc = pc;
}
#ifndef __KVM_NVHE_HYPERVISOR__
/*
* Conventional (non-protected) nVHE HYP stack unwinder
*
* In non-protected mode, the unwinding is done from kernel proper context
* (by the host in EL1).
*/
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
#endif /* __KVM_NVHE_HYPERVISOR__ */
#endif /* __ASM_STACKTRACE_NVHE_H */
...@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) ...@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o += -fno-stack-protector CFLAGS_syscall.o += -fno-stack-protector
# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
# can significantly impact performance. Avoid instrumenting the stack trace
# collection code to minimize this impact.
KASAN_SANITIZE_stacktrace.o := n
# It's not safe to invoke KCOV when portions of the kernel environment aren't # It's not safe to invoke KCOV when portions of the kernel environment aren't
# available or are out-of-sync with HW state. Since `noinstr` doesn't always # available or are out-of-sync with HW state. Since `noinstr` doesn't always
# inhibit KCOV instrumentation, disable it for the entire compilation unit. # inhibit KCOV instrumentation, disable it for the entire compilation unit.
......
...@@ -7,72 +7,90 @@ ...@@ -7,72 +7,90 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/debug.h> #include <linux/sched/debug.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/stacktrace.h> #include <linux/stacktrace.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h> #include <asm/stack_pointer.h>
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
/* /*
* A snapshot of a frame record or fp/lr register values, along with some * Start an unwind from a pt_regs.
* accounting information necessary for robust unwinding.
* *
* @fp: The fp value in the frame record (or the real fp) * The unwind will begin at the PC within the regs.
* @pc: The lr value in the frame record (or the real lr)
* *
* @stacks_done: Stacks which have been entirely unwound, for which it is no * The regs must be on a stack currently owned by the calling task.
* longer valid to unwind to. */
static inline void unwind_init_from_regs(struct unwind_state *state,
struct pt_regs *regs)
{
unwind_init_common(state, current);
state->fp = regs->regs[29];
state->pc = regs->pc;
}
/*
* Start an unwind from a caller.
* *
* @prev_fp: The fp that pointed to this frame record, or a synthetic value * The unwind will begin at the caller of whichever function this is inlined
* of 0. This is used to ensure that within a stack, each * into.
* subsequent frame record is at an increasing address.
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
* *
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * The function which invokes this must be noinline.
* associated with the most recently encountered replacement lr
* value.
*/ */
struct unwind_state { static __always_inline void unwind_init_from_caller(struct unwind_state *state)
unsigned long fp; {
unsigned long pc; unwind_init_common(state, current);
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp; state->fp = (unsigned long)__builtin_frame_address(1);
enum stack_type prev_type; state->pc = (unsigned long)__builtin_return_address(0);
#ifdef CONFIG_KRETPROBES }
struct llist_node *kr_cur;
#endif
};
static notrace void unwind_init(struct unwind_state *state, unsigned long fp, /*
unsigned long pc) * Start an unwind from a blocked task.
*
 * The unwind will begin at the blocked task's saved PC (i.e. the caller of
* cpu_switch_to()).
*
* The caller should ensure the task is blocked in cpu_switch_to() for the
* duration of the unwind, or the unwind will be bogus. It is never valid to
* call this for the current task.
*/
static inline void unwind_init_from_task(struct unwind_state *state,
struct task_struct *task)
{ {
state->fp = fp; unwind_init_common(state, task);
state->pc = pc;
#ifdef CONFIG_KRETPROBES state->fp = thread_saved_fp(task);
state->kr_cur = NULL; state->pc = thread_saved_pc(task);
#endif }
/* /*
* Prime the first unwind. * We can only safely access per-cpu stacks from current in a non-preemptible
* * context.
* In unwind_next() we'll check that the FP points to a valid stack, */
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be static bool on_accessible_stack(const struct task_struct *tsk,
* treated as a transition to whichever stack that happens to be. The unsigned long sp, unsigned long size,
* prev_fp value won't be used, but we set it to 0 such that it is struct stack_info *info)
* definitely not an accessible stack address. {
*/ if (info)
bitmap_zero(state->stacks_done, __NR_STACK_TYPES); info->type = STACK_TYPE_UNKNOWN;
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN; if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp, size, info))
return true;
if (on_overflow_stack(sp, size, info))
return true;
if (on_sdei_stack(sp, size, info))
return true;
return false;
} }
NOKPROBE_SYMBOL(unwind_init);
/* /*
* Unwind from one frame record (A) to the next frame record (B). * Unwind from one frame record (A) to the next frame record (B).
...@@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init); ...@@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init);
* records (e.g. a cycle), determined based on the location and fp value of A * records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B. * and the location (but not the fp value) of B.
*/ */
static int notrace unwind_next(struct task_struct *tsk, static int notrace unwind_next(struct unwind_state *state)
struct unwind_state *state)
{ {
struct task_struct *tsk = state->task;
unsigned long fp = state->fp; unsigned long fp = state->fp;
struct stack_info info; struct stack_info info;
int err;
/* Final frame; nothing to unwind */ /* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT; return -ENOENT;
if (fp & 0x7) err = unwind_next_common(state, &info, on_accessible_stack, NULL);
return -EINVAL; if (err)
return err;
if (!on_accessible_stack(tsk, fp, 16, &info))
return -EINVAL;
if (test_bit(info.type, state->stacks_done))
return -EINVAL;
/*
* As stacks grow downward, any valid record on the same stack must be
* at a strictly higher address than the prior record.
*
* Stacks can nest in several valid orders, e.g.
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
* stack.
*/
if (info.type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
state->prev_fp = fp;
state->prev_type = info.type;
state->pc = ptrauth_strip_insn_pac(state->pc); state->pc = ptrauth_strip_insn_pac(state->pc);
...@@ -157,8 +142,7 @@ static int notrace unwind_next(struct task_struct *tsk, ...@@ -157,8 +142,7 @@ static int notrace unwind_next(struct task_struct *tsk,
} }
NOKPROBE_SYMBOL(unwind_next); NOKPROBE_SYMBOL(unwind_next);
static void notrace unwind(struct task_struct *tsk, static void notrace unwind(struct unwind_state *state,
struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie) stack_trace_consume_fn consume_entry, void *cookie)
{ {
while (1) { while (1) {
...@@ -166,7 +150,7 @@ static void notrace unwind(struct task_struct *tsk, ...@@ -166,7 +150,7 @@ static void notrace unwind(struct task_struct *tsk,
if (!consume_entry(cookie, state->pc)) if (!consume_entry(cookie, state->pc))
break; break;
ret = unwind_next(tsk, state); ret = unwind_next(state);
if (ret < 0) if (ret < 0)
break; break;
} }
...@@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, ...@@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
{ {
struct unwind_state state; struct unwind_state state;
if (regs) if (regs) {
unwind_init(&state, regs->regs[29], regs->pc); if (task != current)
else if (task == current) return;
unwind_init(&state, unwind_init_from_regs(&state, regs);
(unsigned long)__builtin_frame_address(1), } else if (task == current) {
(unsigned long)__builtin_return_address(0)); unwind_init_from_caller(&state);
else } else {
unwind_init(&state, thread_saved_fp(task), unwind_init_from_task(&state, task);
thread_saved_pc(task)); }
unwind(task, &state, consume_entry, cookie); unwind(&state, consume_entry, cookie);
} }
...@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG ...@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG
If unsure, say N. If unsure, say N.
config PROTECTED_NVHE_STACKTRACE
bool "Protected KVM hypervisor stacktraces"
depends on NVHE_EL2_DEBUG
default n
help
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic().
If you are using protected nVHE mode but cannot afford the associated
memory cost (less than 0.75 page per CPU) of pKVM stacktraces, say N.
If unsure, or not using protected nVHE (pKVM), say N.
endif # VIRTUALIZATION endif # VIRTUALIZATION
...@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/ ...@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \ inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \ arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \ vgic/vgic.o vgic/vgic-init.o \
......
...@@ -242,7 +242,7 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) ...@@ -242,7 +242,7 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{ {
return (cpus_have_final_cap(ARM64_HAS_WFXT) && return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
(vcpu->arch.flags & KVM_ARM64_WFIT)); vcpu_get_flag(vcpu, IN_WFIT));
} }
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
......
...@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); ...@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
...@@ -330,6 +330,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -330,6 +330,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/*
 * Default value for the FP state; it will be overridden at load
* time if we support FP (pretty likely)
*/
vcpu->arch.fp_state = FP_STATE_FREE;
/* Set up the timer */ /* Set up the timer */
kvm_timer_vcpu_init(vcpu); kvm_timer_vcpu_init(vcpu);
...@@ -659,7 +665,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) ...@@ -659,7 +665,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable(); preempt_enable();
kvm_vcpu_halt(vcpu); kvm_vcpu_halt(vcpu);
vcpu->arch.flags &= ~KVM_ARM64_WFIT; vcpu_clear_flag(vcpu, IN_WFIT);
kvm_clear_request(KVM_REQ_UNHALT, vcpu); kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable(); preempt_disable();
...@@ -1015,8 +1021,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -1015,8 +1021,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* the vcpu state. Note that this relies on __kvm_adjust_pc() * the vcpu state. Note that this relies on __kvm_adjust_pc()
* being preempt-safe on VHE. * being preempt-safe on VHE.
*/ */
if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION | if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) ||
KVM_ARM64_INCREMENT_PC))) vcpu_get_flag(vcpu, INCREMENT_PC)))
kvm_call_hyp(__kvm_adjust_pc, vcpu); kvm_call_hyp(__kvm_adjust_pc, vcpu);
vcpu_put(vcpu); vcpu_put(vcpu);
...@@ -1414,18 +1420,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, ...@@ -1414,18 +1420,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr) struct kvm_arm_device_addr *dev_addr)
{ {
unsigned long dev_id, type; switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) {
dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
KVM_ARM_DEVICE_ID_SHIFT;
type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
KVM_ARM_DEVICE_TYPE_SHIFT;
switch (dev_id) {
case KVM_ARM_DEVICE_VGIC_V2: case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present) if (!vgic_present)
return -ENXIO; return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr);
default: default:
return -ENODEV; return -ENODEV;
} }
......
...@@ -104,11 +104,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) ...@@ -104,11 +104,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
* Trap debug register access when one of the following is true: * Trap debug register access when one of the following is true:
* - Userspace is using the hardware to debug the guest * - Userspace is using the hardware to debug the guest
* (KVM_GUESTDBG_USE_HW is set). * (KVM_GUESTDBG_USE_HW is set).
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear). * - The guest is not using debug (DEBUG_DIRTY clear).
* - The guest has enabled the OS Lock (debug exceptions are blocked). * - The guest has enabled the OS Lock (debug exceptions are blocked).
*/ */
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) || if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) || !vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
kvm_vcpu_os_lock_enabled(vcpu)) kvm_vcpu_os_lock_enabled(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
...@@ -147,8 +147,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) ...@@ -147,8 +147,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
* debug related registers. * debug related registers.
* *
* Additionally, KVM only traps guest accesses to the debug registers if * Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY * the guest is not actively using them (see the DEBUG_DIRTY
* flag on vcpu->arch.flags). Since the guest must not interfere * flag on vcpu->arch.iflags). Since the guest must not interfere
* with the hardware state when debugging the guest, we must ensure that * with the hardware state when debugging the guest, we must ensure that
* trapping is enabled whenever we are debugging the guest using the * trapping is enabled whenever we are debugging the guest using the
* debug registers. * debug registers.
...@@ -205,9 +205,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) ...@@ -205,9 +205,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
* *
* We simply switch the debug_ptr to point to our new * We simply switch the debug_ptr to point to our new
* external_debug_state which has been populated by the * external_debug_state which has been populated by the
* debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY * debug ioctl. The existing DEBUG_DIRTY mechanism ensures
* mechanism ensures the registers are updated on the * the registers are updated on the world switch.
* world switch.
*/ */
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */ /* Enable breakpoints/watchpoints */
...@@ -216,7 +215,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) ...@@ -216,7 +215,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; vcpu_set_flag(vcpu, DEBUG_DIRTY);
trace_kvm_arm_set_regset("BKPTS", get_num_brps(), trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0], &vcpu->arch.debug_ptr->dbg_bcr[0],
...@@ -246,7 +245,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) ...@@ -246,7 +245,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
/* If KDE or MDE are set, perform a full save/restore cycle. */ /* If KDE or MDE are set, perform a full save/restore cycle. */
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; vcpu_set_flag(vcpu, DEBUG_DIRTY);
/* Write mdcr_el2 changes since vcpu_load on VHE systems */ /* Write mdcr_el2 changes since vcpu_load on VHE systems */
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
...@@ -298,16 +297,16 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) ...@@ -298,16 +297,16 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
*/ */
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) && if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT))) !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE; vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
/* Check if we have TRBE implemented and available at the host */ /* Check if we have TRBE implemented and available at the host */
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) && if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE; vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
} }
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE | vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
KVM_ARM64_DEBUG_STATE_SAVE_TRBE); vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
} }
...@@ -77,12 +77,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) ...@@ -77,12 +77,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
BUG_ON(!current->mm); BUG_ON(!current->mm);
BUG_ON(test_thread_flag(TIF_SVE)); BUG_ON(test_thread_flag(TIF_SVE));
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; if (!system_supports_fpsimd())
vcpu->arch.flags |= KVM_ARM64_FP_HOST; return;
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
/* /*
* We don't currently support SME guests but if we leave * We don't currently support SME guests but if we leave
...@@ -94,29 +96,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) ...@@ -94,29 +96,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity. * operations. Do this for ZA as well for now for simplicity.
*/ */
if (system_supports_sme()) { if (system_supports_sme()) {
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED; vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; vcpu_set_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg_s(SYS_SVCR) & if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
(SVCR_SM_MASK | SVCR_ZA_MASK)) { vcpu->arch.fp_state = FP_STATE_FREE;
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
fpsimd_save_and_flush_cpu_state(); fpsimd_save_and_flush_cpu_state();
} }
} }
} }
/* /*
* Called just before entering the guest once we are no longer * Called just before entering the guest once we are no longer preemptable
* preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM * and interrupts are disabled. If we have managed to run anything using
* mirror of the flag used by the hypervisor. * FP while we were preemptible (such as off the back of an interrupt),
* then neither the host nor the guest own the FP hardware (and it was the
* responsibility of the code that used FP to save the existing state).
*/ */
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{ {
if (test_thread_flag(TIF_FOREIGN_FPSTATE)) if (test_thread_flag(TIF_FOREIGN_FPSTATE))
vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE; vcpu->arch.fp_state = FP_STATE_FREE;
else
vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
} }
/* /*
...@@ -130,7 +131,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) ...@@ -130,7 +131,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{ {
WARN_ON_ONCE(!irqs_disabled()); WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
/* /*
* Currently we do not support SME guests so SVCR is * Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to. * always 0 and we just need a variable to point to.
...@@ -163,7 +164,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) ...@@ -163,7 +164,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
*/ */
if (has_vhe() && system_supports_sme()) { if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */ /* Also restore EL0 state seen on entry */
if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED) if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, sysreg_clear_set(CPACR_EL1, 0,
CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL0EN |
CPACR_EL1_SMEN_EL1EN); CPACR_EL1_SMEN_EL1EN);
...@@ -173,7 +174,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) ...@@ -173,7 +174,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
CPACR_EL1_SMEN_EL1EN); CPACR_EL1_SMEN_EL1EN);
} }
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
if (vcpu_has_sve(vcpu)) { if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
...@@ -192,7 +193,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) ...@@ -192,7 +193,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* for EL0. To avoid spurious traps, restore the trap state * for EL0. To avoid spurious traps, restore the trap state
* seen by kvm_arch_vcpu_load_fp(): * seen by kvm_arch_vcpu_load_fp():
*/ */
if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED) if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
else else
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h> #include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h> #include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h> #include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h> #include <asm/traps.h>
#include <kvm/arm_hypercalls.h> #include <kvm/arm_hypercalls.h>
...@@ -120,7 +121,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) ...@@ -120,7 +121,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else { } else {
if (esr & ESR_ELx_WFx_ISS_WFxT) if (esr & ESR_ELx_WFx_ISS_WFxT)
vcpu->arch.flags |= KVM_ARM64_WFIT; vcpu_set_flag(vcpu, IN_WFIT);
kvm_vcpu_wfi(vcpu); kvm_vcpu_wfi(vcpu);
} }
...@@ -347,12 +348,15 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, ...@@ -347,12 +348,15 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
else else
kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr, kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
(void *)panic_addr); (void *)(panic_addr + kaslr_offset()));
} else { } else {
kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr, kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
(void *)panic_addr); (void *)(panic_addr + kaslr_offset()));
} }
/* Dump the nVHE hypervisor backtrace */
kvm_nvhe_dump_backtrace(hyp_offset);
/* /*
* Hyp has panicked and we're going to handle that by panicking the * Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're * kernel. The kernel offset will be revealed in the panic so we're
......
...@@ -303,14 +303,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) ...@@ -303,14 +303,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
static void kvm_inject_exception(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{ {
if (vcpu_el1_is_32bit(vcpu)) { if (vcpu_el1_is_32bit(vcpu)) {
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case KVM_ARM64_EXCEPT_AA32_UND: case unpack_vcpu_flag(EXCEPT_AA32_UND):
enter_exception32(vcpu, PSR_AA32_MODE_UND, 4); enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
break; break;
case KVM_ARM64_EXCEPT_AA32_IABT: case unpack_vcpu_flag(EXCEPT_AA32_IABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12); enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
break; break;
case KVM_ARM64_EXCEPT_AA32_DABT: case unpack_vcpu_flag(EXCEPT_AA32_DABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
break; break;
default: default:
...@@ -318,9 +318,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) ...@@ -318,9 +318,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
break; break;
} }
} else { } else {
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
KVM_ARM64_EXCEPT_AA64_EL1):
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break; break;
default: default:
...@@ -340,12 +339,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) ...@@ -340,12 +339,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
*/ */
void __kvm_adjust_pc(struct kvm_vcpu *vcpu) void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
{ {
if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { if (vcpu_get_flag(vcpu, PENDING_EXCEPTION)) {
kvm_inject_exception(vcpu); kvm_inject_exception(vcpu);
vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
KVM_ARM64_EXCEPT_MASK); vcpu_clear_flag(vcpu, EXCEPT_MASK);
} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { } else if (vcpu_get_flag(vcpu, INCREMENT_PC)) {
kvm_skip_instr(vcpu); kvm_skip_instr(vcpu);
vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; vcpu_clear_flag(vcpu, INCREMENT_PC);
} }
} }
...@@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) ...@@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg; struct kvm_guest_debug_arch *guest_dbg;
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return; return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
...@@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) ...@@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg; struct kvm_guest_debug_arch *guest_dbg;
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return; return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
...@@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) ...@@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
__debug_save_state(guest_dbg, guest_ctxt); __debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt); __debug_restore_state(host_dbg, host_ctxt);
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; vcpu_clear_flag(vcpu, DEBUG_DIRTY);
} }
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
...@@ -37,22 +37,10 @@ struct kvm_exception_table_entry { ...@@ -37,22 +37,10 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table; extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table; extern struct kvm_exception_table_entry __stop___kvm_ex_table;
/* Check whether the FP regs were dirtied while in the host-side run loop: */ /* Check whether the FP regs are owned by the guest */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
{ {
/* return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
* When the system doesn't support FP/SIMD, we cannot rely on
* the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
* abort on the very first access to FP and thus we should never
* see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
} }
/* Save the 32-bit only FPSIMD system register state */ /* Save the 32-bit only FPSIMD system register state */
...@@ -191,10 +179,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) ...@@ -191,10 +179,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb(); isb();
/* Write out the host state if it's in the registers */ /* Write out the host state if it's in the registers */
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
__fpsimd_save_state(vcpu->arch.host_fpsimd_state); __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
/* Restore the guest state */ /* Restore the guest state */
if (sve_guest) if (sve_guest)
...@@ -206,7 +192,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) ...@@ -206,7 +192,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW)) if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
return true; return true;
} }
......
...@@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) ...@@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
} }
...@@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) ...@@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
} }
......
...@@ -12,13 +12,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include ...@@ -12,13 +12,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs)) lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-y += $(lib-objs) hyp-obj-y += $(lib-objs)
## ##
## Build rules for compiling nVHE hyp code ## Build rules for compiling nVHE hyp code
...@@ -26,9 +26,9 @@ obj-y += $(lib-objs) ...@@ -26,9 +26,9 @@ obj-y += $(lib-objs)
## file containing all nVHE hyp code and data. ## file containing all nVHE hyp code and data.
## ##
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y)) hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
obj-y := kvm_nvhe.o obj-y := kvm_nvhe.o
extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension # 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE. # avoids file name clashes for files shared with VHE.
......
...@@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1) ...@@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{ {
/* Disable and flush SPE data generation */ /* Disable and flush SPE data generation */
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE) if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
/* Disable and flush Self-Hosted Trace generation */ /* Disable and flush Self-Hosted Trace generation */
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE) if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
} }
...@@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) ...@@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{ {
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE) if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE) if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
} }
......
...@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc) ...@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
b hyp_panic b hyp_panic
.L__hyp_sp_overflow\@: .L__hyp_sp_overflow\@:
/* /* Switch to the overflow stack */
* Reset SP to the top of the stack, to allow handling the hyp_panic. adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
* This corrupts the stack but is ok, since we won't be attempting
* any unwinding here.
*/
ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
mov sp, x0
b hyp_panic_bad_stack b hyp_panic_bad_stack
ASM_BUG() ASM_BUG()
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM nVHE hypervisor stack tracing support.
*
* Copyright (C) 2022 Google LLC
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/memory.h>
#include <asm/percpu.h>
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
/*
* hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Save the information needed by the host to unwind the non-protected
* nVHE hypervisor stack in EL1.
*/
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
#include <asm/stacktrace/nvhe.h>
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
unsigned long low = high - PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack, NULL);
}
static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry,
void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
*
* @arg : index of the entry in the stacktrace buffer
* @where : the program counter corresponding to the stack frame
*
* Save the return address of a stack frame to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
{
unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
int *idx = (int *)arg;
/*
* Need 2 free slots: 1 for current entry and 1 for the
* delimiter.
*/
if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
return false;
stacktrace[*idx] = where;
stacktrace[++*idx] = 0UL;
return true;
}
/*
* pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
 * Save the unwound stack addresses to the shared stacktrace buffer.
* The host can access this shared buffer from EL1 to dump the backtrace.
*/
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
struct unwind_state state;
int idx = 0;
kvm_nvhe_unwind_init(&state, fp, pc);
unwind(&state, pkvm_save_backtrace_entry, &idx);
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
*
* @fp : frame pointer at which to start the unwinding.
* @pc : program counter at which to start the unwinding.
*
* Saves the information needed by the host to dump the nVHE hypervisor
* backtrace.
*/
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
{
if (is_protected_kvm_enabled())
pkvm_save_backtrace(fp, pc);
else
hyp_prepare_backtrace(fp, pc);
}
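In protected mode the trace is exported through pkvm_stacktrace, a flat per-CPU array of hyp return addresses terminated by a zero entry. A hedged illustration of how any consumer can walk that layout (the real host-side reader appears in the EL1 stacktrace file later in this diff; the helper below is hypothetical):

/* Walk a zero-terminated stacktrace buffer, calling @fn on each saved
 * return address until it returns false or the buffer ends. */
static void walk_saved_stacktrace(unsigned long *stacktrace, size_t nr_entries,
				  bool (*fn)(void *arg, unsigned long where),
				  void *arg)
{
	size_t i;

	for (i = 0; i < nr_entries && stacktrace[i]; i++)
		if (!fn(arg, stacktrace[i]))
			break;
}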
...@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); ...@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
static void __activate_traps(struct kvm_vcpu *vcpu) static void __activate_traps(struct kvm_vcpu *vcpu)
{ {
u64 val; u64 val;
...@@ -43,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ...@@ -43,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = vcpu->arch.cptr_el2; val = vcpu->arch.cptr_el2;
val |= CPTR_EL2_TTA | CPTR_EL2_TAM; val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
if (!update_fp_enabled(vcpu)) { if (!guest_owns_fp_regs(vcpu)) {
val |= CPTR_EL2_TFP | CPTR_EL2_TZ; val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
__activate_traps_fpsimd32(vcpu); __activate_traps_fpsimd32(vcpu);
} }
...@@ -123,7 +125,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) ...@@ -123,7 +125,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
} }
cptr = CPTR_EL2_DEFAULT; cptr = CPTR_EL2_DEFAULT;
if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
cptr |= CPTR_EL2_TZ; cptr |= CPTR_EL2_TZ;
if (cpus_have_final_cap(ARM64_SME)) if (cpus_have_final_cap(ARM64_SME))
cptr &= ~CPTR_EL2_TSM; cptr &= ~CPTR_EL2_TSM;
...@@ -335,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -335,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt); __sysreg_restore_state_nvhe(host_ctxt);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu); __fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu); __debug_switch_to_host(vcpu);
...@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void) ...@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt); __sysreg_restore_state_nvhe(host_ctxt);
} }
/* Prepare to dump kvm nvhe hyp stacktrace */
kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
_THIS_IP_);
__hyp_do_panic(host_ctxt, spsr, elr, par); __hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable(); unreachable();
} }
...@@ -386,5 +392,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void) ...@@ -386,5 +392,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void)
asmlinkage void kvm_unexpected_el2_exception(void) asmlinkage void kvm_unexpected_el2_exception(void)
{ {
return __kvm_unexpected_el2_exception(); __kvm_unexpected_el2_exception();
} }
...@@ -38,9 +38,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) ...@@ -38,9 +38,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
__kvm_adjust_pc(vcpu); __kvm_adjust_pc(vcpu);
......
...@@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ...@@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM; val |= CPTR_EL2_TAM;
if (update_fp_enabled(vcpu)) { if (guest_owns_fp_regs(vcpu)) {
if (vcpu_has_sve(vcpu)) if (vcpu_has_sve(vcpu))
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else { } else {
...@@ -175,7 +175,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) ...@@ -175,7 +175,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt); sysreg_restore_host_state_vhe(host_ctxt);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu); __fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu); __debug_switch_to_host(vcpu);
...@@ -249,5 +249,5 @@ void __noreturn hyp_panic(void) ...@@ -249,5 +249,5 @@ void __noreturn hyp_panic(void)
asmlinkage void kvm_unexpected_el2_exception(void) asmlinkage void kvm_unexpected_el2_exception(void)
{ {
return __kvm_unexpected_el2_exception(); __kvm_unexpected_el2_exception();
} }
...@@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) ...@@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_user_state(guest_ctxt); __sysreg_restore_user_state(guest_ctxt);
__sysreg_restore_el1_state(guest_ctxt); __sysreg_restore_el1_state(guest_ctxt);
vcpu->arch.sysregs_loaded_on_cpu = true; vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
activate_traps_vhe_load(vcpu); activate_traps_vhe_load(vcpu);
} }
...@@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) ...@@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
/* Restore host user state */ /* Restore host user state */
__sysreg_restore_user_state(host_ctxt); __sysreg_restore_user_state(host_ctxt);
vcpu->arch.sysregs_loaded_on_cpu = false; vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
} }
...@@ -20,9 +20,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr ...@@ -20,9 +20,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0; u64 esr = 0;
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1); vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
...@@ -52,9 +50,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) ...@@ -52,9 +50,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
{ {
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
/* /*
* Build an unknown exception, depending on the instruction * Build an unknown exception, depending on the instruction
...@@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) ...@@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
static void inject_undef32(struct kvm_vcpu *vcpu) static void inject_undef32(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | kvm_pend_exception(vcpu, EXCEPT_AA32_UND);
KVM_ARM64_PENDING_EXCEPTION);
} }
/* /*
...@@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) ...@@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
far = vcpu_read_sys_reg(vcpu, FAR_EL1); far = vcpu_read_sys_reg(vcpu, FAR_EL1);
if (is_pabt) { if (is_pabt) {
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | kvm_pend_exception(vcpu, EXCEPT_AA32_IABT);
KVM_ARM64_PENDING_EXCEPTION);
far &= GENMASK(31, 0); far &= GENMASK(31, 0);
far |= (u64)addr << 32; far |= (u64)addr << 32;
vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2); vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
} else { /* !iabt */ } else { /* !iabt */
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | kvm_pend_exception(vcpu, EXCEPT_AA32_DABT);
KVM_ARM64_PENDING_EXCEPTION);
far &= GENMASK(63, 32); far &= GENMASK(63, 32);
far |= addr; far |= addr;
vcpu_write_sys_reg(vcpu, fsr, ESR_EL1); vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
......
...@@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) ...@@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
* kvm_arm_vcpu_finalize(), which freezes the configuration. * kvm_arm_vcpu_finalize(), which freezes the configuration.
*/ */
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE; vcpu_set_flag(vcpu, GUEST_HAS_SVE);
return 0; return 0;
} }
...@@ -120,7 +120,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) ...@@ -120,7 +120,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
} }
vcpu->arch.sve_state = buf; vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED; vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
return 0; return 0;
} }
...@@ -177,7 +177,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) ...@@ -177,7 +177,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
!system_has_full_ptr_auth()) !system_has_full_ptr_auth())
return -EINVAL; return -EINVAL;
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
return 0; return 0;
} }
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM nVHE hypervisor stack tracing support.
*
* The unwinder implementation depends on the nVHE mode:
*
* 1) Non-protected nVHE mode - the host can directly access the
* HYP stack pages and unwind the HYP stack in EL1. This saves having
 * to allocate shared buffers for the host to read the unwound
* stacktrace.
*
* 2) pKVM (protected nVHE) mode - the host cannot directly access
 * the HYP memory. The stack is unwound in EL2 and dumped to a shared
* buffer where the host can read and print the stacktrace.
*
* Copyright (C) 2022 Google LLC
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/stacktrace/nvhe.h>
/*
 * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to kernel VAs
*
* The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
* allow for guard pages below the stack. Consequently, the fixed offset address
* translation macros won't work here.
*
* The kernel VA is calculated as an offset from the kernel VA of the hypervisor
* stack base.
*
* Returns true on success and updates @addr to its corresponding kernel VA;
* otherwise returns false.
*/
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
enum stack_type type)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
unsigned long hyp_base, kern_base, hyp_offset;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
switch (type) {
case STACK_TYPE_HYP:
kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
hyp_base = (unsigned long)stacktrace_info->stack_base;
break;
case STACK_TYPE_OVERFLOW:
kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
break;
default:
return false;
}
hyp_offset = *addr - hyp_base;
*addr = kern_base + hyp_offset;
return true;
}
static bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
unsigned long high = low + OVERFLOW_STACK_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
static bool on_hyp_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->stack_base;
unsigned long high = low + PAGE_SIZE;
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}
static bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
return (on_overflow_stack(sp, size, info) ||
on_hyp_stack(sp, size, info));
}
static int unwind_next(struct unwind_state *state)
{
struct stack_info info;
return unwind_next_common(state, &info, on_accessible_stack,
kvm_nvhe_stack_kern_va);
}
static void unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
int ret;
if (!consume_entry(cookie, state->pc))
break;
ret = unwind_next(state);
if (ret < 0)
break;
}
}
/*
* kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
*
* @arg : the hypervisor offset, used for address translation
* @where : the program counter corresponding to the stack frame
*/
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
unsigned long hyp_offset = (unsigned long)arg;
/* Mask tags and convert to kern addr */
where = (where & va_mask) + hyp_offset;
kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
return true;
}
static void kvm_nvhe_dump_backtrace_start(void)
{
kvm_err("nVHE call trace:\n");
}
static void kvm_nvhe_dump_backtrace_end(void)
{
kvm_err("---[ end nVHE call trace ]---\n");
}
/*
* hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* The host can directly access HYP stack pages in non-protected
* mode, so the unwinding is done directly from EL1. This removes
* the need for shared buffers between host and hypervisor for
* the stacktrace.
*/
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
struct kvm_nvhe_stacktrace_info *stacktrace_info;
struct unwind_state state;
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
kvm_nvhe_dump_backtrace_start();
unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
kvm_nvhe_dump_backtrace_end();
}
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
pkvm_stacktrace);
/*
* pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*
* Dumping of the pKVM HYP backtrace is done by reading the
* stack addresses from the shared stacktrace buffer, since the
* host cannot directly access hypervisor memory in protected
* mode.
*/
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
unsigned long *stacktrace
= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
int i;
kvm_nvhe_dump_backtrace_start();
/* The saved stacktrace is terminated by a null entry */
for (i = 0;
i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
i++)
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
kvm_nvhe_dump_backtrace_end();
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
/*
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
*
* @hyp_offset: hypervisor offset, used for address translation.
*/
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
if (is_protected_kvm_enabled())
pkvm_dump_backtrace(hyp_offset);
else
hyp_dump_backtrace(hyp_offset);
}
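Tying the two halves together, kvm_nvhe_dump_backtrace() is the single entry point the host is expected to call after a hyp panic; it dispatches to the protected or non-protected path on its own. A hypothetical call site (the real hook lives in the host's hyp panic handling, outside this diff, and the offset computation is assumed):

/* Hypothetical caller: @hyp_offset is the kernel-to-hyp VA offset,
 * assumed to have been derived by the panic handler. */
static void report_nvhe_panic_backtrace(unsigned long hyp_offset)
{
	kvm_nvhe_dump_backtrace(hyp_offset);
}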
...@@ -75,9 +75,9 @@ struct sys_reg_desc { ...@@ -75,9 +75,9 @@ struct sys_reg_desc {
/* Custom get/set_user functions, fallback to generic if NULL */ /* Custom get/set_user functions, fallback to generic if NULL */
int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr); u64 *val);
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr); u64 val);
/* Return mask of REG_* runtime visibility overrides */ /* Return mask of REG_* runtime visibility overrides */
unsigned int (*visibility)(const struct kvm_vcpu *vcpu, unsigned int (*visibility)(const struct kvm_vcpu *vcpu,
...@@ -190,10 +190,16 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], ...@@ -190,10 +190,16 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
} }
const struct sys_reg_desc *find_reg_by_id(u64 id, const struct sys_reg_desc *get_reg_by_id(u64 id,
struct sys_reg_params *params, const struct sys_reg_desc table[],
const struct sys_reg_desc table[], unsigned int num);
unsigned int num);
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
const struct sys_reg_desc table[], unsigned int num);
int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
const struct sys_reg_desc table[], unsigned int num);
#define AA32(_x) .aarch32_map = AA32_##_x #define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x #define Op0(_x) .Op0 = _x
......
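With the reworked prototypes, the per-register userspace hooks exchange the value directly as a u64 instead of copying through a kvm_one_reg and a user pointer. A hedged sketch of a trivial pair under the new signatures (register and function names are hypothetical; a real hook would add register-specific sanitisation):

static int get_demo_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
			u64 *val)
{
	*val = __vcpu_sys_reg(vcpu, rd->reg);
	return 0;
}

static int set_demo_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
			u64 val)
{
	__vcpu_sys_reg(vcpu, rd->reg) = val;
	return 0;
}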
...@@ -986,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) ...@@ -986,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
iodev.base_addr = 0; iodev.base_addr = 0;
break; break;
} }
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
u64 reg, id; return vgic_v3_has_cpu_sysregs_attr(vcpu, attr);
id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
}
default: default:
return -ENXIO; return -ENXIO;
} }
...@@ -1158,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, ...@@ -1158,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
} }
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u64 *val) u32 intid, u32 *val)
{ {
if (intid % 32) if (intid % 32)
return -EINVAL; return -EINVAL;
......
...@@ -775,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, ...@@ -775,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
} }
} }
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
{ {
int i; int i;
u64 val = 0; u32 val = 0;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
for (i = 0; i < 32; i++) { for (i = 0; i < 32; i++) {
...@@ -798,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) ...@@ -798,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
} }
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u64 val) const u32 val)
{ {
int i; int i;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
......
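The line-level information is now exchanged as a u32 bitmap covering the 32 interrupts starting at @intid; judging from the 32-iteration loop in the hunk above, bit i corresponds to interrupt intid + i. A hedged helper illustrating that assumed encoding (name hypothetical):

/* Test @irq's level bit in a word returned by
 * vgic_read_irq_line_level_info(vcpu, intid); assumes bit i <-> intid + i. */
static bool demo_irq_level_is_high(u32 level_info, u32 intid, u32 irq)
{
	return level_info & BIT(irq - intid);
}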
...@@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, ...@@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
bool is_write, int offset, u32 *val); bool is_write, int offset, u32 *val);
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u64 val); const u32 val);
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
......
...@@ -245,12 +245,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, ...@@ -245,12 +245,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val); int offset, u32 *val);
int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val); int offset, u32 *val);
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
u64 id, u64 *val); struct kvm_device_attr *attr, bool is_write);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
u64 *reg);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u64 *val); u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type); int kvm_register_vgic_device(unsigned long type);
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
......
...@@ -364,7 +364,7 @@ struct vgic_cpu { ...@@ -364,7 +364,7 @@ struct vgic_cpu {
extern struct static_key_false vgic_v2_cpuif_trap; extern struct static_key_false vgic_v2_cpuif_trap;
extern struct static_key_false vgic_v3_cpuif_trap; extern struct static_key_false vgic_v3_cpuif_trap;
int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm); void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type);
......
...@@ -663,7 +663,7 @@ int test_kvm_device(uint32_t gic_dev_type) ...@@ -663,7 +663,7 @@ int test_kvm_device(uint32_t gic_dev_type)
if (!__kvm_test_create_device(v.vm, other)) { if (!__kvm_test_create_device(v.vm, other)) {
ret = __kvm_test_create_device(v.vm, other); ret = __kvm_test_create_device(v.vm, other);
TEST_ASSERT(ret && errno == EINVAL, TEST_ASSERT(ret && (errno == EINVAL || errno == EEXIST),
"create GIC device while other version exists"); "create GIC device while other version exists");
} }
...@@ -691,6 +691,7 @@ int main(int ac, char **av) ...@@ -691,6 +691,7 @@ int main(int ac, char **av)
{ {
int ret; int ret;
int pa_bits; int pa_bits;
int cnt_impl = 0;
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits; max_phys_size = 1ULL << pa_bits;
...@@ -699,13 +700,19 @@ int main(int ac, char **av) ...@@ -699,13 +700,19 @@ int main(int ac, char **av)
if (!ret) { if (!ret) {
pr_info("Running GIC_v3 tests.\n"); pr_info("Running GIC_v3 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V3); run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
return 0; cnt_impl++;
} }
ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2); ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
__TEST_REQUIRE(!ret, "No GICv2 nor GICv3 support"); if (!ret) {
pr_info("Running GIC_v2 tests.\n");
run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
cnt_impl++;
}
pr_info("Running GIC_v2 tests.\n"); if (!cnt_impl) {
run_tests(KVM_DEV_TYPE_ARM_VGIC_V2); print_skip("No GICv2 nor GICv3 support");
exit(KSFT_SKIP);
}
return 0; return 0;
} }