Commit 89a734b5 authored by Oliver Upton's avatar Oliver Upton

Merge branch kvm-arm64/configurable-id-regs into kvmarm/next

* kvm-arm64/configurable-id-regs:
  : Configurable ID register infrastructure, courtesy of Jing Zhang
  :
  : Create generalized infrastructure for allowing userspace to select the
  : supported feature set for a VM, so long as the feature set is a subset
  : of what hardware + KVM allows. This does not add any new features that
  : are user-configurable, and instead focuses on the necessary refactoring
  : to enable future work.
  :
  : As a consequence of the series, feature asymmetry is now deliberately
  : disallowed for KVM. It is unlikely that VMMs ever configured VMs with
  : asymmetry, nor does it align with the kernel's overall stance that
  : features must be uniform across all cores in the system.
  :
  : Furthermore, KVM incorrectly advertised an IMP_DEF PMU to guests for
  : some time. Migrations from affected kernels was supported by explicitly
  : allowing such an ID register value from userspace, and forwarding that
  : along to the guest. KVM now allows an IMP_DEF PMU version to be restored
  : through the ID register interface, but reinterprets the user value as
  : not implemented (0).
  KVM: arm64: Rip out the vestiges of the 'old' ID register scheme
  KVM: arm64: Handle ID register reads using the VM-wide values
  KVM: arm64: Use generic sanitisation for ID_AA64PFR0_EL1
  KVM: arm64: Use generic sanitisation for ID_(AA64)DFR0_EL1
  KVM: arm64: Use arm64_ftr_bits to sanitise ID register writes
  KVM: arm64: Save ID registers' sanitized value per guest
  KVM: arm64: Reuse fields of sys_reg_desc for idreg
  KVM: arm64: Rewrite IMPDEF PMU version as NI
  KVM: arm64: Make vCPU feature flags consistent VM-wide
  KVM: arm64: Relax invariance of KVM_ARM_VCPU_POWER_OFF
  KVM: arm64: Separate out feature sanitisation and initialisation
Signed-off-by: default avatarOliver Upton <oliver.upton@linux.dev>
parents acfdf34c 68667240
......@@ -918,6 +918,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
}
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
extern struct arm64_ftr_override id_aa64mmfr1_override;
......
......@@ -62,12 +62,7 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
#else
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
&kvm->arch.flags));
return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
}
#endif
......
......@@ -39,6 +39,7 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
#define KVM_VCPU_MAX_FEATURES 7
#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
......@@ -229,25 +230,23 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_MTE_ENABLED 1
/* At least one vCPU has ran in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
/*
* The following two bits are used to indicate the guest's EL1
* register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
* bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
* Otherwise, the guest's EL1 register width has not yet been
* determined yet.
*/
#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* The vCPU feature set for the VM is configured */
#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
/* VM counter offset */
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
/* SMCCC filter initialized for the VM */
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
/* Initial ID reg values loaded */
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
unsigned long flags;
/* VM-wide vCPU feature set */
DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
......@@ -257,17 +256,23 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
u8 pfr0_csv2;
u8 pfr0_csv3;
struct {
u8 imp:4;
u8 unimp:4;
} dfr0_pmuver;
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
struct maple_tree smccc_filter;
/*
* Emulated CPU ID registers per VM
* (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
* is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
*
* These emulated idregs are VM-wide, but accessed from the context of a vCPU.
* Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
*/
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
u64 id_regs[KVM_ARM_ID_REG_NUM];
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
* the associated pKVM instance in the hypervisor.
......
......@@ -800,7 +800,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
return reg;
}
static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
s64 cur)
{
s64 ret = 0;
......
......@@ -121,22 +121,6 @@ static int kvm_arm_default_max_vcpus(void)
return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}
static void set_default_spectre(struct kvm *kvm)
{
/*
* The default is to expose CSV2 == 1 if the HW isn't affected.
* Although this is a per-CPU feature, we make it global because
* asymmetric systems are just a nuisance.
*
* Userspace can override this as long as it doesn't promise
* the impossible.
*/
if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
kvm->arch.pfr0_csv2 = 1;
if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
kvm->arch.pfr0_csv3 = 1;
}
/**
* kvm_arch_init_vm - initializes a VM data structure
* @kvm: pointer to the KVM struct
......@@ -180,14 +164,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->max_vcpus = kvm_arm_default_max_vcpus();
set_default_spectre(kvm);
kvm_arm_init_hypercalls(kvm);
/*
* Initialise the default PMUver before there is a chance to
* create an actual PMU.
*/
kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES);
return 0;
......@@ -1195,58 +1174,115 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return -EINVAL;
}
static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
unsigned int i, ret;
u32 phys_target = kvm_target_cpu();
unsigned long features = init->features[0];
int i;
if (features & ~KVM_VCPU_VALID_FEATURES)
return -ENOENT;
for (i = 1; i < ARRAY_SIZE(init->features); i++) {
if (init->features[i])
return -ENOENT;
}
if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
return 0;
if (init->target != phys_target)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
return -EINVAL;
/*
* Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
* use the same target.
*/
if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
/* MTE is incompatible with AArch32 */
if (kvm_has_mte(vcpu->kvm))
return -EINVAL;
/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
for (i = 0; i < sizeof(init->features) * 8; i++) {
bool set = (init->features[i / 32] & (1 << (i % 32)));
/* NV is incompatible with AArch32 */
if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features))
return -EINVAL;
if (set && i >= KVM_VCPU_MAX_FEATURES)
return -ENOENT;
return 0;
}
/*
* Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
* use the same feature set.
*/
if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
test_bit(i, vcpu->arch.features) != set)
return -EINVAL;
static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
unsigned long features = init->features[0];
if (set)
set_bit(i, vcpu->arch.features);
}
return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
vcpu->arch.target != init->target;
}
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
unsigned long features = init->features[0];
struct kvm *kvm = vcpu->kvm;
int ret = -EINVAL;
mutex_lock(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) &&
!bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
goto out_unlock;
vcpu->arch.target = phys_target;
vcpu->arch.target = init->target;
bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
/* Now we know what it is, we can reset it. */
ret = kvm_reset_vcpu(vcpu);
if (ret) {
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
goto out_unlock;
}
bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
int ret;
if (init->target != kvm_target_cpu())
return -EINVAL;
ret = kvm_vcpu_init_check_features(vcpu, init);
if (ret)
return ret;
if (vcpu->arch.target == -1)
return __kvm_vcpu_set_target(vcpu, init);
if (kvm_vcpu_init_changed(vcpu, init))
return -EINVAL;
return kvm_reset_vcpu(vcpu);
}
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
struct kvm_vcpu_init *init)
{
bool power_off = false;
int ret;
/*
* Treat the power-off vCPU feature as ephemeral. Clear the bit to avoid
* reflecting it in the finalized feature set, thus limiting its scope
* to a single KVM_ARM_VCPU_INIT call.
*/
if (init->features[0] & KVM_ARM_VCPU_POWER_OFF) {
init->features[0] &= ~KVM_ARM_VCPU_POWER_OFF;
power_off = true;
}
ret = kvm_vcpu_set_target(vcpu, init);
if (ret)
return ret;
......@@ -1275,7 +1311,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
*/
spin_lock(&vcpu->arch.mp_state_lock);
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
if (power_off)
__kvm_arm_vcpu_power_off(vcpu);
else
WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
......
......@@ -186,57 +186,6 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
return 0;
}
/**
* kvm_set_vm_width() - set the register width for the guest
* @vcpu: Pointer to the vcpu being configured
*
* Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED
* in the VM flags based on the vcpu's requested register width, the HW
* capabilities and other options (such as MTE).
* When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be
* consistent with the value of the FLAG_EL1_32BIT bit in the flags.
*
* Return: 0 on success, negative error code on failure.
*/
static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
bool is32bit;
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
lockdep_assert_held(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
/*
* The guest's register width is already configured.
* Make sure that the vcpu is consistent with it.
*/
if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags))
return 0;
return -EINVAL;
}
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return -EINVAL;
/* MTE is incompatible with AArch32 */
if (kvm_has_mte(kvm) && is32bit)
return -EINVAL;
/* NV is incompatible with AArch32 */
if (vcpu_has_nv(vcpu) && is32bit)
return -EINVAL;
if (is32bit)
set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags);
return 0;
}
/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
......@@ -262,13 +211,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
bool loaded;
u32 pstate;
mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_set_vm_width(vcpu);
mutex_unlock(&vcpu->kvm->arch.config_lock);
if (ret)
return ret;
spin_lock(&vcpu->arch.mp_state_lock);
reset_state = vcpu->arch.reset_state;
vcpu->arch.reset_state.reset = false;
......
This diff is collapsed.
......@@ -27,6 +27,13 @@ struct sys_reg_params {
bool is_write;
};
#define encoding_to_params(reg) \
((struct sys_reg_params){ .Op0 = sys_reg_Op0(reg), \
.Op1 = sys_reg_Op1(reg), \
.CRn = sys_reg_CRn(reg), \
.CRm = sys_reg_CRm(reg), \
.Op2 = sys_reg_Op2(reg) })
#define esr_sys64_to_params(esr) \
((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3, \
.Op1 = ((esr) >> 14) & 0x7, \
......@@ -64,13 +71,16 @@ struct sys_reg_desc {
struct sys_reg_params *,
const struct sys_reg_desc *);
/* Initialization for vcpu. */
void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
/*
* Initialization for vcpu. Return initialized value, or KVM
* sanitized value for ID registers.
*/
u64 (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
/* Index into sys_reg[], or 0 if we don't need to save it. */
int reg;
/* Value (usually reset value) */
/* Value (usually reset value), or write mask for idregs */
u64 val;
/* Custom get/set_user functions, fallback to generic if NULL */
......@@ -123,19 +133,21 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
}
/* Reset functions */
static inline void reset_unknown(struct kvm_vcpu *vcpu,
static inline u64 reset_unknown(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
__vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
return __vcpu_sys_reg(vcpu, r->reg);
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
__vcpu_sys_reg(vcpu, r->reg) = r->val;
return __vcpu_sys_reg(vcpu, r->reg);
}
static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu,
......
......@@ -92,8 +92,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
/*
* Evaluates as true when emulating PMUv3p5, and false otherwise.
*/
#define kvm_pmu_is_3p5(vcpu) \
(vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5)
#define kvm_pmu_is_3p5(vcpu) ({ \
u64 val = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1); \
u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val); \
\
pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; \
})
u8 kvm_arm_pmu_get_pmuver_limit(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment