Commit a38cb417 authored by Michael Ellerman

Merge branch 'topic/ppc-kvm' into next

Merge some powerpc KVM patches we are keeping in a topic branch just in
case anyone else needs to merge them.
parents 49c1d07f 732f21a3
@@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
                        struct kvm_memory_slot *memslot,
                        unsigned long *map);
+extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
+                       unsigned long lpcr);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
                        unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
...
@@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-                       unsigned long pte_index, unsigned long avpn,
-                       unsigned long va);
+                       unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                        unsigned long pte_index);
 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
...
@@ -2503,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100)
 INT_DEFINE_BEGIN(cbe_system_error)
        IVEC=0x1200
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_system_error)

 EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
@@ -2524,11 +2522,16 @@ EXC_REAL_NONE(0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
 #endif

+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This has been removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported, however the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
 INT_DEFINE_BEGIN(instruction_breakpoint)
        IVEC=0x1300
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-       IKVM_SKIP=1
        IKVM_REAL=1
 #endif
 INT_DEFINE_END(instruction_breakpoint)
@@ -2674,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 INT_DEFINE_BEGIN(cbe_maintenance)
        IVEC=0x1600
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_maintenance)

 EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
@@ -2727,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 INT_DEFINE_BEGIN(cbe_thermal)
        IVEC=0x1800
        IHSRR=1
-       IKVM_SKIP=1
-       IKVM_REAL=1
 INT_DEFINE_END(cbe_thermal)

 EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
...
@@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
                vcpu->arch.dawrx1 = value2;
                return H_SUCCESS;
        case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-               /* KVM does not support mflags=2 (AIL=2) */
+               /*
+                * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+                * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
+                */
                if (mflags != 0 && mflags != 3)
                        return H_UNSUPPORTED_FLAG_START;
                return H_TOO_HARD;
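
For context on the check above: mflags encodes the requested LPCR[AIL] interrupt-vectoring mode, where 0 means real-mode vectors at address 0x0 and 3 means relocation-on vectors at 0xc000000000004000; AIL=1 is reserved and AIL=2 is not supported by KVM. A minimal sketch of the guest-side request that lands in this handler, assuming the existing pseries wrappers plpar_set_mode() and H_SET_MODE_RESOURCE_ADDR_TRANS_MODE from arch/powerpc (not part of this diff):

/*
 * Hedged sketch, not from this commit: a pseries guest asking the
 * hypervisor for relocation-on interrupt vectoring (AIL=3).
 * plpar_set_mode() and H_SET_MODE_RESOURCE_ADDR_TRANS_MODE are assumed
 * from the kernel's pseries hcall wrappers; the handler above rejects
 * anything other than mflags 0 or 3 with H_UNSUPPORTED_FLAG_START.
 */
static long guest_request_ail3(void)
{
        return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
}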
@@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
        return 0;
 }

+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+       /* LPCR_TC only applies to HPT guests */
+       if (kvm_is_radix(kvm))
+               lpcr &= ~LPCR_TC;
+
+       /* On POWER8 and above, userspace can modify AIL */
+       if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+               lpcr &= ~LPCR_AIL;
+       if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
+               lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+
+       /*
+        * On POWER9, allow userspace to enable large decrementer for the
+        * guest, whether or not the host has it enabled.
+        */
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               lpcr &= ~LPCR_LD;
+
+       return lpcr;
+}
+
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+       if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
+               WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
+                         lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
+       }
+}
+
 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                            bool preserve_top32)
 {
@@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
        u64 mask;

        spin_lock(&vc->lock);
+
+       /*
+        * Userspace can only modify
+        * DPFD (default prefetch depth), ILE (interrupt little-endian),
+        * TC (translation control), AIL (alternate interrupt location),
+        * LD (large decrementer).
+        * These are subject to restrictions from kvmppc_filter_lcpr_hv().
+        */
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+       /* Broken 32-bit version of LPCR must not clear top bits */
+       if (preserve_top32)
+               mask &= 0xFFFFFFFF;
+
+       new_lpcr = kvmppc_filter_lpcr_hv(kvm,
+                       (vc->lpcr & ~mask) | (new_lpcr & mask));
+
        /*
         * If ILE (interrupt little-endian) has changed, update the
         * MSR_LE bit in the intr_msr for each vcpu in this vcore.
@@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                }
        }

-       /*
-        * Userspace can only modify DPFD (default prefetch depth),
-        * ILE (interrupt little-endian) and TC (translation control).
-        * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
-        */
-       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
-       if (cpu_has_feature(CPU_FTR_ARCH_207S))
-               mask |= LPCR_AIL;
-       /*
-        * On POWER9, allow userspace to enable large decrementer for the
-        * guest, whether or not the host has it enabled.
-        */
-       if (cpu_has_feature(CPU_FTR_ARCH_300))
-               mask |= LPCR_LD;
-       /* Broken 32-bit version of LPCR must not clear top bits */
-       if (preserve_top32)
-               mask &= 0xFFFFFFFF;
-       vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
+       vc->lpcr = new_lpcr;
        spin_unlock(&vc->lock);
 }
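
This function services the vcore LPCR ONE_REG from userspace, so the new filtering applies on that path too. A minimal userspace sketch, assuming an already-open vcpu fd and the uapi definitions pulled in by <linux/kvm.h> (KVM_REG_PPC_LPCR_64, struct kvm_one_reg); with this change, a stored LPCR can no longer carry AIL=1/2 or, for radix guests, TC:

/*
 * Hedged userspace sketch (not part of this diff): set the guest LPCR
 * through the existing ONE_REG interface; error handling is elided.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_guest_lpcr(int vcpu_fd, uint64_t lpcr)
{
        struct kvm_one_reg reg = {
                .id   = KVM_REG_PPC_LPCR_64,
                .addr = (uint64_t)(uintptr_t)&lpcr,
        };

        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}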
@@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
        vcpu->arch.dec_expires = dec + tb;
        vcpu->cpu = -1;
        vcpu->arch.thread_cpu = -1;
+       /* Save guest CTRL register, set runlatch to 1 */
        vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+       if (!(vcpu->arch.ctrl & 1))
+               mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);

        vcpu->arch.iamr = mfspr(SPRN_IAMR);
        vcpu->arch.pspb = mfspr(SPRN_PSPB);
@@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
        mtspr(SPRN_DSCR, host_dscr);
        mtspr(SPRN_TIDR, host_tidr);
        mtspr(SPRN_IAMR, host_iamr);
-       mtspr(SPRN_PSPB, 0);

        if (host_amr != vcpu->arch.amr)
                mtspr(SPRN_AMR, host_amr);
@@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
                struct kvmppc_vcore *vc = kvm->arch.vcores[i];
                if (!vc)
                        continue;
+
                spin_lock(&vc->lock);
                vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+               verify_lpcr(kvm, vc->lpcr);
                spin_unlock(&vc->lock);
                if (++cores_done >= kvm->arch.online_vcores)
                        break;
@@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                kvmppc_setup_partition_table(kvm);
        }

+       verify_lpcr(kvm, lpcr);
        kvm->arch.lpcr = lpcr;

        /* Initialization for future HPT resizes */
@@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = {
        H_READ,
        H_PROTECT,
        H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
        H_GET_TCE,
        H_PUT_TCE,
+#endif
        H_SET_DABR,
        H_SET_XDABR,
        H_CEDE,
...
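
default_hcall_list only seeds the per-VM enabled_hcalls bitmap, so guarding H_GET_TCE/H_PUT_TCE with CONFIG_SPAPR_TCE_IOMMU changes the default, not what userspace may enable on kernels that provide the handlers. A hedged sketch of the existing per-VM toggle, KVM_CAP_PPC_ENABLE_HCALL (documented uapi: args[0] is the hcall number, args[1] is 1 to enable); the H_PUT_TCE constant itself comes from the powerpc hvcall definitions, not <linux/kvm.h>:

/*
 * Hedged sketch (not part of this diff): enable one sPAPR hcall for a
 * VM via the existing KVM_CAP_PPC_ENABLE_HCALL capability.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_hcall(int vm_fd, unsigned long hcall)
{
        struct kvm_enable_cap cap = {
                .cap  = KVM_CAP_PPC_ENABLE_HCALL,
                .args = { hcall, 1 },
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}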
@@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
+       /* Guest must always run with ME enabled, HV disabled. */
+       msr = (msr | MSR_ME) & ~MSR_HV;
+
        /*
         * Check for illegal transactional state bit combination
         * and if we find it, force the TS field to a safe state.
...
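
The new first step pins MSR[ME] on and MSR[HV] off before the existing transactional-state checks, so no value a guest (or its VMM) supplies can disable machine checks or claim hypervisor state. A standalone worked example of the transform, assuming the conventional powerpc64 bit positions (HV at bit 60, ME at bit 12) rather than including the kernel's reg.h:

/*
 * Hedged standalone illustration of msr = (msr | MSR_ME) & ~MSR_HV,
 * with hand-defined bit positions (assumption: HV bit 60, ME bit 12).
 */
#include <stdio.h>
#include <stdint.h>

#define MSR_HV (1ULL << 60)
#define MSR_ME (1ULL << 12)

int main(void)
{
        uint64_t msr = MSR_HV;                      /* hostile input: HV set, ME clear */
        uint64_t fixed = (msr | MSR_ME) & ~MSR_HV;  /* same expression as the diff */

        /* prints 0x1000000000000000 -> 0x1000 */
        printf("%#llx -> %#llx\n",
               (unsigned long long)msr, (unsigned long long)fixed);
        return 0;
}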
@@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
        }
 }

+/*
+ * This can result in some L0 HV register state being leaked to an L1
+ * hypervisor when the hv_guest_state is copied back to the guest after
+ * being modified here.
+ *
+ * There is no known problem with such a leak, and in many cases these
+ * register settings could be derived by the guest by observing behaviour
+ * and timing, interrupts, etc., but it is an issue to consider.
+ */
 static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       u64 mask;
+
+       /*
+        * Don't let L1 change LPCR bits for the L2 except these:
+        */
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+               LPCR_LPES | LPCR_MER;
+
+       /*
+        * Additional filtering is required depending on hardware
+        * and configuration.
+        */
+       hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+                       (vc->lpcr & ~mask) | (hr->lpcr & mask));
+
        /*
         * Don't let L1 enable features for L2 which we've disabled for L1,
         * but preserve the interrupt cause field.
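
The net effect is merge-then-filter: bits inside the mask are taken from L1's hv_guest_state, everything else stays pinned to the L0-maintained vcore value, and the combined word still has to pass kvmppc_filter_lpcr_hv(). A toy standalone model of just that step (the bit values are placeholders, not the kernel's LPCR_* constants):

/*
 * Toy model of the merge-then-filter step above; placeholder bits, not
 * the kernel's LPCR_* definitions.
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_MER 0x1ULL   /* inside the mask: L1's choice wins */
#define TOY_AIL 0x6ULL   /* inside the mask, but the filter clears it */

static uint64_t toy_filter(uint64_t lpcr)
{
        return lpcr & ~TOY_AIL;  /* stands in for kvmppc_filter_lpcr_hv() */
}

int main(void)
{
        uint64_t vc_lpcr = 0x100;                     /* L0 policy bit, outside mask */
        uint64_t l1_lpcr = TOY_MER | TOY_AIL | 0x200; /* 0x200 outside mask: ignored */
        uint64_t mask = TOY_MER | TOY_AIL;
        uint64_t lpcr = toy_filter((vc_lpcr & ~mask) | (l1_lpcr & mask));

        printf("%#llx\n", (unsigned long long)lpcr);  /* 0x101 */
        return 0;
}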
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        u64 hv_ptr, regs_ptr;
        u64 hdec_exp;
        s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
-       u64 mask;
-       unsigned long lpcr;

        if (vcpu->kvm->arch.l1_ptcr == 0)
                return H_NOT_AVAILABLE;
@@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        vcpu->arch.nested = l2;
        vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
        vcpu->arch.regs = l2_regs;
-       vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
-       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-               LPCR_LPES | LPCR_MER;
-       lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+
+       /* Guest must always run with ME enabled, HV disabled. */
+       vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
        sanitise_hv_regs(vcpu, &l2_hv);
        restore_hv_regs(vcpu, &l2_hv);
@@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
                        r = RESUME_HOST;
                        break;
                }
-               r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+               r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
        } while (is_kvmppc_resume_guest(r));

        /* save L2 state for return */
...
@@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 }

 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-                     unsigned long pte_index, unsigned long avpn,
-                     unsigned long va)
+                     unsigned long pte_index, unsigned long avpn)
 {
        struct kvm *kvm = vcpu->kvm;
        __be64 *hpte;
...