Commit 5e1b59ab authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM fixes:
   - Wrong indentation in the PMU code from the merge window
   - A long-time bug occurring with running ntpd on the host, candidate
     for stable
   - Properly handle (and warn about) the unsupported configuration of
     running on systems with less than 40 bits of PA space
   - More fixes to the PM and hotplug notifier stuff from the merge
     window

  x86:
   - leak of guest xcr0 (typically shows up as SIGILL)
   - new maintainer (who is sending the pull request too)
   - fix for merge window regression
   - fix for guest CPUID"

Paolo Bonzini points out:
 "For the record, this tag is signed by me because I prepared the pull
  request.  Further pull requests for 4.6 will be signed and sent out by
  Radim directly"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: mask CPUID(0xD,0x1).EAX against host value
  kvm: x86: do not leak guest xcr0 into host interrupt handlers
  KVM: MMU: fix permission_fault()
  KVM: new maintainer on the block
  arm64: KVM: unregister notifiers in hyp mode teardown path
  arm64: KVM: Warn when PARange is less than 40 bits
  KVM: arm/arm64: Handle forward time correction gracefully
  arm64: KVM: Add braces to multi-line if statement in virtual PMU code
parents 1c74a7f8 316314ca
...@@ -6252,8 +6252,8 @@ S: Maintained ...@@ -6252,8 +6252,8 @@ S: Maintained
F: tools/testing/selftests F: tools/testing/selftests
KERNEL VIRTUAL MACHINE (KVM) KERNEL VIRTUAL MACHINE (KVM)
M: Gleb Natapov <gleb@kernel.org>
M: Paolo Bonzini <pbonzini@redhat.com> M: Paolo Bonzini <pbonzini@redhat.com>
M: Radim Krčmář <rkrcmar@redhat.com>
L: kvm@vger.kernel.org L: kvm@vger.kernel.org
W: http://www.linux-kvm.org W: http://www.linux-kvm.org
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
......
...@@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void) ...@@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void)
{ {
cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
} }
/* Counterpart of hyp_cpu_pm_init(): unregister the hyp_init_cpu_pm_nb CPU PM notifier. */
static void __init hyp_cpu_pm_exit(void)
{
cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else #else
static inline void hyp_cpu_pm_init(void) static inline void hyp_cpu_pm_init(void)
{ {
} }
/* Empty stub for the #else branch: there is no CPU PM notifier to unregister here. */
static inline void hyp_cpu_pm_exit(void)
{
}
#endif #endif
static void teardown_common_resources(void) static void teardown_common_resources(void)
...@@ -1141,9 +1148,7 @@ static int init_subsystems(void) ...@@ -1141,9 +1148,7 @@ static int init_subsystems(void)
/* /*
* Register CPU Hotplug notifier * Register CPU Hotplug notifier
*/ */
cpu_notifier_register_begin(); err = register_cpu_notifier(&hyp_init_cpu_nb);
err = __register_cpu_notifier(&hyp_init_cpu_nb);
cpu_notifier_register_done();
if (err) { if (err) {
kvm_err("Cannot register KVM init CPU notifier (%d)\n", err); kvm_err("Cannot register KVM init CPU notifier (%d)\n", err);
return err; return err;
...@@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void) ...@@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void)
free_hyp_pgds(); free_hyp_pgds();
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
unregister_cpu_notifier(&hyp_init_cpu_nb);
hyp_cpu_pm_exit();
} }
static int init_vhe_mode(void) static int init_vhe_mode(void)
......
...@@ -151,8 +151,7 @@ ...@@ -151,8 +151,7 @@
*/ */
#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
VTCR_EL2_RES1)
#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
#else #else
/* /*
...@@ -163,8 +162,7 @@ ...@@ -163,8 +162,7 @@
*/ */
#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
VTCR_EL2_RES1)
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif #endif
......
...@@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void); ...@@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void); extern u32 __kvm_get_mdcr_el2(void);
extern void __init_stage2_translation(void); extern u32 __init_stage2_translation(void);
#endif #endif
......
...@@ -369,11 +369,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, ...@@ -369,11 +369,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr); struct kvm_device_attr *attr);
/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
static inline void __cpu_init_stage2(void) static inline void __cpu_init_stage2(void)
{ {
kvm_call_hyp(__init_stage2_translation); u32 parange = kvm_call_hyp(__init_stage2_translation);
WARN_ONCE(parange < 40,
"PARange is %d bits, unsupported configuration!", parange);
} }
#endif /* __ARM64_KVM_HOST_H__ */ #endif /* __ARM64_KVM_HOST_H__ */
...@@ -20,9 +20,10 @@ ...@@ -20,9 +20,10 @@
#include <asm/kvm_asm.h> #include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h> #include <asm/kvm_hyp.h>
void __hyp_text __init_stage2_translation(void) u32 __hyp_text __init_stage2_translation(void)
{ {
u64 val = VTCR_EL2_FLAGS; u64 val = VTCR_EL2_FLAGS;
u64 parange;
u64 tmp; u64 tmp;
/* /*
...@@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void) ...@@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void)
* bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
* PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
*/ */
val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; parange = read_sysreg(id_aa64mmfr0_el1) & 7;
val |= parange << 16;
/* Compute the actual PARange... */
switch (parange) {
case 0:
parange = 32;
break;
case 1:
parange = 36;
break;
case 2:
parange = 40;
break;
case 3:
parange = 42;
break;
case 4:
parange = 44;
break;
case 5:
default:
parange = 48;
break;
}
/*
* ... and clamp it to 40 bits, unless we have some braindead
* HW that implements less than that. In all cases, we'll
* return that value for the rest of the kernel to decide what
* to do.
*/
val |= 64 - (parange > 40 ? 40 : parange);
/* /*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
...@@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void) ...@@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void)
VTCR_EL2_VS_8BIT; VTCR_EL2_VS_8BIT;
write_sysreg(val, vtcr_el2); write_sysreg(val, vtcr_el2);
return parange;
} }
...@@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, ...@@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
do_cpuid_1_ent(&entry[i], function, idx); do_cpuid_1_ent(&entry[i], function, idx);
if (idx == 1) { if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
entry[i].ebx = 0; entry[i].ebx = 0;
if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
entry[i].ebx = entry[i].ebx =
......
...@@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) + int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
bool fault = (mmu->permissions[index] >> pte_access) & 1; bool fault = (mmu->permissions[index] >> pte_access) & 1;
u32 errcode = PFERR_PRESENT_MASK;
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
pfec |= PFERR_PRESENT_MASK;
if (unlikely(mmu->pkru_mask)) { if (unlikely(mmu->pkru_mask)) {
u32 pkru_bits, offset; u32 pkru_bits, offset;
...@@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
offset = pfec - 1 + offset = (pfec & ~1) +
((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
pkru_bits &= mmu->pkru_mask >> offset; pkru_bits &= mmu->pkru_mask >> offset;
pfec |= -pkru_bits & PFERR_PK_MASK; errcode |= -pkru_bits & PFERR_PK_MASK;
fault |= (pkru_bits != 0); fault |= (pkru_bits != 0);
} }
return -(uint32_t)fault & pfec; return -(u32)fault & errcode;
} }
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
......
...@@ -360,7 +360,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -360,7 +360,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
goto error; goto error;
if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
goto error; goto error;
} }
......
...@@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) ...@@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
return 1; return 1;
} }
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0; vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
...@@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->prepare_guest_switch(vcpu); kvm_x86_ops->prepare_guest_switch(vcpu);
if (vcpu->fpu_active) if (vcpu->fpu_active)
kvm_load_guest_fpu(vcpu); kvm_load_guest_fpu(vcpu);
kvm_load_guest_xcr0(vcpu);
vcpu->mode = IN_GUEST_MODE; vcpu->mode = IN_GUEST_MODE;
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
...@@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection; goto cancel_injection;
} }
kvm_load_guest_xcr0(vcpu);
if (req_immediate_exit) if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu); smp_send_reschedule(vcpu->cpu);
...@@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb(); smp_wmb();
kvm_put_guest_xcr0(vcpu);
/* Interrupt is enabled by handle_external_intr() */ /* Interrupt is enabled by handle_external_intr() */
kvm_x86_ops->handle_external_intr(vcpu); kvm_x86_ops->handle_external_intr(vcpu);
...@@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
* and assume host would use all available bits. * and assume host would use all available bits.
* Guest xcr0 would be loaded later. * Guest xcr0 would be loaded later.
*/ */
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1; vcpu->guest_fpu_loaded = 1;
__kernel_fpu_begin(); __kernel_fpu_begin();
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
...@@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{ {
kvm_put_guest_xcr0(vcpu);
if (!vcpu->guest_fpu_loaded) { if (!vcpu->guest_fpu_loaded) {
vcpu->fpu_counter = 0; vcpu->fpu_counter = 0;
return; return;
......
...@@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) ...@@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
vcpu->arch.timer_cpu.armed = false; vcpu->arch.timer_cpu.armed = false;
WARN_ON(!kvm_timer_should_fire(vcpu));
/* /*
* If the vcpu is blocked we want to wake it up so that it will see * If the vcpu is blocked we want to wake it up so that it will see
* the timer has expired when entering the guest. * the timer has expired when entering the guest.
...@@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) ...@@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
} }
/*
 * Compute how long, in nanoseconds, remains until the vcpu's timer
 * compare value (cntv_cval) is reached.
 *
 * Returns the remaining time converted from counter cycles to ns, or 0
 * when the current counter value has already reached or passed cntv_cval.
 */
static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
{
cycle_t cval, now;
cval = vcpu->arch.timer_cpu.cntv_cval;
/* Current counter reading with the VM's cntvoff subtracted. */
now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
/* Convert only while the deadline is still ahead, so cval - now is positive. */
if (now < cval) {
u64 ns;
ns = cyclecounter_cyc2ns(timecounter->cc,
cval - now,
timecounter->mask,
&timecounter->frac);
return ns;
}
/* Deadline already reached: no time left to wait. */
return 0;
}
static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{ {
struct arch_timer_cpu *timer; struct arch_timer_cpu *timer;
struct kvm_vcpu *vcpu;
u64 ns;
timer = container_of(hrt, struct arch_timer_cpu, timer); timer = container_of(hrt, struct arch_timer_cpu, timer);
vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
/*
* Check that the timer has really expired from the guest's
* PoV (NTP on the host may have forced it to expire
* early). If we should have slept longer, restart it.
*/
ns = kvm_timer_compute_delta(vcpu);
if (unlikely(ns)) {
hrtimer_forward_now(hrt, ns_to_ktime(ns));
return HRTIMER_RESTART;
}
queue_work(wqueue, &timer->expired); queue_work(wqueue, &timer->expired);
return HRTIMER_NORESTART; return HRTIMER_NORESTART;
} }
...@@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) ...@@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
void kvm_timer_schedule(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{ {
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 ns;
cycle_t cval, now;
BUG_ON(timer_is_armed(timer)); BUG_ON(timer_is_armed(timer));
...@@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) ...@@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
return; return;
/* The timer has not yet expired, schedule a background timer */ /* The timer has not yet expired, schedule a background timer */
cval = timer->cntv_cval; timer_arm(timer, kvm_timer_compute_delta(vcpu));
now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
ns = cyclecounter_cyc2ns(timecounter->cc,
cval - now,
timecounter->mask,
&timecounter->frac);
timer_arm(timer, ns);
} }
void kvm_timer_unschedule(struct kvm_vcpu *vcpu) void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
......
...@@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) ...@@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{ {
u64 reg = 0; u64 reg = 0;
if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
reg &= kvm_pmu_valid_counter_mask(vcpu); reg &= kvm_pmu_valid_counter_mask(vcpu);
}
return reg; return reg;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment