Commit 45d36906 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "Early fixes for x86.

  Instead of the (botched) revert, the lockdep/might_sleep splat has a
  real fix provided by Andrea"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
  kvm: take srcu lock around kvm_steal_time_set_preempted()
  kvm: fix schedule in atomic in kvm_steal_time_set_preempted()
  KVM: hyperv: fix locking of struct kvm_hv fields
  KVM: x86: Expose Intel AVX512IFMA/AVX512VBMI/SHA features to guest.
  kvm: nVMX: Correct a VMX instruction error code for VMPTRLD
parents f1e91324 ef85b673
...@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows: ...@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows:
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare. them together is quite rare.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
else is a leaf: no other lock is taken inside the critical sections.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2: Exception 2: Exception
------------ ------------
......
...@@ -704,6 +704,7 @@ struct kvm_apic_map { ...@@ -704,6 +704,7 @@ struct kvm_apic_map {
/* Hyper-V emulation context */ /* Hyper-V emulation context */
struct kvm_hv { struct kvm_hv {
struct mutex hv_lock;
u64 hv_guest_os_id; u64 hv_guest_os_id;
u64 hv_hypercall; u64 hv_hypercall;
u64 hv_tsc_page; u64 hv_tsc_page;
......
...@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, ...@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_cpuid_7_0_ebx_x86_features = const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(AVX512BW) | F(AVX512VL); F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
/* cpuid 0xD.1.eax */ /* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features = const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
/* cpuid 7.0.ecx*/ /* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
/* cpuid 7.0.edx*/ /* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features = const u32 kvm_cpuid_7_0_edx_x86_features =
......
...@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, ...@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
return; return;
mutex_lock(&kvm->arch.hyperv.hv_lock);
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
goto out_unlock;
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
/* /*
* Because the TSC parameters only vary when there is a * Because the TSC parameters only vary when there is a
...@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, ...@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
*/ */
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
&tsc_seq, sizeof(tsc_seq)))) &tsc_seq, sizeof(tsc_seq))))
return; goto out_unlock;
/* /*
* While we're computing and writing the parameters, force the * While we're computing and writing the parameters, force the
...@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, ...@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = 0; hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
return; goto out_unlock;
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
return; goto out_unlock;
/* Ensure sequence is zero before writing the rest of the struct. */ /* Ensure sequence is zero before writing the rest of the struct. */
smp_wmb(); smp_wmb();
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
return; goto out_unlock;
/* /*
* Now switch to the TSC page mechanism by writing the sequence. * Now switch to the TSC page mechanism by writing the sequence.
...@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, ...@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = tsc_seq; hv->tsc_ref.tsc_sequence = tsc_seq;
kvm_write_guest(kvm, gfn_to_gpa(gfn), kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
mutex_unlock(&kvm->arch.hyperv.hv_lock);
} }
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
...@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) ...@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (kvm_hv_msr_partition_wide(msr)) { if (kvm_hv_msr_partition_wide(msr)) {
int r; int r;
mutex_lock(&vcpu->kvm->lock); mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_set_msr_pw(vcpu, msr, data, host); r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
mutex_unlock(&vcpu->kvm->lock); mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r; return r;
} else } else
return kvm_hv_set_msr(vcpu, msr, data, host); return kvm_hv_set_msr(vcpu, msr, data, host);
...@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
if (kvm_hv_msr_partition_wide(msr)) { if (kvm_hv_msr_partition_wide(msr)) {
int r; int r;
mutex_lock(&vcpu->kvm->lock); mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_get_msr_pw(vcpu, msr, pdata); r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
mutex_unlock(&vcpu->kvm->lock); mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r; return r;
} else } else
return kvm_hv_get_msr(vcpu, msr, pdata); return kvm_hv_get_msr(vcpu, msr, pdata);
...@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
bool kvm_hv_hypercall_enabled(struct kvm *kvm) bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{ {
return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
} }
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
......
...@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) ...@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
} }
static inline bool is_exception(u32 intr_info) static inline bool is_nmi(u32 intr_info)
{ {
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
== (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
} }
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
...@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) ...@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info)) if (is_machine_check(intr_info))
return handle_machine_check(vcpu); return handle_machine_check(vcpu);
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */ return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) { if (is_no_device(intr_info)) {
...@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, ...@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
if (vmptr == vmx->nested.vmxon_ptr) { if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu, nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER); VMXERR_VMPTRLD_VMXON_POINTER);
return kvm_skip_emulated_instruction(vcpu); return kvm_skip_emulated_instruction(vcpu);
} }
break; break;
...@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) ...@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
switch (exit_reason) { switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI: case EXIT_REASON_EXCEPTION_NMI:
if (!is_exception(intr_info)) if (is_nmi(intr_info))
return false; return false;
else if (is_page_fault(intr_info)) else if (is_page_fault(intr_info))
return enable_ept; return enable_ept;
...@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) ...@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
kvm_machine_check(); kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */ /* We need to handle NMIs before interrupts are enabled */
if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && if (is_nmi(exit_intr_info)) {
(exit_intr_info & INTR_INFO_VALID_MASK)) {
kvm_before_handle_nmi(&vmx->vcpu); kvm_before_handle_nmi(&vmx->vcpu);
asm("int $2"); asm("int $2");
kvm_after_handle_nmi(&vmx->vcpu); kvm_after_handle_nmi(&vmx->vcpu);
......
...@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) ...@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{ {
int idx;
/*
* Disable page faults because we're in atomic context here.
* kvm_write_guest_offset_cached() would call might_fault()
* that relies on pagefault_disable() to tell if there's a
* bug. NOTE: the write to guest memory may not go through if
* during postcopy live migration or if there's heavy guest
* paging.
*/
pagefault_disable();
/*
* kvm_memslots() will be called by
* kvm_write_guest_offset_cached() so take the srcu lock.
*/
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_steal_time_set_preempted(vcpu); kvm_steal_time_set_preempted(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu); kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu); kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc(); vcpu->arch.last_host_tsc = rdtsc();
...@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock); raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock); mutex_init(&kvm->arch.apic_map_lock);
mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment