Commit 45d36906 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "Early fixes for x86.

  Instead of the (botched) revert, the lockdep/might_sleep splat has a
  real fix provided by Andrea"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
  kvm: take srcu lock around kvm_steal_time_set_preempted()
  kvm: fix schedule in atomic in kvm_steal_time_set_preempted()
  KVM: hyperv: fix locking of struct kvm_hv fields
  KVM: x86: Expose Intel AVX512IFMA/AVX512VBMI/SHA features to guest.
  kvm: nVMX: Correct a VMX instruction error code for VMPTRLD
parents f1e91324 ef85b673
......@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows:
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything
else is a leaf: no other lock is taken inside the critical sections.
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2: Exception
------------
......
......@@ -704,6 +704,7 @@ struct kvm_apic_map {
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
......
......@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(AVX512BW) | F(AVX512VL);
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
......
......@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
return;
mutex_lock(&kvm->arch.hyperv.hv_lock);
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
goto out_unlock;
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
/*
* Because the TSC parameters only vary when there is a
......@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
*/
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
&tsc_seq, sizeof(tsc_seq))))
return;
goto out_unlock;
/*
* While we're computing and writing the parameters, force the
......@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
return;
goto out_unlock;
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
return;
goto out_unlock;
/* Ensure sequence is zero before writing the rest of the struct. */
smp_wmb();
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
return;
goto out_unlock;
/*
* Now switch to the TSC page mechanism by writing the sequence.
......@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = tsc_seq;
kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
mutex_unlock(&kvm->arch.hyperv.hv_lock);
}
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
......@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
mutex_unlock(&vcpu->kvm->lock);
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_set_msr(vcpu, msr, data, host);
......@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
mutex_unlock(&vcpu->kvm->lock);
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_get_msr(vcpu, msr, pdata);
......@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
......
......@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
}
static inline bool is_exception(u32 intr_info)
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
== (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
......@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
......@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
VMXERR_VMPTRLD_VMXON_POINTER);
return kvm_skip_emulated_instruction(vcpu);
}
break;
......@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
if (!is_exception(intr_info))
if (is_nmi(intr_info))
return false;
else if (is_page_fault(intr_info))
return enable_ept;
......@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
(exit_intr_info & INTR_INFO_VALID_MASK)) {
if (is_nmi(exit_intr_info)) {
kvm_before_handle_nmi(&vmx->vcpu);
asm("int $2");
kvm_after_handle_nmi(&vmx->vcpu);
......
......@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
int idx;
/*
* Disable page faults because we're in atomic context here.
* kvm_write_guest_offset_cached() would call might_fault()
* that relies on pagefault_disable() to tell if there's a
* bug. NOTE: the write to guest memory may not go through if
* during postcopy live migration or if there's heavy guest
* paging.
*/
pagefault_disable();
/*
* kvm_memslots() will be called by
* kvm_write_guest_offset_cached() so take the srcu lock.
*/
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_steal_time_set_preempted(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
......@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment