Commit b5de3c50 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "5.1 keeps its reputation as a big bugfix release for KVM x86.

   - Fix for a memory leak introduced during the merge window

   - Fixes for nested VMX with ept=0

   - Fixes for AMD (APIC virtualization, NMI injection)

   - Fixes for Hyper-V under KVM and KVM under Hyper-V

   - Fixes for 32-bit SMM and tests for SMM virtualization

   - More array_index_nospec peppering"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing
  KVM: fix spectrev1 gadgets
  KVM: x86: fix warning Using plain integer as NULL pointer
  selftests: kvm: add a selftest for SMM
  selftests: kvm: fix for compilers that do not support -no-pie
  selftests: kvm/evmcs_test: complete I/O before migrating guest state
  KVM: x86: Always use 32-bit SMRAM save state for 32-bit kernels
  KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU
  KVM: x86: clear SMM flags before loading state while leaving SMM
  KVM: x86: Open code kvm_set_hflags
  KVM: x86: Load SMRAM in a single shot when leaving SMM
  KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU
  KVM: x86: Raise #GP when guest vCPU do not support PMU
  x86/kvm: move kvm_load/put_guest_xcr0 into atomic context
  KVM: x86: svm: make sure NMI is injected after nmi_singlestep
  svm/avic: Fix invalidate logical APIC id entry
  Revert "svm: Fix AVIC incomplete IPI emulation"
  kvm: mmu: Fix overflow on kvm mmu page limit calculation
  KVM: nVMX: always use early vmcs check when EPT is disabled
  KVM: nVMX: allow tests to use bad virtual-APIC page address
  ...
parents 618d919c 7a223e06
......@@ -226,7 +226,9 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
const char *smstate);
void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
};
......
......@@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
}
#define KVM_PERMILLE_MMU_PAGES 20
#define KVM_MIN_ALLOC_MMU_PAGES 64
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
......@@ -844,9 +844,9 @@ enum kvm_irqchip_mode {
};
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
unsigned int n_max_mmu_pages;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
......@@ -1182,7 +1182,7 @@ struct kvm_x86_ops {
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
......@@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
bool pdptrs_changed(struct kvm_vcpu *vcpu);
......@@ -1592,4 +1592,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
#define GET_SMSTATE(type, buf, offset) \
(*(type *)((buf) + (offset) - 0x7e00))
#endif /* _ASM_X86_KVM_HOST_H */
......@@ -146,6 +146,7 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
This diff is collapsed.
......@@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
if (offset <= max_apic_id) {
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
offset = array_index_nospec(offset, map->max_apic_id + 1);
*cluster = &map->phys_map[offset];
*mask = dest_id & (0xffff >> (16 - cluster_size));
} else {
......@@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (irq->dest_id > map->max_apic_id) {
*bitmap = 0;
} else {
*dst = &map->phys_map[irq->dest_id];
u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
*dst = &map->phys_map[dest_id];
*bitmap = 1;
}
return true;
......
......@@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
......@@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
struct list_head *invalid_list,
bool remote_flush)
{
if (!remote_flush && !list_empty(invalid_list))
if (!remote_flush && list_empty(invalid_list))
return false;
if (!list_empty(invalid_list))
......@@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
* Changing the number of mmu pages allocated to the vm
* Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
......@@ -6031,10 +6031,10 @@ int kvm_mmu_module_init(void)
/*
* Calculate mmu pages needed for kvm.
*/
unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
{
unsigned int nr_mmu_pages;
unsigned int nr_pages = 0;
unsigned long nr_mmu_pages;
unsigned long nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
......@@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
}
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
nr_mmu_pages = max(nr_mmu_pages,
(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
......
......@@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
......
......@@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
u64 ctr_val;
if (!pmu->version)
return 1;
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
......
......@@ -262,6 +262,7 @@ struct amd_svm_iommu_ir {
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
......@@ -2692,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm)
static int db_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
struct kvm_vcpu *vcpu = &svm->vcpu;
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
......@@ -2702,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->nmi_singlestep) {
disable_nmi_singlestep(svm);
/* Make sure we check for pending NMIs upon entry */
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (svm->vcpu.guest_debug &
......@@ -4517,14 +4521,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
int i;
struct kvm_vcpu *vcpu;
struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
* Update ICR high and low, then emulate sending IPI,
* which is handled when writing APIC_ICR.
* At this point, we expect that the AVIC HW has already
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
*/
kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
icrl & KVM_APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
icrl & KVM_APIC_DEST_MASK);
if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
}
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
......@@ -4596,7 +4611,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
if (entry)
WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
......@@ -5621,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
kvm_load_guest_xcr0(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
......@@ -5766,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
kvm_put_guest_xcr0(vcpu);
stgi();
/* Any pending NMI will happen here */
......@@ -6215,32 +6232,24 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
struct page *page;
struct {
u64 guest;
u64 vmcb;
} svm_state_save;
int ret;
u64 guest;
u64 vmcb;
ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
sizeof(svm_state_save));
if (ret)
return ret;
guest = GET_SMSTATE(u64, smstate, 0x7ed8);
vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
if (svm_state_save.guest) {
vcpu->arch.hflags &= ~HF_SMM_MASK;
nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
if (nested_vmcb)
enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
else
ret = 1;
vcpu->arch.hflags |= HF_SMM_MASK;
if (guest) {
nested_vmcb = nested_svm_map(svm, vmcb, &page);
if (!nested_vmcb)
return 1;
enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
}
return ret;
return 0;
}
static int enable_smi_window(struct kvm_vcpu *vcpu)
......
......@@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
__field( __u8, tm )
__field( __u16, tm )
__field( __u8, vec )
),
......
......@@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
* Failing the vm entry is _not_ what the processor
* does but it's basically the only possibility we
* have. We could still enter the guest if CR8 load
* exits are enabled, CR8 store exits are enabled, and
* virtualize APIC access is disabled; in this case
* the processor would never use the TPR shadow and we
* could simply clear the bit from the execution
* control. But such a configuration is useless, so
* let's keep the code simple.
*/
if (!is_error_page(page)) {
vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* The processor will never use the TPR shadow, simply
* clear the bit from the execution control. Such a
* configuration is useless, but it happens in tests.
* For any other configuration, failing the vm entry is
* _not_ what the processor does but it's basically the
* only possibility we have.
*/
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_TPR_SHADOW);
} else {
printk("bad virtual-APIC page address\n");
dump_vmcs();
}
}
......@@ -3789,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
/*
* This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
* points to shadow pages! Fortunately we only get here after a WARN_ON
* if EPT is disabled, so a VMabort is perfectly fine.
*/
if (enable_ept) {
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
} else {
nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
}
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
......@@ -5738,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
/*
* Without EPT it is not possible to restore L1's CR3 and PDPTR on
* VMfail, because they are not available in vmcs01. Just always
* use hardware checks.
*/
if (!enable_ept)
nested_early_check = 1;
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
......
......@@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
static void dump_vmcs(void)
void dump_vmcs(void)
{
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
......@@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
kvm_load_guest_xcr0(vcpu);
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
......@@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
kvm_put_guest_xcr0(vcpu);
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
......@@ -6852,6 +6856,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
}
}
static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *entry;
union cpuid10_eax eax;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
return false;
eax.full = entry->eax;
return (eax.split.version_id > 0);
}
static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
if (pmu_enabled)
vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
else
vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
}
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
......@@ -6940,6 +6968,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
nested_vmx_procbased_ctls_update(vcpu);
}
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
......@@ -7369,7 +7398,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
......@@ -7380,9 +7409,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
}
if (vmx->nested.smm.guest_mode) {
vcpu->arch.hflags &= ~HF_SMM_MASK;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
......
......@@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
void dump_vmcs(void);
#endif /* __KVM_X86_VMX_H */
......@@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
......@@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 1;
}
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
......@@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 0;
}
}
EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
......@@ -3093,7 +3095,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
break;
default:
break;
......@@ -3528,7 +3530,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}
static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
static void kvm_smm_changed(struct kvm_vcpu *vcpu);
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
......@@ -3588,12 +3590,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
u32 hflags = vcpu->arch.hflags;
if (events->smi.smm)
hflags |= HF_SMM_MASK;
else
hflags &= ~HF_SMM_MASK;
kvm_set_hflags(vcpu, hflags);
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
if (events->smi.smm)
vcpu->arch.hflags |= HF_SMM_MASK;
else
vcpu->arch.hflags &= ~HF_SMM_MASK;
kvm_smm_changed(vcpu);
}
vcpu->arch.smi_pending = events->smi.pending;
......@@ -4270,7 +4273,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
u32 kvm_nr_mmu_pages)
unsigned long kvm_nr_mmu_pages)
{
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
......@@ -4284,7 +4287,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return 0;
}
static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
return kvm->arch.n_max_mmu_pages;
}
......@@ -5958,12 +5961,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
{
return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
kvm_smm_changed(emul_to_vcpu(ctxt));
}
static const struct x86_emulate_ops emulate_ops = {
......@@ -6006,6 +6015,7 @@ static const struct x86_emulate_ops emulate_ops = {
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
.pre_leave_smm = emulator_pre_leave_smm,
.post_leave_smm = emulator_post_leave_smm,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
......@@ -6247,16 +6257,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
{
unsigned changed = vcpu->arch.hflags ^ emul_flags;
vcpu->arch.hflags = emul_flags;
if (changed & HF_SMM_MASK)
kvm_smm_changed(vcpu);
}
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
......@@ -7441,9 +7441,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
unsigned long val;
......@@ -7493,10 +7493,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
#else
WARN_ON_ONCE(1);
#endif
}
#endif
static void enter_smm(struct kvm_vcpu *vcpu)
{
......@@ -7507,9 +7505,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
memset(buf, 0, 512);
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
#endif
enter_smm_save_state_32(vcpu, buf);
/*
......@@ -7567,8 +7567,10 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
#endif
kvm_update_cpuid(vcpu);
kvm_mmu_reset_context(vcpu);
......@@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
kvm_load_guest_xcr0(vcpu);
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
......@@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
kvm_put_guest_xcr0(vcpu);
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
......
......@@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}
void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
#endif
......@@ -28,6 +28,7 @@
#include <linux/irqbypass.h>
#include <linux/swait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <asm/signal.h>
#include <linux/kvm.h>
......@@ -513,10 +514,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
* the caller has read kvm->online_vcpus before (as is the case
* for kvm_for_each_vcpu, for example).
*/
int num_vcpus = atomic_read(&kvm->online_vcpus);
i = array_index_nospec(i, num_vcpus);
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
return kvm->vcpus[i];
}
......@@ -600,6 +601,7 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
......
include ../../../../scripts/Kbuild.include
all:
top_srcdir = ../../../..
......@@ -17,6 +19,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
......@@ -30,7 +33,11 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
LDFLAGS += -pthread -no-pie
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
LDFLAGS += -pthread $(no-pie-option)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
......
......@@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define APIC_BASE_MSR 0x800
#define X2APIC_ENABLE (1UL << 10)
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
#define APIC_DEST_ALLBUT 0xC0000
#define APIC_ICR_RR_MASK 0x30000
#define APIC_ICR_RR_INVALID 0x00000
#define APIC_ICR_RR_INPROG 0x10000
#define APIC_ICR_RR_VALID 0x20000
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
#define APIC_ICR_BUSY 0x01000
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DM_FIXED 0x00000
#define APIC_DM_FIXED_MASK 0x00700
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300
#define APIC_DM_NMI 0x00400
#define APIC_DM_INIT 0x00500
#define APIC_DM_STARTUP 0x00600
#define APIC_DM_EXTINT 0x00700
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
......
......@@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
if (vm->kvm_fd < 0)
exit(KSFT_SKIP);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
fprintf(stderr, "immediate_exit not available, skipping test\n");
exit(KSFT_SKIP);
}
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
......
......@@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
nested_size, sizeof(state->nested_));
}
/*
* When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
* guest state is consistent only after userspace re-enters the
* kernel with KVM_RUN. Complete IO prior to migrating state
* to a new VM.
*/
vcpu_run_complete_io(vm, vcpuid);
nmsrs = kvm_get_num_msrs(vm);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
......@@ -1093,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
if (state->nested.size) {
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
r);
}
r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
......@@ -1130,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
r);
if (state->nested.size) {
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
r);
}
}
......@@ -123,8 +123,6 @@ int main(int argc, char *argv[])
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
......@@ -144,6 +142,9 @@ int main(int argc, char *argv[])
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018, Red Hat, Inc.
*
* Tests for SMM.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "vmx.h"
#define VCPU_ID 1
#define PAGE_SIZE 4096
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
#define SMRAM_GPA 0x1000000
#define SMRAM_STAGE 0xfe
#define STR(x) #x
#define XSTR(s) STR(s)
#define SYNC_PORT 0xe
#define DONE 0xff
/*
* This is compiled as normal 64-bit code, however, SMI handler is executed
* in real-address mode. To stay simple we're limiting ourselves to a mode
* independent subset of asm here.
* SMI handler always report back fixed stage SMRAM_STAGE.
*/
uint8_t smi_handler[] = {
0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
0x0f, 0xaa, /* rsm */
};
void sync_with_host(uint64_t phase)
{
asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
: : "a" (phase));
}
void self_smi(void)
{
wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
void guest_code(struct vmx_pages *vmx_pages)
{
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
sync_with_host(1);
wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
sync_with_host(2);
self_smi();
sync_with_host(4);
if (vmx_pages) {
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
sync_with_host(5);
self_smi();
sync_with_host(7);
}
sync_with_host(DONE);
}
int main(int argc, char *argv[])
{
struct vmx_pages *vmx_pages = NULL;
vm_vaddr_t vmx_pages_gva = 0;
struct kvm_regs regs;
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_x86_state *state;
int stage, stage_reported;
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
run = vcpu_state(vm, VCPU_ID);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
SMRAM_MEMSLOT, SMRAM_PAGES, 0);
TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
== SMRAM_GPA, "could not allocate guest physical addresses?");
memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
sizeof(smi_handler));
vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
} else {
printf("will skip SMM test with VMX enabled\n");
vcpu_args_set(vm, VCPU_ID, 1, 0);
}
for (stage = 1;; stage++) {
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
memset(&regs, 0, sizeof(regs));
vcpu_regs_get(vm, VCPU_ID, &regs);
stage_reported = regs.rax & 0xff;
if (stage_reported == DONE)
goto done;
TEST_ASSERT(stage_reported == stage ||
stage_reported == SMRAM_STAGE,
"Unexpected stage: #%x, got %x",
stage, stage_reported);
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
free(state);
}
done:
kvm_vm_free(vm);
}
......@@ -134,11 +134,6 @@ int main(int argc, char *argv[])
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
fprintf(stderr, "immediate_exit not available, skipping test\n");
exit(KSFT_SKIP);
}
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
......@@ -179,18 +174,10 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
stage, (ulong)uc.args[1]);
/*
* When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
* guest state is consistent only after userspace re-enters the
* kernel with KVM_RUN. Complete IO prior to migrating state
* to a new VM.
*/
vcpu_run_complete_io(vm, VCPU_ID);
state = vcpu_save_state(vm, VCPU_ID);
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
/* Restore state in a new VM. */
......
......@@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm,
{
struct kvm_kernel_irq_routing_entry *ei;
int r;
u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and non-irqchip routing.
*/
hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
hlist_for_each_entry(ei, &rt->map[gsi], link)
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return -EINVAL;
e->gsi = ue->gsi;
e->gsi = gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
......
......@@ -2977,12 +2977,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;
int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENODEV;
ops = kvm_device_ops_table[cd->type];
type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
ops = kvm_device_ops_table[type];
if (ops == NULL)
return -ENODEV;
......@@ -2997,7 +2999,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
dev->kvm = kvm;
mutex_lock(&kvm->lock);
ret = ops->create(dev, cd->type);
ret = ops->create(dev, type);
if (ret < 0) {
mutex_unlock(&kvm->lock);
kfree(dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment