Commit 255006ad authored by Paolo Bonzini

Merge tag 'kvm-x86-vmx-6.5' of https://github.com/kvm-x86/linux into HEAD

KVM VMX changes for 6.5:

 - Fix missing/incorrect #GP checks on ENCLS

 - Use standard mmu_notifier hooks for handling APIC access page

 - Misc cleanups
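
   In short, the new check ordering in handle_encls() looks like this (a
   condensed sketch of the ENCLS hunk below, with the per-leaf emulation
   elided; all identifiers are taken from the diff):

        u32 leaf = (u32)kvm_rax_read(vcpu);

        if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
            !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
                /* SGX/SGX1 not exposed to the guest at all => #UD */
                kvm_queue_exception(vcpu, UD_VECTOR);
        } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
                   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
                /* exposed, but the leaf is disabled, SGX is disabled in the
                 * guest's BIOS, or the guest isn't paging => #GP(0) */
                kvm_inject_gp(vcpu, 0);
        } else {
                /* emulate the requested ENCLS leaf (ECREATE, EINIT, ...) */
        }

   The #UD-before-#GP ordering matches the comment added to
   encls_leaf_enabled_in_guest(): ENCLS itself #UDs when SGX1 isn't supported,
   so the leaf helper only has to distinguish the SGX2 leaves.
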
parents 24975ce8 0a3869e1
@@ -1603,6 +1603,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
         if (tdp_mmu_enabled)
                 flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
 
+        if (kvm_x86_ops.set_apic_access_page_addr &&
+            range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
+                kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
         return flush;
 }
...
@@ -152,8 +152,8 @@ static inline bool cpu_has_vmx_ept(void)
 
 static inline bool vmx_umip_emulated(void)
 {
-        return vmcs_config.cpu_based_2nd_exec_ctrl &
-                SECONDARY_EXEC_DESC;
+        return !boot_cpu_has(X86_FEATURE_UMIP) &&
+                (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_DESC);
 }
 
 static inline bool cpu_has_vmx_rdtscp(void)
...
@@ -2328,8 +2328,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
          * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
          * will not have to rewrite the controls just for this bit.
          */
-        if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
-            (vmcs12->guest_cr4 & X86_CR4_UMIP))
+        if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP))
                 exec_control |= SECONDARY_EXEC_DESC;
 
         if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
...
@@ -385,8 +385,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 }
                 break;
         case MSR_IA32_DS_AREA:
-                if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS))
-                        return 1;
                 if (is_noncanonical_address(data, vcpu))
                         return 1;
 
...
@@ -357,11 +357,12 @@ static int handle_encls_einit(struct kvm_vcpu *vcpu)
 
 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
 {
-        if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
-                return false;
-
+        /*
+         * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
+         * be reached if and only if the SGX1 leafs are enabled.
+         */
         if (leaf >= ECREATE && leaf <= ETRACK)
-                return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
+                return true;
 
         if (leaf >= EAUG && leaf <= EMODT)
                 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
@@ -380,9 +381,11 @@ int handle_encls(struct kvm_vcpu *vcpu)
 {
         u32 leaf = (u32)kvm_rax_read(vcpu);
 
-        if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
+        if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
+            !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
                 kvm_queue_exception(vcpu, UD_VECTOR);
-        } else if (!sgx_enabled_in_guest_bios(vcpu)) {
+        } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
+                   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
                 kvm_inject_gp(vcpu, 0);
         } else {
                 if (leaf == ECREATE)
...
@@ -187,7 +187,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
         _ASM_EXTABLE(.Lvmresume, .Lfixup)
         _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
 
-SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
         /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
         UNWIND_HINT_RESTORE
...
@@ -3384,15 +3384,15 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
 void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-        unsigned long old_cr4 = vcpu->arch.cr4;
+        unsigned long old_cr4 = kvm_read_cr4(vcpu);
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+        unsigned long hw_cr4;
+
         /*
          * Pass through host's Machine Check Enable value to hw_cr4, which
          * is in force while we are in guest mode. Do not let guests control
          * this bit, even if host CR4.MCE == 0.
          */
-        unsigned long hw_cr4;
-
         hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
         if (is_unrestricted_guest(vcpu))
                 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
@@ -3401,7 +3401,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         else
                 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
 
-        if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+        if (vmx_umip_emulated()) {
                 if (cr4 & X86_CR4_UMIP) {
                         secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
                         hw_cr4 &= ~X86_CR4_UMIP;
@@ -5399,7 +5399,13 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
-        WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+        /*
+         * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this
+         * and other code needs to be updated if UMIP can be guest owned.
+         */
+        BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP);
+
+        WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP));
         return kvm_emulate_instruction(vcpu, 0);
 }
 
@@ -6705,7 +6711,12 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
 {
-        struct page *page;
+        const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
+        struct kvm *kvm = vcpu->kvm;
+        struct kvm_memslots *slots = kvm_memslots(kvm);
+        struct kvm_memory_slot *slot;
+        unsigned long mmu_seq;
+        kvm_pfn_t pfn;
 
         /* Defer reload until vmcs01 is the current VMCS. */
         if (is_guest_mode(vcpu)) {
@@ -6717,18 +6728,53 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
                 return;
 
-        page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-        if (is_error_page(page))
+        /*
+         * Grab the memslot so that the hva lookup for the mmu_notifier retry
+         * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
+         * on the pfn lookup's validation of the memslot to ensure a valid hva
+         * is used for the retry check.
+         */
+        slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
+        if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                 return;
 
-        vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+        /*
+         * Ensure that the mmu_notifier sequence count is read before KVM
+         * retrieves the pfn from the primary MMU. Note, the memslot is
+         * protected by SRCU, not the mmu_notifier. Pairs with the smp_wmb()
+         * in kvm_mmu_invalidate_end().
+         */
+        mmu_seq = kvm->mmu_invalidate_seq;
+        smp_rmb();
+
+        /*
+         * No need to retry if the memslot does not exist or is invalid. KVM
+         * controls the APIC-access page memslot, and only deletes the memslot
+         * if APICv is permanently inhibited, i.e. the memslot won't reappear.
+         */
+        pfn = gfn_to_pfn_memslot(slot, gfn);
+        if (is_error_noslot_pfn(pfn))
+                return;
+
+        read_lock(&vcpu->kvm->mmu_lock);
+        if (mmu_invalidate_retry_hva(kvm, mmu_seq,
+                                     gfn_to_hva_memslot(slot, gfn))) {
+                kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+                read_unlock(&vcpu->kvm->mmu_lock);
+                goto out;
+        }
+
+        vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
+        read_unlock(&vcpu->kvm->mmu_lock);
+
         vmx_flush_tlb_current(vcpu);
 
+out:
         /*
          * Do not pin apic access page in memory, the MMU notifier
          * will call us again if it is migrated or swapped out.
          */
-        put_page(page);
+        kvm_release_pfn_clean(pfn);
 }
 
 static void vmx_hwapic_isr_update(int max_isr)
...
@@ -10449,20 +10449,6 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
                 vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
 }
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                            unsigned long start, unsigned long end)
-{
-        unsigned long apic_address;
-
-        /*
-         * The physical address of apic access page is stored in the VMCS.
-         * Update it when it becomes invalid.
-         */
-        apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-        if (start <= apic_address && apic_address < end)
-                kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-}
-
 void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
 {
         static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
...
@@ -2239,9 +2239,6 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                            unsigned long start, unsigned long end);
-
 void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
 
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
...
@@ -154,11 +154,6 @@ static unsigned long long kvm_active_vms;
 
 static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
 
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                                   unsigned long start, unsigned long end)
-{
-}
-
 __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
 {
 }
@@ -521,18 +516,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
         return container_of(mn, struct kvm, mmu_notifier);
 }
 
-static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
-                                              struct mm_struct *mm,
-                                              unsigned long start, unsigned long end)
-{
-        struct kvm *kvm = mmu_notifier_to_kvm(mn);
-        int idx;
-
-        idx = srcu_read_lock(&kvm->srcu);
-        kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
-        srcu_read_unlock(&kvm->srcu, idx);
-}
-
 typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
 
 typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
@@ -910,7 +893,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
-        .invalidate_range       = kvm_mmu_notifier_invalidate_range,
         .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
         .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
         .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
...