Commit ca0ea8a6 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

 - Fix for compilation of selftests on non-x86 architectures

 - Fix for kvm_run->if_flag on SEV-ES

 - Fix for page table use-after-free if yielding during exit_mm()

 - Improve behavior when userspace starts a nested guest with invalid
   state

 - Fix missed wakeup with assigned devices but no VT-d posted interrupts

 - Do not tell userspace to save/restore an unsupported PMU MSR

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Wake vCPU when delivering posted IRQ even if vCPU == this vCPU
  KVM: selftests: Add test to verify TRIPLE_FAULT on invalid L2 guest state
  KVM: VMX: Fix stale docs for kvm-intel.emulate_invalid_guest_state
  KVM: nVMX: Synthesize TRIPLE_FAULT for L2 if emulation is required
  KVM: VMX: Always clear vmx->fail on emulation_required
  selftests: KVM: Fix non-x86 compiling
  KVM: x86: Always set kvm_run->if_flag
  KVM: x86/mmu: Don't advance iterator after restart due to yielding
  KVM: x86: remove PMU FIXED_CTR3 from msrs_to_save_all
parents 5dbdc4c5 fdba608f
......@@ -2413,8 +2413,12 @@
Default is 1 (enabled)
kvm-intel.emulate_invalid_guest_state=
[KVM,Intel] Enable emulation of invalid guest states
Default is 0 (disabled)
[KVM,Intel] Disable emulation of invalid guest state.
Ignored if kvm-intel.enable_unrestricted_guest=1, as
guest state is never invalid for unrestricted guests.
This param doesn't apply to nested guests (L2), as KVM
never emulates invalid L2 guest state.
Default is 1 (enabled)
kvm-intel.flexpriority=
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
......
......@@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
KVM_X86_OP(set_rflags)
KVM_X86_OP(get_if_flag)
KVM_X86_OP(tlb_flush_all)
KVM_X86_OP(tlb_flush_current)
KVM_X86_OP_NULL(tlb_remote_flush)
......
......@@ -1349,6 +1349,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
bool (*get_if_flag)(struct kvm_vcpu *vcpu);
void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
......
......@@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
*/
void tdp_iter_restart(struct tdp_iter *iter)
{
iter->yielded = false;
iter->yielded_gfn = iter->next_last_level_gfn;
iter->level = iter->root_level;
......@@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter)
*/
void tdp_iter_next(struct tdp_iter *iter)
{
if (iter->yielded) {
tdp_iter_restart(iter);
return;
}
if (try_step_down(iter))
return;
......
......@@ -45,6 +45,12 @@ struct tdp_iter {
* iterator walks off the end of the paging structure.
*/
bool valid;
/*
* True if KVM dropped mmu_lock and yielded in the middle of a walk, in
* which case tdp_iter_next() needs to restart the walk at the root
* level instead of advancing to the next entry.
*/
bool yielded;
};
/*
......
......@@ -502,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
WARN_ON_ONCE(iter->yielded);
lockdep_assert_held_read(&kvm->mmu_lock);
/*
......@@ -575,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte, bool record_acc_track,
bool record_dirty_log)
{
WARN_ON_ONCE(iter->yielded);
lockdep_assert_held_write(&kvm->mmu_lock);
/*
......@@ -640,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
* If this function should yield and flush is set, it will perform a remote
* TLB flush before yielding.
*
* If this function yields, it will also reset the tdp_iter's walk over the
* paging structure and the calling function should skip to the next
* iteration to allow the iterator to continue its traversal from the
* paging structure root.
* If this function yields, iter->yielded is set and the caller must skip to
* the next iteration, where tdp_iter_next() will reset the tdp_iter's walk
* over the paging structures to allow the iterator to continue its traversal
* from the paging structure root.
*
* Return true if this function yielded and the iterator's traversal was reset.
* Return false if a yield was not needed.
* Returns true if this function yielded.
*/
static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
struct tdp_iter *iter, bool flush,
bool shared)
static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
struct tdp_iter *iter,
bool flush, bool shared)
{
WARN_ON(iter->yielded);
/* Ensure forward progress has been made before yielding. */
if (iter->next_last_level_gfn == iter->yielded_gfn)
return false;
......@@ -671,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
WARN_ON(iter->gfn > iter->next_last_level_gfn);
tdp_iter_restart(iter);
return true;
iter->yielded = true;
}
return false;
return iter->yielded;
}
/*
......
......@@ -1585,6 +1585,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
{
struct vmcb *vmcb = to_svm(vcpu)->vmcb;
return sev_es_guest(vcpu->kvm)
? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
: kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
}
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
......@@ -3568,14 +3577,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
if (!gif_set(svm))
return true;
if (sev_es_guest(vcpu->kvm)) {
/*
* SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
* bit to determine the state of the IF flag.
*/
if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
return true;
} else if (is_guest_mode(vcpu)) {
if (is_guest_mode(vcpu)) {
/* As long as interrupts are being delivered... */
if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
......@@ -3586,7 +3588,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
if (nested_exit_on_intr(svm))
return false;
} else {
if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
if (!svm_get_if_flag(vcpu))
return true;
}
......@@ -4621,6 +4623,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
.get_if_flag = svm_get_if_flag,
.tlb_flush_all = svm_flush_tlb,
.tlb_flush_current = svm_flush_tlb,
......
......@@ -1363,6 +1363,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmx->emulation_required = vmx_emulation_required(vcpu);
}
static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
{
return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
}
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
......@@ -3959,8 +3964,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
if (pi_test_and_set_on(&vmx->pi_desc))
return 0;
if (vcpu != kvm_get_running_vcpu() &&
!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
return 0;
......@@ -5877,18 +5881,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vmx_flush_pml_buffer(vcpu);
/*
* We should never reach this point with a pending nested VM-Enter, and
* more specifically emulation of L2 due to invalid guest state (see
* below) should never happen as that means we incorrectly allowed a
* nested VM-Enter with an invalid vmcs12.
* KVM should never reach this point with a pending nested VM-Enter.
* More specifically, short-circuiting VM-Entry to emulate L2 due to
* invalid guest state should never happen as that means KVM knowingly
* allowed a nested VM-Enter with an invalid vmcs12. More below.
*/
if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
return -EIO;
/* If guest state is invalid, start emulating */
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
if (is_guest_mode(vcpu)) {
/*
* PML is never enabled when running L2, bail immediately if a
......@@ -5910,10 +5910,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
*/
nested_mark_vmcs12_pages_dirty(vcpu);
/*
* Synthesize a triple fault if L2 state is invalid. In normal
* operation, nested VM-Enter rejects any attempt to enter L2
* with invalid state. However, those checks are skipped if
* state is being stuffed via RSM or KVM_SET_NESTED_STATE. If
* L2 state is invalid, it means either L1 modified SMRAM state
* or userspace provided bad state. Synthesize TRIPLE_FAULT as
* doing so is architecturally allowed in the RSM case, and is
* the least awful solution for the userspace case without
* risking false positives.
*/
if (vmx->emulation_required) {
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
return 1;
}
if (nested_vmx_reflect_vmexit(vcpu))
return 1;
}
/* If guest state is invalid, start emulating. L2 is handled above. */
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
if (exit_reason.failed_vmentry) {
dump_vmcs(vcpu);
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
......@@ -6608,9 +6628,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
* consistency check VM-Exit due to invalid guest state and bail.
*/
if (unlikely(vmx->emulation_required)) {
/* We don't emulate invalid state of a nested guest */
vmx->fail = is_guest_mode(vcpu);
vmx->fail = 0;
vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
vmx->exit_reason.failed_vmentry = 1;
......@@ -7579,6 +7597,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
.get_if_flag = vmx_get_if_flag,
.tlb_flush_all = vmx_flush_tlb_all,
.tlb_flush_current = vmx_flush_tlb_current,
......
......@@ -1331,7 +1331,7 @@ static const u32 msrs_to_save_all[] = {
MSR_IA32_UMWAIT_CONTROL,
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
......@@ -9001,14 +9001,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
/*
* if_flag is obsolete and useless, so do not bother
* setting it for SEV-ES guests. Userspace can just
* use kvm_run->ready_for_interrupt_injection.
*/
kvm_run->if_flag = !vcpu->arch.guest_state_protected
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
......
......@@ -35,6 +35,7 @@
/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
/x86_64/vmx_invalid_nested_guest_state
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
......
......@@ -64,6 +64,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
......
......@@ -71,15 +71,6 @@ enum vm_guest_mode {
#endif
#if defined(__x86_64__)
unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
#else
static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
#endif
#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
......@@ -330,6 +321,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
uint64_t vm_get_max_gfn(struct kvm_vm *vm);
int vm_get_fd(struct kvm_vm *vm);
......
......@@ -2328,6 +2328,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
return vm->page_shift;
}
unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
{
return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
......
// SPDX-License-Identifier: GPL-2.0-only
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include <string.h>
#include <sys/ioctl.h>
#include "kselftest.h"
#define VCPU_ID 0
#define ARBITRARY_IO_PORT 0x2000
static struct kvm_vm *vm;
static void l2_guest_code(void)
{
/*
* Generate an exit to L0 userspace, i.e. main(), via I/O to an
* arbitrary port.
*/
asm volatile("inb %%dx, %%al"
: : [port] "d" (ARBITRARY_IO_PORT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
GUEST_ASSERT(load_vmcs(vmx_pages));
/* Prepare the VMCS for L2 execution. */
prepare_vmcs(vmx_pages, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
/*
* L2 must be run without unrestricted guest, verify that the selftests
* library hasn't enabled it. Because KVM selftests jump directly to
* 64-bit mode, unrestricted guest support isn't required.
*/
GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
!(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
GUEST_ASSERT(!vmlaunch());
/* L2 should triple fault after main() stuffs invalid guest state. */
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
GUEST_DONE();
}
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
struct kvm_sregs sregs;
struct kvm_run *run;
struct ucall uc;
nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
vcpu_run(vm, VCPU_ID);
run = vcpu_state(vm, VCPU_ID);
/*
* The first exit to L0 userspace should be an I/O access from L2.
* Running L1 should launch L2 without triggering an exit to userspace.
*/
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Expected KVM_EXIT_IO, got: %u (%s)\n",
run->exit_reason, exit_reason_str(run->exit_reason));
TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
"Expected IN from port %d from L2, got port %d",
ARBITRARY_IO_PORT, run->io.port);
/*
* Stuff invalid guest state for L2 by making TR unusuable. The next
* KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
* emulating invalid guest state for L2.
*/
memset(&sregs, 0, sizeof(sregs));
vcpu_sregs_get(vm, VCPU_ID, &sregs);
sregs.tr.unusable = 1;
vcpu_sregs_set(vm, VCPU_ID, &sregs);
vcpu_run(vm, VCPU_ID);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_DONE:
break;
case UCALL_ABORT:
TEST_FAIL("%s", (const char *)uc.args[0]);
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment