Commit f526d6a8 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "x86:

   - fix lock initialization race in gfn-to-pfn cache (+selftests)

   - fix two refcounting errors

   - emulator fixes

   - mask off reserved bits in CPUID

   - fix bug with disabling SGX

  RISC-V:

   - update MAINTAINERS"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/xen: Fix eventfd error handling in kvm_xen_eventfd_assign()
  KVM: x86: smm: number of GPRs in the SMRAM image depends on the image format
  KVM: x86: emulator: update the emulation mode after CR0 write
  KVM: x86: emulator: update the emulation mode after rsm
  KVM: x86: emulator: introduce emulator_recalc_and_set_mode
  KVM: x86: emulator: em_sysexit should update ctxt->mode
  KVM: selftests: Mark "guest_saw_irq" as volatile in xen_shinfo_test
  KVM: selftests: Add tests in xen_shinfo_test to detect lock races
  KVM: Reject attempts to consume or refresh inactive gfn_to_pfn_cache
  KVM: Initialize gfn_to_pfn_cache locks in dedicated helper
  KVM: VMX: fully disable SGX if SECONDARY_EXEC_ENCLS_EXITING unavailable
  KVM: x86: Exempt pending triple fault from event injection sanity check
  MAINTAINERS: git://github -> https://github.com for kvm-riscv
  KVM: debugfs: Return retval of simple_attr_open() if it fails
  KVM: x86: Reduce refcount if single_open() fails in kvm_mmu_rmaps_stat_open()
  KVM: x86: Mask off reserved bits in CPUID.8000001FH
  KVM: x86: Mask off reserved bits in CPUID.8000001AH
  KVM: x86: Mask off reserved bits in CPUID.80000008H
  KVM: x86: Mask off reserved bits in CPUID.80000006H
  KVM: x86: Mask off reserved bits in CPUID.80000001H
parents d79dcde0 73536338
@@ -11248,7 +11248,7 @@ L: kvm@vger.kernel.org
 L: kvm-riscv@lists.infradead.org
 L: linux-riscv@lists.infradead.org
 S: Maintained
-T: git git://github.com/kvm-riscv/linux.git
+T: git https://github.com/kvm-riscv/linux.git
 F: arch/riscv/include/asm/kvm*
 F: arch/riscv/include/uapi/asm/kvm*
 F: arch/riscv/kvm/
......
@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
         entry->eax = max(entry->eax, 0x80000021);
         break;
     case 0x80000001:
+        entry->ebx &= ~GENMASK(27, 16);
         cpuid_entry_override(entry, CPUID_8000_0001_EDX);
         cpuid_entry_override(entry, CPUID_8000_0001_ECX);
         break;
     case 0x80000006:
-        /* L2 cache and TLB: pass through host info. */
+        /* Drop reserved bits, pass host L2 cache and TLB info. */
+        entry->edx &= ~GENMASK(17, 16);
         break;
     case 0x80000007: /* Advanced power management */
         /* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
             g_phys_as = phys_as;

         entry->eax = g_phys_as | (virt_as << 8);
+        entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
         entry->edx = 0;
         cpuid_entry_override(entry, CPUID_8000_0008_EBX);
         break;
@@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
         entry->ecx = entry->edx = 0;
         break;
     case 0x8000001a:
+        entry->eax &= GENMASK(2, 0);
+        entry->ebx = entry->ecx = entry->edx = 0;
+        break;
     case 0x8000001e:
         break;
     case 0x8000001F:
@@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
             entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
         } else {
             cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+            /* Clear NumVMPL since KVM does not support VMPL. */
+            entry->ebx &= ~GENMASK(31, 12);
             /*
              * Enumerate '0' for "PA bits reduction", the adjusted
              * MAXPHYADDR is enumerated directly (see 0x80000008).
......
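For context (not part of this merge): GENMASK(h, l) is the kernel macro that builds a mask with bits h through l set, so each new masking line above clears an architecturally reserved CPUID field while leaving the defined bits intact. A few worked values, assuming the standard GENMASK() definition from include/linux/bits.h:

    GENMASK(27, 16) == 0x0fff0000                    /* CPUID.80000001H:EBX, reserved bits 27:16 */
    GENMASK(17, 16) == 0x00030000                    /* CPUID.80000006H:EDX, reserved bits 17:16 */
    GENMASK(31, 16) | GENMASK(11, 8) == 0xffff0f00   /* CPUID.80000008H:ECX, reserved fields     */
    GENMASK(2, 0)   == 0x00000007                    /* CPUID.8000001AH:EAX keeps only bits 2:0  */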
@@ -158,11 +158,16 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
 static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
 {
     struct kvm *kvm = inode->i_private;
+    int r;

     if (!kvm_get_kvm_safe(kvm))
         return -ENOENT;

-    return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+    r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+    if (r < 0)
+        kvm_put_kvm(kvm);
+
+    return r;
 }

 static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
......
@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
                  ctxt->mode, linear);
 }

-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
-                 enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
     ulong linear;
     int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
     if (ctxt->op_bytes != sizeof(unsigned long))
         addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
-    rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+    rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
     if (rc == X86EMUL_CONTINUE)
         ctxt->_eip = addr.ea;
     return rc;
 }

-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
 {
-    return assign_eip(ctxt, dst, ctxt->mode);
-}
+    u64 efer;
+    struct desc_struct cs;
+    u16 selector;
+    u32 base3;

-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-              const struct desc_struct *cs_desc)
-{
-    enum x86emul_mode mode = ctxt->mode;
-    int rc;
+    ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

-#ifdef CONFIG_X86_64
-    if (ctxt->mode >= X86EMUL_MODE_PROT16) {
-        if (cs_desc->l) {
-            u64 efer = 0;
+    if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+        /* Real mode. cpu must not have long mode active */
+        if (efer & EFER_LMA)
+            return X86EMUL_UNHANDLEABLE;
+        ctxt->mode = X86EMUL_MODE_REAL;
+        return X86EMUL_CONTINUE;
+    }

-            ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-            if (efer & EFER_LMA)
-                mode = X86EMUL_MODE_PROT64;
-        } else
-            mode = X86EMUL_MODE_PROT32; /* temporary value */
+    if (ctxt->eflags & X86_EFLAGS_VM) {
+        /* Protected/VM86 mode. cpu must not have long mode active */
+        if (efer & EFER_LMA)
+            return X86EMUL_UNHANDLEABLE;
+        ctxt->mode = X86EMUL_MODE_VM86;
+        return X86EMUL_CONTINUE;
     }
-#endif
-    if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
-        mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-    rc = assign_eip(ctxt, dst, mode);
-    if (rc == X86EMUL_CONTINUE)
-        ctxt->mode = mode;
-    return rc;
+
+    if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+        return X86EMUL_UNHANDLEABLE;
+
+    if (efer & EFER_LMA) {
+        if (cs.l) {
+            /* Proper long mode */
+            ctxt->mode = X86EMUL_MODE_PROT64;
+        } else if (cs.d) {
+            /* 32 bit compatibility mode*/
+            ctxt->mode = X86EMUL_MODE_PROT32;
+        } else {
+            ctxt->mode = X86EMUL_MODE_PROT16;
+        }
+    } else {
+        /* Legacy 32 bit / 16 bit mode */
+        ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+    }
+
+    return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+    return assign_eip(ctxt, dst);
+}
+
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+    int rc = emulator_recalc_and_set_mode(ctxt);
+
+    if (rc != X86EMUL_CONTINUE)
+        return rc;
+
+    return assign_eip(ctxt, dst);
 }

 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
     if (rc != X86EMUL_CONTINUE)
         return rc;

-    rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+    rc = assign_eip_far(ctxt, ctxt->src.val);
     /* Error handling is not implemented. */
     if (rc != X86EMUL_CONTINUE)
         return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
                        &new_desc);
     if (rc != X86EMUL_CONTINUE)
         return rc;

-    rc = assign_eip_far(ctxt, eip, &new_desc);
+    rc = assign_eip_far(ctxt, eip);
     /* Error handling is not implemented. */
     if (rc != X86EMUL_CONTINUE)
         return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
     ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
     ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);

-    for (i = 0; i < NR_EMULATOR_GPRS; i++)
+    for (i = 0; i < 8; i++)
         *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);

     val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
     u16 selector;
     int i, r;

-    for (i = 0; i < NR_EMULATOR_GPRS; i++)
+    for (i = 0; i < 16; i++)
         *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);

     ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
      * those side effects need to be explicitly handled for both success
      * and shutdown.
      */
-    return X86EMUL_CONTINUE;
+    return emulator_recalc_and_set_mode(ctxt);

 emulate_shutdown:
     ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
     ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

     ctxt->_eip = rdx;
+    ctxt->mode = usermode;
     *reg_write(ctxt, VCPU_REGS_RSP) = rcx;

     return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
     if (rc != X86EMUL_CONTINUE)
         return rc;

-    rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+    rc = assign_eip_far(ctxt, ctxt->src.val);
     if (rc != X86EMUL_CONTINUE)
         goto fail;
@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)

 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
-    if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+    int cr_num = ctxt->modrm_reg;
+    int r;
+
+    if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
         return emulate_gp(ctxt, 0);

     /* Disable writeback. */
     ctxt->dst.type = OP_NONE;
+
+    if (cr_num == 0) {
+        /*
+         * CR0 write might have updated CR0.PE and/or CR0.PG
+         * which can affect the cpu's execution mode.
+         */
+        r = emulator_recalc_and_set_mode(ctxt);
+        if (r != X86EMUL_CONTINUE)
+            return r;
+    }
+
     return X86EMUL_CONTINUE;
 }
......
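As a quick reference, and derived only from the new helper shown above (not additional patch content), emulator_recalc_and_set_mode() selects the emulation mode roughly as follows:

    /* CR0.PE == 0                    -> X86EMUL_MODE_REAL   (EFER.LMA must be clear)
     * CR0.PE == 1 && EFLAGS.VM == 1  -> X86EMUL_MODE_VM86   (EFER.LMA must be clear)
     * EFER.LMA == 1 && CS.L == 1     -> X86EMUL_MODE_PROT64
     * EFER.LMA == 1 && CS.L == 0     -> X86EMUL_MODE_PROT32 if CS.D, else X86EMUL_MODE_PROT16
     * EFER.LMA == 0                  -> X86EMUL_MODE_PROT32 if CS.D, else X86EMUL_MODE_PROT16
     */

em_rsm() and em_cr_write() call the helper because RSM and CR0 writes are exactly the points at which those inputs can change under the emulator.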
@@ -8263,6 +8263,11 @@ static __init int hardware_setup(void)
     if (!cpu_has_virtual_nmis())
         enable_vnmi = 0;

+#ifdef CONFIG_X86_SGX_KVM
+    if (!cpu_has_vmx_encls_vmexit())
+        enable_sgx = false;
+#endif
+
     /*
      * set_apic_access_page_addr() is used to reload apic access
      * page upon invalidation. No need to do anything if not
......
@@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,

     /* we verify if the enable bit is set... */
     if (system_time & 1) {
-        kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
-                      KVM_HOST_USES_PFN, system_time & ~1ULL,
-                      sizeof(struct pvclock_vcpu_time_info));
+        kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+                 KVM_HOST_USES_PFN, system_time & ~1ULL,
+                 sizeof(struct pvclock_vcpu_time_info));
     } else {
-        kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+        kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
     }

     return;
@@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)

 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
-    kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+    kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
     vcpu->arch.time = 0;
 }

@@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
         kvm_x86_ops.nested_ops->has_events(vcpu))
         *req_immediate_exit = true;

-    WARN_ON(kvm_is_exception_pending(vcpu));
+    /*
+     * KVM must never queue a new exception while injecting an event; KVM
+     * is done emulating and should only propagate the to-be-injected event
+     * to the VMCS/VMCB.  Queueing a new exception can put the vCPU into an
+     * infinite loop as KVM will bail from VM-Enter to inject the pending
+     * exception and start the cycle all over.
+     *
+     * Exempt triple faults as they have special handling and won't put the
+     * vCPU into an infinite loop.  Triple fault can be queued when running
+     * VMX without unrestricted guest, as that requires KVM to emulate Real
+     * Mode events (see kvm_inject_realmode_interrupt()).
+     */
+    WARN_ON_ONCE(vcpu->arch.exception.pending ||
+             vcpu->arch.exception_vmexit.pending);
     return 0;

 out:
@@ -11816,6 +11829,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
     vcpu->arch.regs_avail = ~0;
     vcpu->arch.regs_dirty = ~0;

+    kvm_gpc_init(&vcpu->arch.pv_time);
+
     if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
     else
......
@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
     int idx = srcu_read_lock(&kvm->srcu);

     if (gfn == GPA_INVALID) {
-        kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
+        kvm_gpc_deactivate(kvm, gpc);
         goto out;
     }

     do {
-        ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
-                        gpa, PAGE_SIZE);
+        ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
+                       PAGE_SIZE);
         if (ret)
             goto out;
@@ -554,14 +554,14 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                  offsetof(struct compat_vcpu_info, time));

         if (data->u.gpa == GPA_INVALID) {
-            kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+            kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
             r = 0;
             break;
         }

-        r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
-                          &vcpu->arch.xen.vcpu_info_cache,
-                          NULL, KVM_HOST_USES_PFN, data->u.gpa,
+        r = kvm_gpc_activate(vcpu->kvm,
+                     &vcpu->arch.xen.vcpu_info_cache, NULL,
+                     KVM_HOST_USES_PFN, data->u.gpa,
                      sizeof(struct vcpu_info));
         if (!r)
             kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -570,13 +570,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)

     case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
         if (data->u.gpa == GPA_INVALID) {
-            kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+            kvm_gpc_deactivate(vcpu->kvm,
                        &vcpu->arch.xen.vcpu_time_info_cache);
             r = 0;
             break;
         }

-        r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
+        r = kvm_gpc_activate(vcpu->kvm,
                      &vcpu->arch.xen.vcpu_time_info_cache,
                      NULL, KVM_HOST_USES_PFN, data->u.gpa,
                      sizeof(struct pvclock_vcpu_time_info));
@@ -590,14 +590,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
             break;
         }

         if (data->u.gpa == GPA_INVALID) {
-            kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+            kvm_gpc_deactivate(vcpu->kvm,
                        &vcpu->arch.xen.runstate_cache);
             r = 0;
             break;
         }

-        r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
-                          &vcpu->arch.xen.runstate_cache,
+        r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
                      NULL, KVM_HOST_USES_PFN, data->u.gpa,
                      sizeof(struct vcpu_runstate_info));
         break;
@@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
     case EVTCHNSTAT_ipi:
         /* IPI must map back to the same port# */
         if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
-            goto out; /* -EINVAL */
+            goto out_noeventfd; /* -EINVAL */
         break;

     case EVTCHNSTAT_interdomain:
         if (data->u.evtchn.deliver.port.port) {
             if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
-                goto out; /* -EINVAL */
+                goto out_noeventfd; /* -EINVAL */
         } else {
             eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
             if (IS_ERR(eventfd)) {
                 ret = PTR_ERR(eventfd);
-                goto out;
+                goto out_noeventfd;
             }
         }
         break;
@@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
 out:
     if (eventfd)
         eventfd_ctx_put(eventfd);
+out_noeventfd:
     kfree(evtchnfd);
     return ret;
 }
@@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 {
     vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
     vcpu->arch.xen.poll_evtchn = 0;
+
     timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+
+    kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+    kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
+    kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }

 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
     if (kvm_xen_timer_enabled(vcpu))
         kvm_xen_stop_timer(vcpu);

-    kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-                     &vcpu->arch.xen.runstate_cache);
-    kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-                     &vcpu->arch.xen.vcpu_info_cache);
-    kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-                     &vcpu->arch.xen.vcpu_time_info_cache);
+    kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+    kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+    kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
+
     del_timer_sync(&vcpu->arch.xen.poll_timer);
 }

 void kvm_xen_init_vm(struct kvm *kvm)
 {
     idr_init(&kvm->arch.xen.evtchn_ports);
+    kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
 }

 void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
     struct evtchnfd *evtchnfd;
     int i;

-    kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+    kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);

     idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
         if (!evtchnfd->deliver.port.port)
......
@@ -1240,8 +1240,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);

 /**
- * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
- *                             given guest physical address.
+ * kvm_gpc_init - initialize gfn_to_pfn_cache.
+ *
+ * @gpc:  struct gfn_to_pfn_cache object.
+ *
+ * This sets up a gfn_to_pfn_cache by initializing locks.  Note, the cache must
+ * be zero-allocated (or zeroed by the caller before init).
+ */
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
+ *                    physical address.
  *
  * @kvm:  pointer to kvm instance.
  * @gpc:  struct gfn_to_pfn_cache object.
@@ -1265,7 +1275,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
  * accessing the target page.
  */
-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
              struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
              gpa_t gpa, unsigned long len);
@@ -1324,7 +1334,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);

 /**
- * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
  *
  * @kvm:  pointer to kvm instance.
  * @gpc:  struct gfn_to_pfn_cache object.
@@ -1332,7 +1342,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
  * This removes a cache from the @kvm's list to be processed on MMU notifier
 * invocation.
 */
-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);

 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
......
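A minimal usage sketch of the renamed gfn_to_pfn_cache API, mirroring the x86 and Xen call sites elsewhere in this merge; my_cache, gpa and len are placeholders rather than code from the patch:

    /* One-time setup, e.g. at vCPU creation: initializes gpc->lock and
     * gpc->refresh_lock before the cache can ever be refreshed. */
    kvm_gpc_init(&vcpu->arch.my_cache);

    /* When the guest supplies an address: map it and link the cache into
     * kvm->gpc_list so MMU notifier events can invalidate it. */
    r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.my_cache, vcpu,
                         KVM_HOST_USES_PFN, gpa, len);

    /* When the guest disables the feature or the vCPU is destroyed: unmap
     * and unlink; check/refresh on an inactive cache now fails. */
    kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.my_cache);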
@@ -15,9 +15,13 @@
 #include <time.h>
 #include <sched.h>
 #include <signal.h>
+#include <pthread.h>

 #include <sys/eventfd.h>

+/* Defined in include/linux/kvm_types.h */
+#define GPA_INVALID    (~(ulong)0)
+
 #define SHINFO_REGION_GVA    0xc0000000ULL
 #define SHINFO_REGION_GPA    0xc0000000ULL
 #define SHINFO_REGION_SLOT   10
@@ -44,6 +48,8 @@

 #define MIN_STEAL_TIME       50000

+#define SHINFO_RACE_TIMEOUT  2    /* seconds */
+
 #define __HYPERVISOR_set_timer_op      15
 #define __HYPERVISOR_sched_op          29
 #define __HYPERVISOR_event_channel_op  32
@@ -126,7 +132,7 @@ struct {
     struct kvm_irq_routing_entry entries[2];
 } irq_routes;

-bool guest_saw_irq;
+static volatile bool guest_saw_irq;

 static void evtchn_handler(struct ex_regs *regs)
 {
@@ -148,6 +154,7 @@ static void guest_wait_for_irq(void)
 static void guest_code(void)
 {
     struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+    int i;

     __asm__ __volatile__(
         "sti\n"
@@ -325,6 +332,49 @@ static void guest_code(void)
     guest_wait_for_irq();

     GUEST_SYNC(21);
+
+    /* Racing host ioctls */
+
+    guest_wait_for_irq();
+
+    GUEST_SYNC(22);
+
+    /* Racing vmcall against host ioctl */
+
+    ports[0] = 0;
+
+    p = (struct sched_poll) {
+        .ports = ports,
+        .nr_ports = 1,
+        .timeout = 0
+    };
+
+wait_for_timer:
+    /*
+     * Poll for a timer wake event while the worker thread is mucking with
+     * the shared info.  KVM XEN drops timer IRQs if the shared info is
+     * invalid when the timer expires.  Arbitrarily poll 100 times before
+     * giving up and asking the VMM to re-arm the timer.  100 polls should
+     * consume enough time to beat on KVM without taking too long if the
+     * timer IRQ is dropped due to an invalid event channel.
+     */
+    for (i = 0; i < 100 && !guest_saw_irq; i++)
+        asm volatile("vmcall"
+                 : "=a" (rax)
+                 : "a" (__HYPERVISOR_sched_op),
+                   "D" (SCHEDOP_poll),
+                   "S" (&p)
+                 : "memory");
+
+    /*
+     * Re-send the timer IRQ if it was (likely) dropped due to the timer
+     * expiring while the event channel was invalid.
+     */
+    if (!guest_saw_irq) {
+        GUEST_SYNC(23);
+        goto wait_for_timer;
+    }
+
+    guest_saw_irq = false;
+
+    GUEST_SYNC(24);
 }

 static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -352,11 +402,36 @@ static void handle_alrm(int sig)
     TEST_FAIL("IRQ delivery timed out");
 }

+static void *juggle_shinfo_state(void *arg)
+{
+    struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+    struct kvm_xen_hvm_attr cache_init = {
+        .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+        .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+    };
+
+    struct kvm_xen_hvm_attr cache_destroy = {
+        .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+        .u.shared_info.gfn = GPA_INVALID
+    };
+
+    for (;;) {
+        __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init);
+        __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy);
+        pthread_testcancel();
+    };
+
+    return NULL;
+}
+
 int main(int argc, char *argv[])
 {
     struct timespec min_ts, max_ts, vm_ts;
     struct kvm_vm *vm;
+    pthread_t thread;
     bool verbose;
+    int ret;

     verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
                    !strncmp(argv[1], "--verbose", 10));
@@ -785,6 +860,71 @@ int main(int argc, char *argv[])
         case 21:
             TEST_ASSERT(!evtchn_irq_expected,
                     "Expected event channel IRQ but it didn't happen");
+            alarm(0);
+
+            if (verbose)
+                printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+            ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+            TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+            struct kvm_irq_routing_xen_evtchn uxe = {
+                .port = 1,
+                .vcpu = vcpu->id,
+                .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+            };
+
+            evtchn_irq_expected = true;
+            for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+                __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+            break;
+
+        case 22:
+            TEST_ASSERT(!evtchn_irq_expected,
+                    "Expected event channel IRQ but it didn't happen");
+
+            if (verbose)
+                printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+            shinfo->evtchn_pending[0] = 1;
+
+            evtchn_irq_expected = true;
+            tmr.u.timer.expires_ns = rs->state_entry_time +
+                         SHINFO_RACE_TIMEOUT * 1000000000ULL;
+            vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+            break;
+
+        case 23:
+            /*
+             * Optional and possibly repeated sync point.
+             * Injecting the timer IRQ may fail if the
+             * shinfo is invalid when the timer expires.
+             * If the timer has expired but the IRQ hasn't
+             * been delivered, rearm the timer and retry.
+             */
+            vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+            /* Resume the guest if the timer is still pending. */
+            if (tmr.u.timer.expires_ns)
+                break;
+
+            /* All done if the IRQ was delivered. */
+            if (!evtchn_irq_expected)
+                break;
+
+            tmr.u.timer.expires_ns = rs->state_entry_time +
+                         SHINFO_RACE_TIMEOUT * 1000000000ULL;
+            vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+            break;
+
+        case 24:
+            TEST_ASSERT(!evtchn_irq_expected,
+                    "Expected event channel IRQ but it didn't happen");
+
+            ret = pthread_cancel(thread);
+            TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+            ret = pthread_join(thread, 0);
+            TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
             goto done;

         case 0x20:
......
@@ -5409,6 +5409,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
                 int (*get)(void *, u64 *), int (*set)(void *, u64),
                 const char *fmt)
 {
+    int ret;
     struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
                       inode->i_private;

@@ -5420,15 +5421,13 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
     if (!kvm_get_kvm_safe(stat_data->kvm))
         return -ENOENT;

-    if (simple_attr_open(inode, file, get,
+    ret = simple_attr_open(inode, file, get,
                  kvm_stats_debugfs_mode(stat_data->desc) & 0222
-                 ? set : NULL,
-                 fmt)) {
+                 ? set : NULL, fmt);
+    if (ret)
         kvm_put_kvm(stat_data->kvm);
-        return -ENOMEM;
-    }

-    return 0;
+    return ret;
 }

 static int kvm_debugfs_release(struct inode *inode, struct file *file)
......
@@ -81,6 +81,9 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 {
     struct kvm_memslots *slots = kvm_memslots(kvm);

+    if (!gpc->active)
+        return false;
+
     if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
         return false;
@@ -240,10 +243,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 {
     struct kvm_memslots *slots = kvm_memslots(kvm);
     unsigned long page_offset = gpa & ~PAGE_MASK;
-    kvm_pfn_t old_pfn, new_pfn;
+    bool unmap_old = false;
     unsigned long old_uhva;
+    kvm_pfn_t old_pfn;
     void *old_khva;
-    int ret = 0;
+    int ret;

     /*
      * If must fit within a single page. The 'len' argument is
@@ -261,6 +265,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,

     write_lock_irq(&gpc->lock);

+    if (!gpc->active) {
+        ret = -EINVAL;
+        goto out_unlock;
+    }
+
     old_pfn = gpc->pfn;
     old_khva = gpc->khva - offset_in_page(gpc->khva);
     old_uhva = gpc->uhva;
@@ -291,6 +300,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
         /* If the HVA→PFN mapping was already valid, don't unmap it. */
         old_pfn = KVM_PFN_ERR_FAULT;
         old_khva = NULL;
+        ret = 0;
     }

  out:
@@ -305,14 +315,15 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
         gpc->khva = NULL;
     }

-    /* Snapshot the new pfn before dropping the lock! */
-    new_pfn = gpc->pfn;
+    /* Detect a pfn change before dropping the lock! */
+    unmap_old = (old_pfn != gpc->pfn);

+out_unlock:
     write_unlock_irq(&gpc->lock);

     mutex_unlock(&gpc->refresh_lock);

-    if (old_pfn != new_pfn)
+    if (unmap_old)
         gpc_unmap_khva(kvm, old_pfn, old_khva);

     return ret;
@@ -346,42 +357,61 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 }
 EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);

+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
+{
+    rwlock_init(&gpc->lock);
+    mutex_init(&gpc->refresh_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_gpc_init);

-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-                  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-                  gpa_t gpa, unsigned long len)
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+             struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+             gpa_t gpa, unsigned long len)
 {
     WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);

     if (!gpc->active) {
-        rwlock_init(&gpc->lock);
-        mutex_init(&gpc->refresh_lock);
-
         gpc->khva = NULL;
         gpc->pfn = KVM_PFN_ERR_FAULT;
         gpc->uhva = KVM_HVA_ERR_BAD;
         gpc->vcpu = vcpu;
         gpc->usage = usage;
         gpc->valid = false;
-        gpc->active = true;

         spin_lock(&kvm->gpc_lock);
         list_add(&gpc->list, &kvm->gpc_list);
         spin_unlock(&kvm->gpc_lock);
+
+        /*
+         * Activate the cache after adding it to the list, a concurrent
+         * refresh must not establish a mapping until the cache is
+         * reachable by mmu_notifier events.
+         */
+        write_lock_irq(&gpc->lock);
+        gpc->active = true;
+        write_unlock_irq(&gpc->lock);
     }
     return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
+EXPORT_SYMBOL_GPL(kvm_gpc_activate);

-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 {
     if (gpc->active) {
+        /*
+         * Deactivate the cache before removing it from the list, KVM
+         * must stall mmu_notifier events until all users go away, i.e.
+         * until gpc->lock is dropped and refresh is guaranteed to fail.
+         */
+        write_lock_irq(&gpc->lock);
+        gpc->active = false;
+        write_unlock_irq(&gpc->lock);
+
         spin_lock(&kvm->gpc_lock);
         list_del(&gpc->list);
         spin_unlock(&kvm->gpc_lock);

         kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
-        gpc->active = false;
     }
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);
+EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);