Commit 66cecb67 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "One NULL pointer dereference, and two fixes for regressions introduced
  during the merge window.

  The rest are fixes for MIPS, s390 and nested VMX"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: Check memopp before dereference (CVE-2016-8630)
  kvm: nVMX: VMCLEAR an active shadow VMCS after last use
  KVM: x86: drop TSC offsetting kvm_x86_ops to fix KVM_GET/SET_CLOCK
  KVM: x86: fix wbinvd_dirty_mask use-after-free
  kvm/x86: Show WRMSR data is in hex
  kvm: nVMX: Fix kernel panics induced by illegal INVEPT/INVVPID types
  KVM: document lock orders
  KVM: fix OOPS on flush_work
  KVM: s390: Fix STHYI buffer alignment for diag224
  KVM: MIPS: Precalculate MMIO load resume PC
  KVM: MIPS: Make ERET handle ERL before EXL
  KVM: MIPS: Fix lazy user ASID regenerate for SMP
parents 34c510b2 d9092f52
@@ -4,7 +4,17 @@ KVM Lock Overview
 1. Acquisition Orders
 ---------------------
-(to be written)
+The acquisition orders for mutexes are as follows:
+- kvm->lock is taken outside vcpu->mutex
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+  them together is quite rare.
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything
+else is a leaf: no other lock is taken inside the critical sections.
 2: Exception
 ------------
......
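A minimal kernel-style sketch of the mutex ordering documented above (illustrative only, not part of the patch; it assumes the usual struct kvm and struct kvm_vcpu fields, and since the document does not specify the relative order of vcpu->mutex versus kvm->slots_lock, each helper shows only one documented pair):

/* Illustrative only: helpers written against the ordering rules above. */
static void example_lock_vcpu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	mutex_lock(&kvm->lock);		/* kvm->lock is the outer mutex */
	mutex_lock(&vcpu->mutex);	/* vcpu->mutex nests inside kvm->lock */
	/* ... work needing both ... */
	mutex_unlock(&vcpu->mutex);
	mutex_unlock(&kvm->lock);
}

static void example_lock_slots_irq(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);		/* outermost */
	mutex_lock(&kvm->slots_lock);	/* nests inside kvm->lock */
	mutex_lock(&kvm->irq_lock);	/* innermost; rarely held together with slots_lock */
	/* ... */
	mutex_unlock(&kvm->irq_lock);
	mutex_unlock(&kvm->slots_lock);
	mutex_unlock(&kvm->lock);
}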
@@ -293,7 +293,10 @@ struct kvm_vcpu_arch {
 	/* Host KSEG0 address of the EI/DI offset */
 	void *kseg0_commpage;
-	u32 io_gpr;		/* GPR used as IO source/target */
+	/* Resume PC after MMIO completion */
+	unsigned long io_pc;
+	/* GPR used as IO source/target */
+	u32 io_gpr;
 	struct hrtimer comparecount_timer;
 	/* Count timer control KVM register */
@@ -315,8 +318,6 @@ struct kvm_vcpu_arch {
 	/* Bitmask of pending exceptions to be cleared */
 	unsigned long pending_exceptions_clr;
-	u32 pending_load_cause;
 	/* Save/Restore the entryhi register when are are preempted/scheduled back in */
 	unsigned long preempt_entryhi;
......
@@ -790,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	enum emulation_result er = EMULATE_DONE;
-	if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
+	if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
+		kvm_clear_c0_guest_status(cop0, ST0_ERL);
+		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
+	} else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
 		kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
 			  kvm_read_c0_guest_epc(cop0));
 		kvm_clear_c0_guest_status(cop0, ST0_EXL);
 		vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);
-	} else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
-		kvm_clear_c0_guest_status(cop0, ST0_ERL);
-		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
 	} else {
 		kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n",
 			vcpu->arch.pc);
@@ -1528,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
 					    struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DO_MMIO;
+	unsigned long curr_pc;
 	u32 op, rt;
 	u32 bytes;
 	rt = inst.i_format.rt;
 	op = inst.i_format.opcode;
-	vcpu->arch.pending_load_cause = cause;
+	/*
+	 * Find the resume PC now while we have safe and easy access to the
+	 * prior branch instruction, and save it for
+	 * kvm_mips_complete_mmio_load() to restore later.
+	 */
+	curr_pc = vcpu->arch.pc;
+	er = update_pc(vcpu, cause);
+	if (er == EMULATE_FAIL)
+		return er;
+	vcpu->arch.io_pc = vcpu->arch.pc;
+	vcpu->arch.pc = curr_pc;
 	vcpu->arch.io_gpr = rt;
 	switch (op) {
@@ -2494,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 		goto done;
 	}
-	er = update_pc(vcpu, vcpu->arch.pending_load_cause);
-	if (er == EMULATE_FAIL)
-		return er;
+	/* Restore saved resume PC */
+	vcpu->arch.pc = vcpu->arch.io_pc;
 	switch (run->mmio.len) {
 	case 4:
@@ -2518,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 		break;
 	}
-	if (vcpu->arch.pending_load_cause & CAUSEF_BD)
-		kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n",
-			  vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr,
-			  vcpu->mmio_needed);
 done:
 	return er;
 }
......
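The emulate.c hunks above precompute the MMIO resume PC at load-issue time, while the branch-delay information in the cause register is still at hand, and merely restore it at completion. A hypothetical stand-alone sketch of that pattern, with made-up types that are not KVM code:

#include <stdint.h>

/* Hypothetical issue/completion split: compute the resume address while the
 * faulting context is still available, stash it, and apply it once the slow
 * (MMIO-like) operation completes. */
struct vcpu_state {
	uint64_t pc;	/* current program counter */
	uint64_t io_pc;	/* precomputed PC to resume at after completion */
};

static void issue_slow_load(struct vcpu_state *v, uint64_t resume_pc)
{
	v->io_pc = resume_pc;	/* remember where to continue */
	/* v->pc stays at the faulting instruction until completion */
}

static void complete_slow_load(struct vcpu_state *v)
{
	v->pc = v->io_pc;	/* no recomputation needed at completion time */
}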
@@ -426,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	int cpu = smp_processor_id();
+	int i, cpu = smp_processor_id();
 	unsigned int gasid;
 	/*
@@ -442,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 						vcpu);
 			vcpu->arch.guest_user_asid[cpu] =
 				vcpu->arch.guest_user_mm.context.asid[cpu];
+			for_each_possible_cpu(i)
+				if (i != cpu)
+					vcpu->arch.guest_user_asid[cpu] = 0;
 			vcpu->arch.last_user_gasid = gasid;
 		}
 	}
......
@@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
 						asid_version_mask(cpu)) {
-		u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
-				KVM_ENTRYHI_ASID;
 		kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
 		vcpu->arch.guest_user_asid[cpu] =
 			vcpu->arch.guest_user_mm.context.asid[cpu];
-		vcpu->arch.last_user_gasid = gasid;
 		newasid++;
 		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
......
@@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	if (r < 0)
 		goto out;
-	diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_buf || diag224(diag224_buf))
 		goto out;
@@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
 		sctns->par.infpval1 |= PAR_WGHT_VLD;
 out:
-	kfree(diag224_buf);
+	free_page((unsigned long)diag224_buf);
 	vfree(diag204_buf);
 }
......
@@ -948,7 +948,6 @@ struct kvm_x86_ops {
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
 	bool (*invpcid_supported)(void);
-	void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
 	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -958,8 +957,6 @@ struct kvm_x86_ops {
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
-	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
......
@@ -5045,7 +5045,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
-	if (ctxt->rip_relative)
+	if (ctxt->rip_relative && likely(ctxt->memopp))
 		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
 					ctxt->memopp->addr.mem.ea + ctxt->_eip);
......
@@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
-static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	svm->vmcb->control.tsc_offset += adjustment;
-	if (is_guest_mode(vcpu))
-		svm->nested.hsave->control.tsc_offset += adjustment;
-	else
-		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-				     svm->vmcb->control.tsc_offset - adjustment,
-				     svm->vmcb->control.tsc_offset);
-	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-}
 static void avic_init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb;
@@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	return 0;
 }
-static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
-	return vmcb->control.tsc_offset + host_tsc;
-}
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 	.write_tsc_offset = svm_write_tsc_offset,
-	.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
-	.read_l1_tsc = svm_read_l1_tsc,
 	.set_tdp_cr3 = set_tdp_cr3,
......
@@ -187,6 +187,7 @@ struct vmcs {
  */
 struct loaded_vmcs {
 	struct vmcs *vmcs;
+	struct vmcs *shadow_vmcs;
 	int cpu;
 	int launched;
 	struct list_head loaded_vmcss_on_cpu_link;
@@ -411,7 +412,6 @@ struct nested_vmx {
 	 * memory during VMXOFF, VMCLEAR, VMPTRLD.
 	 */
 	struct vmcs12 *cached_vmcs12;
-	struct vmcs *current_shadow_vmcs;
 	/*
 	 * Indicates if the shadow vmcs must be updated with the
 	 * data hold by vmcs12
@@ -421,7 +421,6 @@ struct nested_vmx {
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
-	u64 vmcs01_tsc_offset;
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
@@ -1419,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs)
 static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 {
 	vmcs_clear(loaded_vmcs->vmcs);
+	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+		vmcs_clear(loaded_vmcs->shadow_vmcs);
 	loaded_vmcs->cpu = -1;
 	loaded_vmcs->launched = 0;
 }
@@ -2604,20 +2605,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
 	return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
 }
-/*
- * Like guest_read_tsc, but always returns L1's notion of the timestamp
- * counter, even if a nested guest (L2) is currently running.
- */
-static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-	u64 tsc_offset;
-	tsc_offset = is_guest_mode(vcpu) ?
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset :
-		vmcs_read64(TSC_OFFSET);
-	return host_tsc + tsc_offset;
-}
 /*
  * writes 'offset' into guest's timestamp counter offset register
  */
@@ -2631,7 +2618,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		 * to the newly set TSC to get L2's TSC.
 		 */
 		struct vmcs12 *vmcs12;
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
 		/* recalculate vmcs02.TSC_OFFSET: */
 		vmcs12 = get_vmcs12(vcpu);
 		vmcs_write64(TSC_OFFSET, offset +
@@ -2644,19 +2630,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	}
 }
-static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-	u64 offset = vmcs_read64(TSC_OFFSET);
-	vmcs_write64(TSC_OFFSET, offset + adjustment);
-	if (is_guest_mode(vcpu)) {
-		/* Even when running L2, the adjustment needs to apply to L1 */
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
-	} else
-		trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
-					   offset + adjustment);
-}
 static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -3562,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	loaded_vmcs_clear(loaded_vmcs);
 	free_vmcs(loaded_vmcs->vmcs);
 	loaded_vmcs->vmcs = NULL;
+	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
 }
 static void free_kvm_area(void)
@@ -6696,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();
+	item->vmcs02.shadow_vmcs = NULL;
 	if (!item->vmcs02.vmcs) {
 		kfree(item);
 		return NULL;
@@ -7072,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
 		vmcs_clear(shadow_vmcs);
-		vmx->nested.current_shadow_vmcs = shadow_vmcs;
+		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 	}
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
@@ -7174,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx)
 		free_page((unsigned long)vmx->nested.msr_bitmap);
 		vmx->nested.msr_bitmap = NULL;
 	}
-	if (enable_shadow_vmcs)
-		free_vmcs(vmx->nested.current_shadow_vmcs);
+	if (enable_shadow_vmcs) {
+		vmcs_clear(vmx->vmcs01.shadow_vmcs);
+		free_vmcs(vmx->vmcs01.shadow_vmcs);
+		vmx->vmcs01.shadow_vmcs = NULL;
+	}
 	kfree(vmx->nested.cached_vmcs12);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
@@ -7352,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	int i;
 	unsigned long field;
 	u64 field_value;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 	const unsigned long *fields = shadow_read_write_fields;
 	const int num_fields = max_shadow_read_write_fields;
@@ -7401,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	int i, q;
 	unsigned long field;
 	u64 field_value = 0;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 	vmcs_load(shadow_vmcs);
@@ -7591,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
 				      SECONDARY_EXEC_SHADOW_VMCS);
 			vmcs_write64(VMCS_LINK_POINTER,
-				     __pa(vmx->nested.current_shadow_vmcs));
+				     __pa(vmx->vmcs01.shadow_vmcs));
 			vmx->nested.sync_shadow_vmcs = true;
 		}
 	}
@@ -7659,7 +7637,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
-	if (!(types & (1UL << type))) {
+	if (type >= 32 || !(types & (1 << type))) {
 		nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 		skip_emulated_instruction(vcpu);
@@ -7722,7 +7700,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
-	if (!(types & (1UL << type))) {
+	if (type >= 32 || !(types & (1 << type))) {
 		nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 		skip_emulated_instruction(vcpu);
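The reasoning behind the two hunks above is that shifting a 32-bit capability mask by a guest-supplied type of 32 or more is undefined behaviour, so the type has to be range-checked before being used as a shift count. A minimal, hypothetical stand-alone sketch of that guard (not KVM code):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper: reject any type that would shift out of range before
 * testing it against the supported-types bitmap. */
static bool type_is_supported(uint32_t supported_types, uint64_t type)
{
	if (type >= 32)			/* 1 << type would be undefined here */
		return false;
	return supported_types & (UINT32_C(1) << type);
}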
@@ -9156,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	vmx->loaded_vmcs->vmcs = alloc_vmcs();
+	vmx->loaded_vmcs->shadow_vmcs = NULL;
 	if (!vmx->loaded_vmcs->vmcs)
 		goto free_msrs;
 	if (!vmm_exclusive)
@@ -10061,9 +10040,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
-			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+			vcpu->arch.tsc_offset + vmcs12->tsc_offset);
 	else
-		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+		vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
@@ -10293,8 +10272,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	enter_guest_mode(vcpu);
-	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -10818,7 +10795,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	load_vmcs12_host_state(vcpu, vmcs12);
 	/* Update any VMCS fields that might have changed while L2 ran */
-	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (vmx->hv_deadline_tsc == -1)
 		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
 				PIN_BASED_VMX_PREEMPTION_TIMER);
@@ -11339,8 +11316,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 	.write_tsc_offset = vmx_write_tsc_offset,
-	.adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
-	.read_l1_tsc = vmx_read_l1_tsc,
 	.set_tdp_cr3 = vmx_set_cr3,
......
@@ -1409,7 +1409,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
+	return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
@@ -1547,7 +1547,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 					   s64 adjustment)
 {
-	kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+	kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
 }
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -1555,7 +1555,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
 	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
 		WARN_ON(adjustment < 0);
 	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
-	kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+	adjust_tsc_offset_guest(vcpu, adjustment);
 }
 #ifdef CONFIG_X86_64
@@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
 		 */
-		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
 		if (!guest_cpuid_has_osvw(vcpu))
@@ -2280,11 +2280,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
 			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
-			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
 				    msr, data);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
 				    msr, data);
 			break;
 		}
@@ -7410,10 +7410,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
 	kvmclock_reset(vcpu);
-	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 	kvm_x86_ops->vcpu_free(vcpu);
+	free_cpumask_var(wbinvd_dirty_mask);
 }
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
......
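With the per-vendor read_l1_tsc and adjust_tsc_offset_guest hooks gone, L1's TSC is derived entirely from state kept in common code: the scaled host TSC plus the cached per-vCPU offset. A hypothetical stand-alone sketch of that arithmetic (names and the fixed-point fraction width are illustrative assumptions, not KVM definitions):

#include <stdint.h>

/* Hypothetical helpers mirroring the arithmetic above:
 * l1_tsc = tsc_offset + scale(host_tsc), where scaling is a fixed-point
 * multiply by ratio with frac_bits fractional bits. */
static uint64_t scale_tsc(uint64_t host_tsc, uint64_t ratio, unsigned int frac_bits)
{
	return (uint64_t)(((unsigned __int128)host_tsc * ratio) >> frac_bits);
}

static uint64_t read_l1_tsc(uint64_t host_tsc, uint64_t ratio,
			    unsigned int frac_bits, uint64_t tsc_offset)
{
	return tsc_offset + scale_tsc(host_tsc, ratio, frac_bits);
}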
@@ -42,6 +42,7 @@
 #ifdef CONFIG_HAVE_KVM_IRQFD
+static struct workqueue_struct *irqfd_cleanup_wq;
 static void
 irqfd_inject(struct work_struct *work)
@@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 	list_del_init(&irqfd->list);
-	schedule_work(&irqfd->shutdown);
+	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 	 * so that we guarantee there will not be any more interrupts on this
 	 * gsi once this deassign function returns.
 	 */
-	flush_work(&irqfd->shutdown);
+	flush_workqueue(irqfd_cleanup_wq);
 	return 0;
 }
@@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm)
 	 * Block until we know all outstanding shutdown jobs have completed
 	 * since we do not take a kvm* reference.
 	 */
-	flush_work(&irqfd->shutdown);
+	flush_workqueue(irqfd_cleanup_wq);
 }
@@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm)
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated
+ * queue to ease flushing work items when a VM exits.
+ */
+int kvm_irqfd_init(void)
+{
+	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
+	if (!irqfd_cleanup_wq)
+		return -ENOMEM;
+	return 0;
+}
 void kvm_irqfd_exit(void)
 {
+	destroy_workqueue(irqfd_cleanup_wq);
 }
 #endif
......
@@ -3844,7 +3844,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	 * kvm_arch_init makes sure there's at most one caller
 	 * for architectures that support multiple implementations,
 	 * like intel and amd on x86.
+	 * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
+	 * conflicts in case kvm is already setup for another implementation.
 	 */
+	r = kvm_irqfd_init();
+	if (r)
+		goto out_irqfd;
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
@@ -3926,6 +3931,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
 	kvm_irqfd_exit();
+out_irqfd:
 	kvm_arch_exit();
 out_fail:
 	return r;
......