Commit 1a715810 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini

Revert "KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest()"

Revert back to refreshing vmcs.HOST_CR3 immediately prior to VM-Enter.
The PCID (ASID) part of CR3 can be bumped without KVM being scheduled
out, as the kernel will switch CR3 during __text_poke(), e.g. in response
to a static key toggling.  If switch_mm_irqs_off() chooses a new ASID for
the mm associate with KVM, KVM will do VM-Enter => VM-Exit with a stale
vmcs.HOST_CR3.

Add a comment to explain why KVM must wait until VM-Enter is imminent to
refresh vmcs.HOST_CR3.

The following splat was captured by stashing vmcs.HOST_CR3 in kvm_vcpu
and adding a WARN in load_new_mm_cr3() to fire if a new ASID is being
loaded for the KVM-associated mm while KVM has a "running" vCPU:

  static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
  {
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	...

	WARN(vcpu && (vcpu->cr3 & GENMASK(11, 0)) != (new_mm_cr3 & GENMASK(11, 0)) &&
	     (vcpu->cr3 & PHYSICAL_PAGE_MASK) == (new_mm_cr3 & PHYSICAL_PAGE_MASK),
	     "KVM is hosed, loading CR3 = %lx, vmcs.HOST_CR3 = %lx", new_mm_cr3, vcpu->cr3);
  }

  ------------[ cut here ]------------
  KVM is hosed, loading CR3 = 8000000105393004, vmcs.HOST_CR3 = 105393003
  WARNING: CPU: 4 PID: 20717 at arch/x86/mm/tlb.c:291 load_new_mm_cr3+0x82/0xe0
  Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel
  CPU: 4 PID: 20717 Comm: stable Tainted: G        W         5.17.0-rc3+ #747
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:load_new_mm_cr3+0x82/0xe0
  RSP: 0018:ffffc9000489fa98 EFLAGS: 00010082
  RAX: 0000000000000000 RBX: 8000000105393004 RCX: 0000000000000027
  RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: ffff888277d1b788
  RBP: 0000000000000004 R08: ffff888277d1b780 R09: ffffc9000489f8b8
  R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000
  R13: ffff88810678a800 R14: 0000000000000004 R15: 0000000000000c33
  FS:  00007fa9f0e72700(0000) GS:ffff888277d00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 00000001001b5003 CR4: 0000000000172ea0
  Call Trace:
   <TASK>
   switch_mm_irqs_off+0x1cb/0x460
   __text_poke+0x308/0x3e0
   text_poke_bp_batch+0x168/0x220
   text_poke_finish+0x1b/0x30
   arch_jump_label_transform_apply+0x18/0x30
   static_key_slow_inc_cpuslocked+0x7c/0x90
   static_key_slow_inc+0x16/0x20
   kvm_lapic_set_base+0x116/0x190
   kvm_set_apic_base+0xa5/0xe0
   kvm_set_msr_common+0x2f4/0xf60
   vmx_set_msr+0x355/0xe70 [kvm_intel]
   kvm_set_msr_ignored_check+0x91/0x230
   kvm_emulate_wrmsr+0x36/0x120
   vmx_handle_exit+0x609/0x6c0 [kvm_intel]
   kvm_arch_vcpu_ioctl_run+0x146f/0x1b80
   kvm_vcpu_ioctl+0x279/0x690
   __x64_sys_ioctl+0x83/0xb0
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x44/0xae
   </TASK>
  ---[ end trace 0000000000000000 ]---

This reverts commit 15ad9762.

Fixes: 15ad9762 ("KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest()")
Reported-by: default avatarWanpeng Li <kernellwp@gmail.com>
Cc: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Acked-by: default avatarLai Jiangshan <jiangshanlai@gmail.com>
Message-Id: <20220224191917.3508476-3-seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent bca06b85
...@@ -3055,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, ...@@ -3055,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr4; unsigned long cr3, cr4;
bool vm_fail; bool vm_fail;
if (!nested_early_check) if (!nested_early_check)
...@@ -3078,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) ...@@ -3078,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/ */
vmcs_writel(GUEST_RFLAGS, 0); vmcs_writel(GUEST_RFLAGS, 0);
cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
vmcs_writel(HOST_CR3, cr3);
vmx->loaded_vmcs->host_state.cr3 = cr3;
}
cr4 = cr4_read_shadow(); cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4); vmcs_writel(HOST_CR4, cr4);
......
...@@ -1114,7 +1114,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) ...@@ -1114,7 +1114,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
int cpu = raw_smp_processor_id(); int cpu = raw_smp_processor_id();
#endif #endif
unsigned long cr3;
unsigned long fs_base, gs_base; unsigned long fs_base, gs_base;
u16 fs_sel, gs_sel; u16 fs_sel, gs_sel;
int i; int i;
...@@ -1179,14 +1178,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) ...@@ -1179,14 +1178,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
#endif #endif
vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
/* Host CR3 including its PCID is stable when guest state is loaded. */
cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != host_state->cr3)) {
vmcs_writel(HOST_CR3, cr3);
host_state->cr3 = cr3;
}
vmx->guest_state_loaded = true; vmx->guest_state_loaded = true;
} }
...@@ -6793,7 +6784,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, ...@@ -6793,7 +6784,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr4; unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */ /* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi && if (unlikely(!enable_vnmi &&
...@@ -6836,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -6836,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0; vcpu->arch.regs_dirty = 0;
/*
* Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
* prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
* it switches back to the current->mm, which can occur in KVM context
* when switching to a temporary mm to patch kernel code, e.g. if KVM
* toggles a static key while handling a VM-Exit.
*/
cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
vmcs_writel(HOST_CR3, cr3);
vmx->loaded_vmcs->host_state.cr3 = cr3;
}
cr4 = cr4_read_shadow(); cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4); vmcs_writel(HOST_CR4, cr4);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment