Commit fc02735b authored by Josh Poimboeuf's avatar Josh Poimboeuf Committed by Borislav Petkov

KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS

On eIBRS systems, the returns in the vmexit return path from
__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks.

Fix that by moving the post-vmexit spec_ctrl handling to immediately
after the vmexit.
Signed-off-by: default avatarJosh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
parent bb066506
...@@ -274,6 +274,7 @@ static inline void indirect_branch_prediction_barrier(void) ...@@ -274,6 +274,7 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */ /* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base; extern u64 x86_spec_ctrl_base;
extern u64 x86_spec_ctrl_current;
extern void write_spec_ctrl_current(u64 val, bool force); extern void write_spec_ctrl_current(u64 val, bool force);
extern u64 spec_ctrl_current(void); extern u64 spec_ctrl_current(void);
......
...@@ -195,6 +195,10 @@ void __init check_bugs(void) ...@@ -195,6 +195,10 @@ void __init check_bugs(void)
#endif #endif
} }
/*
* NOTE: For VMX, this function is not called in the vmexit path.
* It uses vmx_spec_ctrl_restore_host() instead.
*/
void void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{ {
......
...@@ -3,5 +3,6 @@ ...@@ -3,5 +3,6 @@
#define __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H
#define VMX_RUN_VMRESUME (1 << 0) #define VMX_RUN_VMRESUME (1 << 0)
#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
...@@ -33,9 +33,10 @@ ...@@ -33,9 +33,10 @@
/** /**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
* @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) * @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers) * @regs: unsigned long * (to guest registers)
* @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
* VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
* *
* Returns: * Returns:
* 0 on VM-Exit, 1 on VM-Fail * 0 on VM-Exit, 1 on VM-Fail
...@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) ...@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
#endif #endif
push %_ASM_BX push %_ASM_BX
/* Save @vmx for SPEC_CTRL handling */
push %_ASM_ARG1
/* Save @flags for SPEC_CTRL handling */
push %_ASM_ARG3
/* /*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values. * @regs is needed after VM-Exit to save the guest's register values.
...@@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ...@@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
mov %r15, VCPU_R15(%_ASM_AX) mov %r15, VCPU_R15(%_ASM_AX)
#endif #endif
/* IMPORTANT: RSB must be stuffed before the first return. */ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE xor %ebx, %ebx
/* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
xor %eax, %eax
.Lclear_regs: .Lclear_regs:
/* /*
* Clear all general purpose registers except RSP and RAX to prevent * Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded * speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers * via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values. * could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during * free. RSP and RAX are exempt as RSP is restored by hardware during
* VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
* value.
*/ */
xor %eax, %eax
xor %ecx, %ecx xor %ecx, %ecx
xor %edx, %edx xor %edx, %edx
xor %ebx, %ebx
xor %ebp, %ebp xor %ebp, %ebp
xor %esi, %esi xor %esi, %esi
xor %edi, %edi xor %edi, %edi
...@@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ...@@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
/* "POP" @regs. */ /* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP add $WORD_SIZE, %_ASM_SP
/*
* IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
* the first unbalanced RET after vmexit!
*
* For retpoline, RSB filling is needed to prevent poisoned RSB entries
* and (in some cases) RSB underflow.
*
* eIBRS has its own protection against poisoned RSB, so it doesn't
* need the RSB filling sequence. But it does need to be enabled
* before the first unbalanced RET.
*/
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
pop %_ASM_ARG2 /* @flags */
pop %_ASM_ARG1 /* @vmx */
call vmx_spec_ctrl_restore_host
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
pop %_ASM_BX pop %_ASM_BX
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
pop %r12 pop %r12
...@@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ...@@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
ud2 ud2
.Lvmfail: .Lvmfail:
/* VM-Fail: set return value to 1 */ /* VM-Fail: set return value to 1 */
mov $1, %eax mov $1, %_ASM_BX
jmp .Lclear_regs jmp .Lclear_regs
SYM_FUNC_END(__vmx_vcpu_run) SYM_FUNC_END(__vmx_vcpu_run)
......
...@@ -846,6 +846,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) ...@@ -846,6 +846,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
if (vmx->loaded_vmcs->launched) if (vmx->loaded_vmcs->launched)
flags |= VMX_RUN_VMRESUME; flags |= VMX_RUN_VMRESUME;
/*
* If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
* to change it directly without causing a vmexit. In that case read
* it after vmexit and store it in vmx->spec_ctrl.
*/
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
flags |= VMX_RUN_SAVE_SPEC_CTRL;
return flags; return flags;
} }
...@@ -6823,6 +6831,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) ...@@ -6823,6 +6831,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
} }
} }
void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
unsigned int flags)
{
u64 hostval = this_cpu_read(x86_spec_ctrl_current);
if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
return;
if (flags & VMX_RUN_SAVE_SPEC_CTRL)
vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
/*
* If the guest/host SPEC_CTRL values differ, restore the host value.
*/
if (vmx->spec_ctrl != hostval)
native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
barrier_nospec();
}
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{ {
switch (to_vmx(vcpu)->exit_reason.basic) { switch (to_vmx(vcpu)->exit_reason.basic) {
...@@ -6966,26 +6994,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -6966,26 +6994,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* The actual VMENTER/EXIT is in the .noinstr.text section. */ /* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
* turn it off. This is much more efficient than blindly adding
* it to the atomic save/restore list. Especially as the former
* (Saving guest MSRs on vmexit) doesn't even exist in KVM.
*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
* save it.
*
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* All fields are clean at this point */ /* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs)) { if (static_branch_unlikely(&enable_evmcs)) {
current_evmcs->hv_clean_fields |= current_evmcs->hv_clean_fields |=
......
...@@ -405,6 +405,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); ...@@ -405,6 +405,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
unsigned int flags); unsigned int flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment