Commit c8401dda, authored by Paolo Bonzini and committed by Radim Krčmář

KVM: x86: fix singlestepping over syscall

TF is handled a bit differently for syscall and sysret, compared
to the other instructions: TF is checked after the instruction completes,
so that the OS can disable #DB at a syscall by adding TF to FMASK.
When the sysret is executed the #DB is taken "as if" the syscall insn
just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
Fix the behavior, otherwise you could get #DB on a user stack which is not
nice.  This does not affect Linux guests, as they use an IST or task gate
for #DB.

This fixes CVE-2017-7518.

Cc: stable@vger.kernel.org
Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
parent d6aa07c1
...@@ -296,6 +296,7 @@ struct x86_emulate_ctxt { ...@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */ bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */ bool ud; /* inject an #UD if host doesn't support insn */
bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception; bool have_exception;
struct x86_exception exception; struct x86_exception exception;
......
...@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ...@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
} }
ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }
......
...@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ...@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu); ctxt->eflags = kvm_get_rflags(vcpu);
ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
ctxt->eip = kvm_rip_read(vcpu); ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
...@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, ...@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6; return dr6;
} }
static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{ {
struct kvm_run *kvm_run = vcpu->run; struct kvm_run *kvm_run = vcpu->run;
/* if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
* rflags is the old, "raw" value of the flags. The new value has kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
* not been saved yet. kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
* kvm_run->debug.arch.exception = DB_VECTOR;
* This is correct even for TF set by the guest, because "the kvm_run->exit_reason = KVM_EXIT_DEBUG;
* processor will not generate this exception after the instruction *r = EMULATE_USER_EXIT;
* that sets the TF flag". } else {
*/ /*
if (unlikely(rflags & X86_EFLAGS_TF)) { * "Certain debug exceptions may clear bit 0-3. The
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { * remaining contents of the DR6 register are never
kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | * cleared by the processor".
DR6_RTM; */
kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; vcpu->arch.dr6 &= ~15;
kvm_run->debug.arch.exception = DB_VECTOR; vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_queue_exception(vcpu, DB_VECTOR);
*r = EMULATE_USER_EXIT;
} else {
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
* cleared by the processor".
*/
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
}
} }
} }
...@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) ...@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
int r = EMULATE_DONE; int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu); kvm_x86_ops->skip_emulated_instruction(vcpu);
kvm_vcpu_check_singlestep(vcpu, rflags, &r);
/*
* rflags is the old, "raw" value of the flags. The new value has
* not been saved yet.
*
* This is correct even for TF set by the guest, because "the
* processor will not generate this exception after the instruction
* that sets the TF flag".
*/
if (unlikely(rflags & X86_EFLAGS_TF))
kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE; return r == EMULATE_DONE;
} }
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
...@@ -5726,8 +5727,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ...@@ -5726,8 +5727,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
toggle_interruptibility(vcpu, ctxt->interruptibility); toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false; vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip); kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE) if (r == EMULATE_DONE &&
kvm_vcpu_check_singlestep(vcpu, rflags, &r); (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception || if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags); __kvm_set_rflags(vcpu, ctxt->eflags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment