Commit b645af2d authored by Andy Lutomirski, committed by Linus Torvalds

x86_64, traps: Rework bad_iret

It's possible for iretq to userspace to fail.  This can happen because
of a bad CS, SS, or RIP.

Historically, we've handled it by fixing up an exception from iretq to
land at bad_iret, which pretends that the failed iret frame was really
the hardware part of #GP(0) from userspace.  To make this work, there's
an extra fixup to fudge the gs base into a usable state.

This is suboptimal because it loses the original exception.  It's also
buggy because there's no guarantee that we were on the kernel stack to
begin with.  For example, if the failing iret happened on return from an
NMI, then we'll end up executing general_protection on the NMI stack.
This is bad for several reasons, the most immediate of which is that
general_protection, as a non-paranoid idtentry, will try to deliver
signals and/or schedule from the wrong stack.

This patch throws out bad_iret entirely.  As a replacement, it augments
the existing swapgs fudge into a full-blown iret fixup, mostly written
in C.  It should be clearer and more correct.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 6f442be2
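
As an aside (an illustration, not part of the commit): the easiest way to provoke the failing iretq described above is from a signal handler. The 64-bit rt_sigreturn path rebuilds the kernel's return frame from the ucontext and only forces RPL 3 onto the CS it finds there, so planting an unusable selector makes the eventual iretq back to userspace fault. The sketch below is hedged: it assumes a glibc/x86_64 environment where REG_CSGSFS carries CS in its low 16 bits and a kernel that, like the one this commit targets, does not otherwise validate the sigframe CS. Expect the process to die with SIGSEGV once the kernel converts the in-kernel #GP into a user-mode fault.

/*
 * Illustration only (not from this commit): provoke a failing iretq
 * to userspace by returning from a signal with a bogus CS.
 */
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
	ucontext_t *uc = ctx;

	(void)sig;
	(void)si;
	/*
	 * REG_CSGSFS packs cs, gs and fs into one 64-bit slot; the low
	 * 16 bits are CS.  Zero them: rt_sigreturn ORs in RPL 3, the
	 * iret frame ends up holding the null selector 0x3, and the
	 * kernel's iretq back to this process takes #GP(0) at
	 * native_irq_return_iret.
	 */
	uc->uc_mcontext.gregs[REG_CSGSFS] &= ~0xffffLL;
}

int main(void)
{
	struct sigaction sa = { .sa_flags = SA_SIGINFO };

	sa.sa_sigaction = handler;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	raise(SIGUSR1);

	/* Not reached: the fixed-up #GP comes back as a fatal SIGSEGV. */
	puts("survived a bogus CS?!");
	return 0;
}

The commit message's NMI scenario is the same kind of fault arriving via a different return path; the rework below handles both by relocating the frame in fixup_bad_iret rather than faking a #GP on whatever stack happened to be live.
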
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,8 +830,13 @@ ENTRY(native_iret)
 .global native_irq_return_iret
 native_irq_return_iret:
+	/*
+	 * This may fault.  Non-paranoid faults on return to userspace are
+	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
+	 * Double-faults due to espfix64 are handled in do_double_fault.
+	 * Other faults here are fatal.
+	 */
 	iretq
-	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -859,25 +864,6 @@ native_irq_return_ldt:
 	jmp native_irq_return_iret
 #endif
 
-	.section .fixup,"ax"
-bad_iret:
-	/*
-	 * The iret traps when the %cs or %ss being restored is bogus.
-	 * We've lost the original trap vector and error code.
-	 * #GPF is the most likely one to get for an invalid selector.
-	 * So pretend we completed the iret and took the #GPF in user mode.
-	 *
-	 * We are now running with the kernel GS after exception recovery.
-	 * But error_entry expects us to have user GS to match the user %cs,
-	 * so swap back.
-	 */
-	pushq $0
-
-	SWAPGS
-	jmp general_protection
-
-	.previous
-
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
@@ -1369,17 +1355,16 @@ error_sti:
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here.  B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
 	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	je error_bad_iret
 	movl %ecx,%eax	/* zero extend */
 	cmpq %rax,RIP+8(%rsp)
 	je bstep_iret
@@ -1390,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
 	/* Fix truncated RIP */
 	movq %rcx,RIP+8(%rsp)
-	jmp error_swapgs
+	/* fall through */
+
+error_bad_iret:
+	SWAPGS
+	mov %rsp,%rdi
+	call fixup_bad_iret
+	mov %rax,%rsp
+	decl %ebx	/* Return to usergs */
+	jmp error_sti
 	CFI_ENDPROC
 END(error_entry)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -407,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 	return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+	void *error_entry_ret;
+	struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+	/*
+	 * This is called from entry_64.S early in handling a fault
+	 * caused by a bad iret to user mode.  To handle the fault
+	 * correctly, we want to move our stack frame to task_pt_regs
+	 * and we want to pretend that the exception came from the
+	 * iret target.
+	 */
+	struct bad_iret_stack *new_stack =
+		container_of(task_pt_regs(current),
+			     struct bad_iret_stack, regs);
+
+	/* Copy the IRET target to the new stack. */
+	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+	/* Copy the remainder of the stack from the current stack. */
+	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+	BUG_ON(!user_mode_vm(&new_stack->regs));
+	return new_stack;
+}
 #endif
 
 /*
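
A note on the copy order in fixup_bad_iret: the first memmove pulls the five-word hardware frame (RIP, CS, RFLAGS, RSP, SS) from the iret target that s->regs.sp still points at, and the second fills in everything below regs.ip from the current frame. memmove, and this ordering, matter in the kernel because source and destination can overlap when the fault happened on the task stack. Below is a hedged user-space mock of that logic: the struct members mirror the patch, but the mock_ names, layout constants, and fake frames are invented purely for illustration.

/*
 * User-space mock of the fixup_bad_iret() copy logic; illustration
 * only, with non-overlapping "stacks" standing in for the real ones.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mock of the x86_64 pt_regs layout: GPRs first, hardware frame last. */
struct mock_pt_regs {
	uint64_t r15, r14, r13, r12, bp, bx;
	uint64_t r11, r10, r9, r8, ax, cx, dx, si, di;
	uint64_t orig_ax;
	uint64_t ip, cs, flags, sp, ss;	/* what iretq consumes */
};

struct mock_bad_iret_stack {
	void *error_entry_ret;
	struct mock_pt_regs regs;
};

/* The same two copies as the patch, in the same order. */
static struct mock_bad_iret_stack *
mock_fixup(struct mock_bad_iret_stack *s, struct mock_bad_iret_stack *task)
{
	/* 1: the IRET target becomes the new frame's ip/cs/flags/sp/ss. */
	memmove(&task->regs.ip, (void *)s->regs.sp, 5 * 8);
	/* 2: everything below regs.ip is carried over unchanged. */
	memmove(task, s, offsetof(struct mock_bad_iret_stack, regs.ip));
	return task;
}

int main(void)
{
	/* The five words the failed iretq was trying to consume. */
	uint64_t iret_target[5] = { 0x400080, 0x33, 0x202, 0x7fffdeadb000, 0x2b };
	struct mock_bad_iret_stack cur = {0}, task = {0};

	cur.regs.di = 42;			/* an arbitrary saved GPR */
	cur.regs.sp = (uintptr_t)iret_target;	/* where the bad frame lives */

	struct mock_bad_iret_stack *fixed = mock_fixup(&cur, &task);

	assert(fixed->regs.ip == 0x400080);	/* fault "came from" the target */
	assert(fixed->regs.cs == 0x33);		/* user CS: frame looks user-mode */
	assert(fixed->regs.di == 42);		/* GPRs preserved */
	printf("relocated: ip=%#lx cs=%#lx di=%lu\n",
	       (unsigned long)fixed->regs.ip,
	       (unsigned long)fixed->regs.cs,
	       (unsigned long)fixed->regs.di);
	return 0;
}

The real function gets its destination from task_pt_regs(current), and the BUG_ON(!user_mode_vm(...)) insists the relocated frame now reads as a user-mode trap, which is what lets the rest of the non-paranoid error path deliver the fault from the right stack.
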