Commit b02fcf9b authored by Josh Poimboeuf, committed by Ingo Molnar

x86/unwinder: Handle stack overflows more gracefully

There are at least two unwinder bugs hindering the debugging of
stack-overflow crashes:

- It doesn't deal gracefully with the case where the stack overflows and
  the stack pointer itself isn't on a valid stack but the
  to-be-dereferenced data *is*.

- The ORC oops dump code doesn't know how to print partial pt_regs, for the
  case where we get an interrupt/exception in *early* entry code
  before the full pt_regs have been saved.

Fix both issues.

http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bpetkov@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d3a09104
...@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); ...@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long); extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs); extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, int all); extern void __show_regs(struct pt_regs *regs, int all);
extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void); extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr); extern void oops_end(unsigned long, struct pt_regs *, int signr);
......
...@@ -7,6 +7,9 @@ ...@@ -7,6 +7,9 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
struct unwind_state { struct unwind_state {
struct stack_info stack_info; struct stack_info stack_info;
unsigned long stack_mask; unsigned long stack_mask;
...@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, ...@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
} }
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
* WARNING: The entire pt_regs may not be safe to dereference. In some cases,
* only the iret frame registers are accessible. Use with caution!
*/
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{ {
if (unwind_done(state)) if (unwind_done(state))
......
...@@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable, ...@@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
} }
/*
 * Print only the iret-frame registers of @regs: CS:IP on one line, then
 * SS:SP and EFLAGS on a second line.
 *
 * Only regs->cs, regs->ip, regs->ss, regs->sp and regs->flags are read, so
 * this can be used when the rest of the pt_regs is not accessible.
 *
 * The second line is emitted without a trailing newline; __show_regs()
 * appends the ORIG_RAX field to it with pr_cont().
 */
void show_iret_regs(struct pt_regs *regs)
{
/* cs/ss are cast to int to match the %04x conversions. */
printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
regs->sp, regs->flags);
}
/*
 * Print as much of @regs as is known to lie on the stack described by @info.
 *
 * - If the whole struct pt_regs is on the stack, print the full register
 *   set via __show_regs(regs, 0).
 * - Otherwise, if only the IRET_FRAME_SIZE bytes starting IRET_FRAME_OFFSET
 *   bytes into @regs are on the stack, print just the iret frame.
 * - Otherwise print nothing.
 */
static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
{
if (on_stack(info, regs, sizeof(*regs)))
__show_regs(regs, 0);
else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
IRET_FRAME_SIZE)) {
/*
* When an interrupt or exception occurs in entry code, the
* full pt_regs might not have been saved yet. In that case
* just print the iret frame.
*/
show_iret_regs(regs);
}
}
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl) unsigned long *stack, char *log_lvl)
{ {
...@@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack_name) if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name); printk("%s <%s>\n", log_lvl, stack_name);
if (regs && on_stack(&stack_info, regs, sizeof(*regs))) if (regs)
__show_regs(regs, 0); show_regs_safe(&stack_info, regs);
/* /*
* Scan the stack, printing any text addresses we find. At the * Scan the stack, printing any text addresses we find. At the
...@@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/* /*
* Don't print regs->ip again if it was already printed * Don't print regs->ip again if it was already printed
* by __show_regs() below. * by show_regs_safe() below.
*/ */
if (regs && stack == &regs->ip) if (regs && stack == &regs->ip)
goto next; goto next;
...@@ -155,8 +177,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -155,8 +177,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/* if the frame has entry regs, print them */ /* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state); regs = unwind_get_entry_regs(&state);
if (regs && on_stack(&stack_info, regs, sizeof(*regs))) if (regs)
__show_regs(regs, 0); show_regs_safe(&stack_info, regs);
} }
if (stack_name) if (stack_name)
......
...@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex; unsigned int fsindex, gsindex;
unsigned int ds, cs, es; unsigned int ds, cs, es;
printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); show_iret_regs(regs);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
regs->sp, regs->flags);
if (regs->orig_ax != -1) if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else else
...@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15); regs->r13, regs->r14, regs->r15);
if (!all)
return;
asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es)); asm("movl %%es,%0" : "=r" (es));
...@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
if (!all)
return;
cr0 = read_cr0(); cr0 = read_cr0();
cr2 = read_cr2(); cr2 = read_cr2();
cr3 = __read_cr3(); cr3 = __read_cr3();
......
...@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) ...@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL; return NULL;
} }
static bool stack_access_ok(struct unwind_state *state, unsigned long addr, static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
size_t len) size_t len)
{ {
struct stack_info *info = &state->stack_info; struct stack_info *info = &state->stack_info;
void *addr = (void *)_addr;
/* if (!on_stack(info, addr, len) &&
* If the address isn't on the current stack, switch to the next one. (get_stack_info(addr, state->task, info, &state->stack_mask)))
* return false;
* We may have to traverse multiple stacks to deal with the possibility
* that info->next_sp could point to an empty stack and the address
* could be on a subsequent stack.
*/
while (!on_stack(info, (void *)addr, len))
if (get_stack_info(info->next_sp, state->task, info,
&state->stack_mask))
return false;
return true; return true;
} }
...@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, ...@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
return true; return true;
} }
#define REGS_SIZE (sizeof(struct pt_regs))
#define SP_OFFSET (offsetof(struct pt_regs, sp))
#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
unsigned long *ip, unsigned long *sp, bool full) unsigned long *ip, unsigned long *sp)
{ {
size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; struct pt_regs *regs = (struct pt_regs *)addr;
size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
if (IS_ENABLED(CONFIG_X86_64)) {
if (!stack_access_ok(state, addr, regs_size))
return false;
*ip = regs->ip;
*sp = regs->sp;
return true; /* x86-32 support will be more complicated due to the &regs->sp hack */
} BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
if (!stack_access_ok(state, addr, sp_offset)) if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false; return false;
*ip = regs->ip; *ip = regs->ip;
*sp = regs->sp;
return true;
}
if (user_mode(regs)) { static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
if (!stack_access_ok(state, addr + sp_offset, unsigned long *ip, unsigned long *sp)
REGS_SIZE - SP_OFFSET)) {
return false; struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
*sp = regs->sp; if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
} else return false;
*sp = (unsigned long)&regs->sp;
*ip = regs->ip;
*sp = regs->sp;
return true; return true;
} }
...@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type; enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc; struct orc_entry *orc;
struct pt_regs *ptregs;
bool indirect = false; bool indirect = false;
if (unwind_done(state)) if (unwind_done(state))
...@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
break; break;
case ORC_TYPE_REGS: case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n", orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip); (void *)sp, (void *)orig_ip);
goto done; goto done;
...@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
break; break;
case ORC_TYPE_REGS_IRET: case ORC_TYPE_REGS_IRET:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n", orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip); (void *)sp, (void *)orig_ip);
goto done; goto done;
} }
ptregs = container_of((void *)sp, struct pt_regs, ip); state->regs = (void *)sp - IRET_FRAME_OFFSET;
if ((unsigned long)ptregs >= prev_sp && state->full_regs = false;
on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
state->regs = ptregs;
state->full_regs = false;
} else
state->regs = NULL;
state->signal = true; state->signal = true;
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment