Commit 81b67439 authored by Josh Poimboeuf's avatar Josh Poimboeuf Committed by Ingo Molnar

x86/unwind/orc: Fix premature unwind stoppage due to IRET frames

The following execution path is possible:

  fsnotify()
    [ realign the stack and store previous SP in R10 ]
    <IRQ>
      [ only IRET regs saved ]
      common_interrupt()
        interrupt_entry()
	  <NMI>
	    [ full pt_regs saved ]
	    ...
	    [ unwind stack ]

When the unwinder goes through the NMI and the IRQ on the stack, and
then sees fsnotify(), it doesn't have access to the value of R10,
because it only has the five IRET registers.  So the unwind stops
prematurely.

However, because the interrupt_entry() code is careful not to clobber
R10 before saving the full regs, the unwinder should be able to read R10
from the previously saved full pt_regs associated with the NMI.

Handle this case properly.  When encountering an IRET regs frame
immediately after a full pt_regs frame, use the pt_regs as a backup
which can be used to get the C register values.

Also, note that a call frame resets the 'prev_regs' value, because a
function is free to clobber the registers.  For this fix to work, the
IRET and full regs frames must be adjacent, with no FUNC frames in
between.  So replace the FUNC hint in interrupt_entry() with an
IRET_REGS hint.

Fixes: ee9f8fce ("x86/unwind: Add the ORC unwinder")
Reviewed-by: default avatarMiroslav Benes <mbenes@suse.cz>
Signed-off-by: default avatarJosh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Jones <dsj@fb.com>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lore.kernel.org/r/97a408167cc09f1cfa0de31a7b70dd88868d743f.1587808742.git.jpoimboe@redhat.com
parent a0f81bf2
...@@ -511,7 +511,7 @@ SYM_CODE_END(spurious_entries_start) ...@@ -511,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+ * +----------------------------------------------------+
*/ */
SYM_CODE_START(interrupt_entry) SYM_CODE_START(interrupt_entry)
UNWIND_HINT_FUNC UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC ASM_CLAC
cld cld
...@@ -543,9 +543,9 @@ SYM_CODE_START(interrupt_entry) ...@@ -543,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */ pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */ pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */ pushq 3*8(%rdi) /* regs->ip */
UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */ pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */ pushq 8(%rdi) /* return address */
UNWIND_HINT_FUNC
movq (%rdi), %rdi movq (%rdi), %rdi
jmp 2f jmp 2f
......
...@@ -19,7 +19,7 @@ struct unwind_state { ...@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC) #if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs; bool signal, full_regs;
unsigned long sp, bp, ip; unsigned long sp, bp, ip;
struct pt_regs *regs; struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER) #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq; bool got_irq;
unsigned long *bp, *orig_sp, ip; unsigned long *bp, *orig_sp, ip;
......
...@@ -384,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr ...@@ -384,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true; return true;
} }
/*
* If state->regs is non-NULL, and points to a full pt_regs, just get the reg
* value from state->regs.
*
* Otherwise, if state->regs just points to IRET regs, and the previous frame
* had full regs, it's safe to get the value from the previous regs. This can
* happen when early/late IRQ entry code gets interrupted by an NMI.
*/
static bool get_reg(struct unwind_state *state, unsigned int reg_off,
unsigned long *val)
{
unsigned int reg = reg_off/8;
if (!state->regs)
return false;
if (state->full_regs) {
*val = ((unsigned long *)state->regs)[reg];
return true;
}
if (state->prev_regs) {
*val = ((unsigned long *)state->prev_regs)[reg];
return true;
}
return false;
}
bool unwind_next_frame(struct unwind_state *state) bool unwind_next_frame(struct unwind_state *state)
{ {
unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp; unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type; enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc; struct orc_entry *orc;
bool indirect = false; bool indirect = false;
...@@ -448,39 +477,35 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -448,39 +477,35 @@ bool unwind_next_frame(struct unwind_state *state)
break; break;
case ORC_REG_R10: case ORC_REG_R10:
if (!state->regs || !state->full_regs) { if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
orc_warn_current("missing R10 value at %pB\n", orc_warn_current("missing R10 value at %pB\n",
(void *)state->ip); (void *)state->ip);
goto err; goto err;
} }
sp = state->regs->r10;
break; break;
case ORC_REG_R13: case ORC_REG_R13:
if (!state->regs || !state->full_regs) { if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
orc_warn_current("missing R13 value at %pB\n", orc_warn_current("missing R13 value at %pB\n",
(void *)state->ip); (void *)state->ip);
goto err; goto err;
} }
sp = state->regs->r13;
break; break;
case ORC_REG_DI: case ORC_REG_DI:
if (!state->regs || !state->full_regs) { if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
orc_warn_current("missing RDI value at %pB\n", orc_warn_current("missing RDI value at %pB\n",
(void *)state->ip); (void *)state->ip);
goto err; goto err;
} }
sp = state->regs->di;
break; break;
case ORC_REG_DX: case ORC_REG_DX:
if (!state->regs || !state->full_regs) { if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
orc_warn_current("missing DX value at %pB\n", orc_warn_current("missing DX value at %pB\n",
(void *)state->ip); (void *)state->ip);
goto err; goto err;
} }
sp = state->regs->dx;
break; break;
default: default:
...@@ -507,6 +532,7 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -507,6 +532,7 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp; state->sp = sp;
state->regs = NULL; state->regs = NULL;
state->prev_regs = NULL;
state->signal = false; state->signal = false;
break; break;
...@@ -518,6 +544,7 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -518,6 +544,7 @@ bool unwind_next_frame(struct unwind_state *state)
} }
state->regs = (struct pt_regs *)sp; state->regs = (struct pt_regs *)sp;
state->prev_regs = NULL;
state->full_regs = true; state->full_regs = true;
state->signal = true; state->signal = true;
break; break;
...@@ -529,6 +556,8 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -529,6 +556,8 @@ bool unwind_next_frame(struct unwind_state *state)
goto err; goto err;
} }
if (state->full_regs)
state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET; state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false; state->full_regs = false;
state->signal = true; state->signal = true;
...@@ -543,8 +572,8 @@ bool unwind_next_frame(struct unwind_state *state) ...@@ -543,8 +572,8 @@ bool unwind_next_frame(struct unwind_state *state)
/* Find BP: */ /* Find BP: */
switch (orc->bp_reg) { switch (orc->bp_reg) {
case ORC_REG_UNDEFINED: case ORC_REG_UNDEFINED:
if (state->regs && state->full_regs) if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
state->bp = state->regs->bp; state->bp = tmp;
break; break;
case ORC_REG_PREV_SP: case ORC_REG_PREV_SP:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment