Commit 13cb7349 authored by Linus Torvalds

Merge tag 'x86-entry-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry code updates from Thomas Gleixner:
 "More consolidation and correctness fixes for the debug exception:

   - Ensure BTF synchronization under all circumstances

   - Disentangle kernel and user mode #DB further

   - Get ordering vs. the debug notifier correct to make KGDB work more
     reliably.

   - Cleanup historical gunk and make the code simpler to understand"

* tag 'x86-entry-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/debug: Change thread.debugreg6 to thread.virtual_dr6
  x86/debug: Support negative polarity DR6 bits
  x86/debug: Simplify hw_breakpoint_handler()
  x86/debug: Remove aout_dump_debugregs()
  x86/debug: Remove the historical junk
  x86/debug: Move cond_local_irq_enable() block into exc_debug_user()
  x86/debug: Move historical SYSENTER junk into exc_debug_kernel()
  x86/debug: Simplify #DB signal code
  x86/debug: Remove handle_debug(.user) argument
  x86/debug: Move kprobe_debug_handler() into exc_debug_kernel()
  x86/debug: Sync BTF earlier
parents cc734372 d53d9bc0
...@@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void) ...@@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void)
return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
} }
extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void); extern void hw_breakpoint_restore(void);
static __always_inline unsigned long local_db_save(void) static __always_inline unsigned long local_db_save(void)
......
...@@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, ...@@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_int3_handler(struct pt_regs *regs); extern int kprobe_int3_handler(struct pt_regs *regs);
extern int kprobe_debug_handler(struct pt_regs *regs); extern int kprobe_debug_handler(struct pt_regs *regs);
#else
/* Stub for the #else branch of the CONFIG_KPROBES conditional: does nothing and always returns 0. */
static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
#endif /* CONFIG_KPROBES */ #endif /* CONFIG_KPROBES */
#endif /* _ASM_X86_KPROBES_H */ #endif /* _ASM_X86_KPROBES_H */
...@@ -517,7 +517,7 @@ struct thread_struct { ...@@ -517,7 +517,7 @@ struct thread_struct {
/* Save middle states of ptrace breakpoints */ /* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM]; struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */ /* Debug status used for traps, single steps, etc... */
unsigned long debugreg6; unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */ /* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7; unsigned long ptrace_dr7;
/* Fault info: */ /* Fault info: */
......
...@@ -441,42 +441,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, ...@@ -441,42 +441,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
return 0; return 0;
} }
/*
* Dump the debug register contents to the user.
* We can't dump our per cpu values because it
* may contain cpu wide breakpoint, something that
* doesn't belong to the current task.
*
* TODO: include non-ptrace user breakpoints (perf)
*/
void aout_dump_debugregs(struct user *dump)
{
/*
 * Overwrites dump->u_debugreg[0..7] using the current task's ptrace
 * breakpoint slots (thread->ptrace_bps[]) and its saved debugreg6 value.
 */
int i;
int dr7 = 0; /* accumulates the encode_dr7() bits of every enabled slot */
struct perf_event *bp;
struct arch_hw_breakpoint *info;
struct thread_struct *thread = &current->thread;
for (i = 0; i < HBP_NUM; i++) {
bp = thread->ptrace_bps[i];
/* A slot is reported only if it holds an event that is not disabled */
if (bp && !bp->attr.disabled) {
dump->u_debugreg[i] = bp->attr.bp_addr;
info = counter_arch_bp(bp);
dr7 |= encode_dr7(i, info->len, info->type);
} else {
/* Empty or disabled slot: report a zero address, contribute nothing to dr7 */
dump->u_debugreg[i] = 0;
}
}
/* Entries 4 and 5 are always written as zero */
dump->u_debugreg[4] = 0;
dump->u_debugreg[5] = 0;
/* Entry 6 is the task's stored debugreg6; entry 7 is the dr7 value built above */
dump->u_debugreg[6] = current->thread.debugreg6;
dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);
/* /*
* Release the user breakpoints used by ptrace * Release the user breakpoints used by ptrace
*/ */
...@@ -490,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) ...@@ -490,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
t->ptrace_bps[i] = NULL; t->ptrace_bps[i] = NULL;
} }
t->debugreg6 = 0; t->virtual_dr6 = 0;
t->ptrace_dr7 = 0; t->ptrace_dr7 = 0;
} }
...@@ -500,7 +464,7 @@ void hw_breakpoint_restore(void) ...@@ -500,7 +464,7 @@ void hw_breakpoint_restore(void)
set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
set_debugreg(current->thread.debugreg6, 6); set_debugreg(DR6_RESERVED, 6);
set_debugreg(__this_cpu_read(cpu_dr7), 7); set_debugreg(__this_cpu_read(cpu_dr7), 7);
} }
EXPORT_SYMBOL_GPL(hw_breakpoint_restore); EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
...@@ -523,10 +487,10 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); ...@@ -523,10 +487,10 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
*/ */
static int hw_breakpoint_handler(struct die_args *args) static int hw_breakpoint_handler(struct die_args *args)
{ {
int i, cpu, rc = NOTIFY_STOP; int i, rc = NOTIFY_STOP;
struct perf_event *bp; struct perf_event *bp;
unsigned long dr6;
unsigned long *dr6_p; unsigned long *dr6_p;
unsigned long dr6;
/* The DR6 value is pointed by args->err */ /* The DR6 value is pointed by args->err */
dr6_p = (unsigned long *)ERR_PTR(args->err); dr6_p = (unsigned long *)ERR_PTR(args->err);
...@@ -540,14 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args) ...@@ -540,14 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args)
if ((dr6 & DR_TRAP_BITS) == 0) if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE; return NOTIFY_DONE;
/*
* Assert that local interrupts are disabled
* Reset the DRn bits in the virtualized register value.
* The ptrace trigger routine will add in whatever is needed.
*/
current->thread.debugreg6 &= ~DR_TRAP_BITS;
cpu = get_cpu();
/* Handle all the breakpoints that were triggered */ /* Handle all the breakpoints that were triggered */
for (i = 0; i < HBP_NUM; ++i) { for (i = 0; i < HBP_NUM; ++i) {
if (likely(!(dr6 & (DR_TRAP0 << i)))) if (likely(!(dr6 & (DR_TRAP0 << i))))
...@@ -561,7 +517,7 @@ static int hw_breakpoint_handler(struct die_args *args) ...@@ -561,7 +517,7 @@ static int hw_breakpoint_handler(struct die_args *args)
*/ */
rcu_read_lock(); rcu_read_lock();
bp = per_cpu(bp_per_reg[i], cpu); bp = this_cpu_read(bp_per_reg[i]);
/* /*
* Reset the 'i'th TRAP bit in dr6 to denote completion of * Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling * exception handling
...@@ -592,12 +548,10 @@ static int hw_breakpoint_handler(struct die_args *args) ...@@ -592,12 +548,10 @@ static int hw_breakpoint_handler(struct die_args *args)
* breakpoints (to generate signals) and b) when the system has * breakpoints (to generate signals) and b) when the system has
* taken exception due to multiple causes * taken exception due to multiple causes
*/ */
if ((current->thread.debugreg6 & DR_TRAP_BITS) || if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
(dr6 & (~DR_TRAP_BITS))) (dr6 & (~DR_TRAP_BITS)))
rc = NOTIFY_DONE; rc = NOTIFY_DONE;
put_cpu();
return rc; return rc;
} }
......
...@@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event, ...@@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event,
struct task_struct *tsk = current; struct task_struct *tsk = current;
int i; int i;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++) {
if (breakinfo[i].enabled) if (breakinfo[i].enabled)
tsk->thread.debugreg6 |= (DR_TRAP0 << i); tsk->thread.virtual_dr6 |= (DR_TRAP0 << i);
}
} }
void kgdb_arch_late(void) void kgdb_arch_late(void)
......
...@@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp, ...@@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp,
break; break;
} }
thread->debugreg6 |= (DR_TRAP0 << i); thread->virtual_dr6 |= (DR_TRAP0 << i);
} }
/* /*
...@@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) ...@@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
if (bp) if (bp)
val = bp->hw.info.address; val = bp->hw.info.address;
} else if (n == 6) { } else if (n == 6) {
val = thread->debugreg6; val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */
} else if (n == 7) { } else if (n == 7) {
val = thread->ptrace_dr7; val = thread->ptrace_dr7;
} }
...@@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, ...@@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
if (n < HBP_NUM) { if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val); rc = ptrace_set_breakpoint_addr(tsk, n, val);
} else if (n == 6) { } else if (n == 6) {
thread->debugreg6 = val; thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */
rc = 0; rc = 0;
} else if (n == 7) { } else if (n == 7) {
rc = ptrace_write_dr7(tsk, val); rc = ptrace_write_dr7(tsk, val);
......
...@@ -745,9 +745,21 @@ static __always_inline unsigned long debug_read_clear_dr6(void) ...@@ -745,9 +745,21 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
* Keep it simple: clear DR6 immediately. * Keep it simple: clear DR6 immediately.
*/ */
get_debugreg(dr6, 6); get_debugreg(dr6, 6);
set_debugreg(0, 6); set_debugreg(DR6_RESERVED, 6);
/* Filter out all the reserved bits which are preset to 1 */ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
dr6 &= ~DR6_RESERVED;
/*
* Clear the virtual DR6 value, ptrace routines will set bits here for
* things we want signals for.
*/
current->thread.virtual_dr6 = 0;
/*
* The SDM says "The processor clears the BTF flag when it
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
*/
clear_thread_flag(TIF_BLOCKSTEP);
return dr6; return dr6;
} }
...@@ -776,74 +788,20 @@ static __always_inline unsigned long debug_read_clear_dr6(void) ...@@ -776,74 +788,20 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
* *
* May run on IST stack. * May run on IST stack.
*/ */
static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
{
struct task_struct *tsk = current;
bool user_icebp;
int si_code;
/*
* The SDM says "The processor clears the BTF flag when it
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
*/
clear_thread_flag(TIF_BLOCKSTEP);
/*
* If DR6 is zero, no point in trying to handle it. The kernel is
* not using INT1.
*/
if (!user && !dr6)
return;
/*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
user_icebp = user && !dr6;
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
#ifdef CONFIG_KPROBES
if (kprobe_debug_handler(regs)) {
return;
}
#endif
if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
SIGTRAP) == NOTIFY_STOP) {
return;
}
/* It's safe to allow irq's after DR6 has been saved */
cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
X86_TRAP_DB);
goto out;
}
if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
{
/* /*
* Historical junk that used to handle SYSENTER single-stepping. * Notifiers will clear bits in @dr6 to indicate the event has been
* This should be unreachable now. If we survive for a while * consumed - hw_breakpoint_handler(), single_stop_cont().
* without anyone hitting this warning, we'll turn this into *
* an oops. * Notifiers will set bits in @virtual_dr6 to indicate the desire
* for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
*/ */
tsk->thread.debugreg6 &= ~DR_STEP; if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
set_tsk_thread_flag(tsk, TIF_SINGLESTEP); return true;
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(regs, 0, si_code);
out: return false;
cond_local_irq_disable(regs);
} }
static __always_inline void exc_debug_kernel(struct pt_regs *regs, static __always_inline void exc_debug_kernel(struct pt_regs *regs,
...@@ -877,8 +835,32 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, ...@@ -877,8 +835,32 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP; dr6 &= ~DR_STEP;
handle_debug(regs, dr6, false); if (kprobe_debug_handler(regs))
goto out;
/*
* The kernel doesn't use INT1
*/
if (!dr6)
goto out;
if (notify_debug(regs, &dr6))
goto out;
/*
* The kernel doesn't use TF single-step outside of:
*
* - Kprobes, consumed through kprobe_debug_handler()
* - KGDB, consumed through notify_debug()
*
* So if we get here with DR_STEP set, something is wonky.
*
* A known way to trigger this is through QEMU's GDB stub,
* which leaks #DB into the guest and causes IST recursion.
*/
if (WARN_ON_ONCE(dr6 & DR_STEP))
regs->flags &= ~X86_EFLAGS_TF;
out:
instrumentation_end(); instrumentation_end();
idtentry_exit_nmi(regs, irq_state); idtentry_exit_nmi(regs, irq_state);
...@@ -888,6 +870,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, ...@@ -888,6 +870,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
static __always_inline void exc_debug_user(struct pt_regs *regs, static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6) unsigned long dr6)
{ {
bool icebp;
/* /*
* If something gets miswired and we end up here for a kernel mode * If something gets miswired and we end up here for a kernel mode
* #DB, we will malfunction. * #DB, we will malfunction.
...@@ -906,8 +890,32 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, ...@@ -906,8 +890,32 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
irqentry_enter_from_user_mode(regs); irqentry_enter_from_user_mode(regs);
instrumentation_begin(); instrumentation_begin();
handle_debug(regs, dr6, true); /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
icebp = !dr6;
if (notify_debug(regs, &dr6))
goto out;
/* It's safe to allow irq's after DR6 has been saved */
local_irq_enable();
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
goto out_irq;
}
/* Add the virtual_dr6 bits for signals. */
dr6 |= current->thread.virtual_dr6;
if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
send_sigtrap(regs, 0, get_si_code(dr6));
out_irq:
local_irq_disable();
out:
instrumentation_end(); instrumentation_end();
irqentry_exit_to_user_mode(regs); irqentry_exit_to_user_mode(regs);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment