Commit ba1f2b2e authored by Peter Zijlstra

x86/entry: Fix NMI vs IRQ state tracking

While the nmi_enter() users did
trace_hardirqs_{off_finish,on_prepare}(), there were no matching
lockdep_hardirqs_*() calls to complete the picture.

Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across the NMIs.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200623083721.216740948@infradead.org
parent 859d069e
...@@ -592,7 +592,7 @@ SYSCALL_DEFINE0(ni_syscall) ...@@ -592,7 +592,7 @@ SYSCALL_DEFINE0(ni_syscall)
* The return value must be fed into the state argument of * The return value must be fed into the state argument of
* idtentry_exit(). * idtentry_exit().
*/ */
idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs) noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs)
{ {
idtentry_state_t ret = { idtentry_state_t ret = {
.exit_rcu = false, .exit_rcu = false,
...@@ -687,7 +687,7 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched) ...@@ -687,7 +687,7 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
* Counterpart to idtentry_enter(). The return value of the entry * Counterpart to idtentry_enter(). The return value of the entry
* function must be fed into the @state argument. * function must be fed into the @state argument.
*/ */
void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state) noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
{ {
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
...@@ -731,7 +731,7 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state) ...@@ -731,7 +731,7 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
* Invokes enter_from_user_mode() to establish the proper context for * Invokes enter_from_user_mode() to establish the proper context for
* NOHZ_FULL. Otherwise scheduling on exit would not be possible. * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
*/ */
void noinstr idtentry_enter_user(struct pt_regs *regs) noinstr void idtentry_enter_user(struct pt_regs *regs)
{ {
check_user_regs(regs); check_user_regs(regs);
enter_from_user_mode(); enter_from_user_mode();
...@@ -749,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs) ...@@ -749,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs)
* *
* Counterpart to idtentry_enter_user(). * Counterpart to idtentry_enter_user().
*/ */
void noinstr idtentry_exit_user(struct pt_regs *regs) noinstr void idtentry_exit_user(struct pt_regs *regs)
{ {
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
prepare_exit_to_usermode(regs); prepare_exit_to_usermode(regs);
} }
/*
 * idtentry_enter_nmi - entry bookkeeping for NMI-like exception handlers
 * @regs: register frame of the interrupted context (not referenced in
 *        this body)
 *
 * Samples lockdep's hardirqs-enabled state for the current task, enters
 * NMI context via __nmi_enter(), and then reports hardirqs-off to lockdep
 * and to the hardirq tracing hook.
 *
 * Return: the lockdep hardirq state sampled on entry. Callers pass this
 * value back to idtentry_exit_nmi().
 */
noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
{
/* Sample the state before lockdep_hardirqs_off() is called below. */
bool irq_state = lockdep_hardirqs_enabled(current);
__nmi_enter();
lockdep_hardirqs_off(CALLER_ADDR0);
lockdep_hardirq_enter();
rcu_nmi_enter();
/*
 * The tracing calls are bracketed by instrumentation_begin()/end()
 * because this function itself is marked noinstr.
 */
instrumentation_begin();
trace_hardirqs_off_finish();
ftrace_nmi_enter();
instrumentation_end();
return irq_state;
}
/*
 * idtentry_exit_nmi - counterpart to idtentry_enter_nmi()
 * @regs:    register frame of the interrupted context (not referenced in
 *           this body)
 * @restore: value returned by idtentry_enter_nmi() (the prototype names
 *           this parameter irq_state); when true, the hardirqs-on
 *           transition is reported to the tracing hook and to lockdep
 *
 * Runs the entry steps in reverse order: ftrace NMI exit, the optional
 * hardirqs-on preparation, RCU NMI exit, lockdep hardirq exit, the
 * optional lockdep hardirqs-on, and finally __nmi_exit().
 */
noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
{
instrumentation_begin();
ftrace_nmi_exit();
/* The "prepare" half of the hardirqs-on report runs inside the instrumentation section. */
if (restore) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
}
instrumentation_end();
rcu_nmi_exit();
lockdep_hardirq_exit();
/* The final half of the hardirqs-on report is issued after rcu_nmi_exit(). */
if (restore)
lockdep_hardirqs_on(CALLER_ADDR0);
__nmi_exit();
}
#ifdef CONFIG_XEN_PV #ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION #ifndef CONFIG_PREEMPTION
/* /*
......
...@@ -20,6 +20,9 @@ typedef struct idtentry_state { ...@@ -20,6 +20,9 @@ typedef struct idtentry_state {
idtentry_state_t idtentry_enter(struct pt_regs *regs); idtentry_state_t idtentry_enter(struct pt_regs *regs);
void idtentry_exit(struct pt_regs *regs, idtentry_state_t state); void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
bool idtentry_enter_nmi(struct pt_regs *regs);
void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
/** /**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware * No error code pushed by hardware
......
...@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs) ...@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip); __this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin(); instrumentation_begin();
trace_hardirqs_off_finish();
handled = nmi_handle(NMI_LOCAL, regs); handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled); __this_cpu_add(nmi_stats.normal, handled);
...@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs) ...@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs); unknown_nmi_error(reason, regs);
out: out:
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end(); instrumentation_end();
} }
...@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7); ...@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi) DEFINE_IDTENTRY_RAW(exc_nmi)
{ {
bool irq_state;
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return; return;
...@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_RAW(exc_nmi) ...@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
this_cpu_write(nmi_dr7, local_db_save()); this_cpu_write(nmi_dr7, local_db_save());
nmi_enter(); irq_state = idtentry_enter_nmi(regs);
inc_irq_stat(__nmi_count); inc_irq_stat(__nmi_count);
if (!ignore_nmis) if (!ignore_nmis)
default_do_nmi(regs); default_do_nmi(regs);
nmi_exit(); idtentry_exit_nmi(regs, irq_state);
local_db_restore(this_cpu_read(nmi_dr7)); local_db_restore(this_cpu_read(nmi_dr7));
......
...@@ -403,7 +403,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault) ...@@ -403,7 +403,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
} }
#endif #endif
nmi_enter(); idtentry_enter_nmi(regs);
instrumentation_begin(); instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
...@@ -649,15 +649,12 @@ DEFINE_IDTENTRY_RAW(exc_int3) ...@@ -649,15 +649,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
instrumentation_end(); instrumentation_end();
idtentry_exit_user(regs); idtentry_exit_user(regs);
} else { } else {
nmi_enter(); bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin(); instrumentation_begin();
trace_hardirqs_off_finish();
if (!do_int3(regs)) if (!do_int3(regs))
die("int3", regs, 0); die("int3", regs, 0);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end(); instrumentation_end();
nmi_exit(); idtentry_exit_nmi(regs, irq_state);
} }
} }
...@@ -865,9 +862,8 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) ...@@ -865,9 +862,8 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
static __always_inline void exc_debug_kernel(struct pt_regs *regs, static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6) unsigned long dr6)
{ {
nmi_enter(); bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin(); instrumentation_begin();
trace_hardirqs_off_finish();
/* /*
* If something gets miswired and we end up here for a user mode * If something gets miswired and we end up here for a user mode
...@@ -884,10 +880,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, ...@@ -884,10 +880,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
handle_debug(regs, dr6, false); handle_debug(regs, dr6, false);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end(); instrumentation_end();
nmi_exit(); idtentry_exit_nmi(regs, irq_state);
} }
static __always_inline void exc_debug_user(struct pt_regs *regs, static __always_inline void exc_debug_user(struct pt_regs *regs,
...@@ -903,6 +897,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, ...@@ -903,6 +897,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
instrumentation_begin(); instrumentation_begin();
handle_debug(regs, dr6, true); handle_debug(regs, dr6, true);
instrumentation_end(); instrumentation_end();
idtentry_exit_user(regs); idtentry_exit_user(regs);
} }
......
...@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void); ...@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/* /*
* nmi_enter() can nest up to 15 times; see NMI_BITS. * nmi_enter() can nest up to 15 times; see NMI_BITS.
*/ */
#define nmi_enter() \ #define __nmi_enter() \
do { \ do { \
lockdep_off(); \
arch_nmi_enter(); \ arch_nmi_enter(); \
printk_nmi_enter(); \ printk_nmi_enter(); \
lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \ BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \ } while (0)
#define nmi_enter() \
do { \
__nmi_enter(); \
lockdep_hardirq_enter(); \ lockdep_hardirq_enter(); \
rcu_nmi_enter(); \
instrumentation_begin(); \ instrumentation_begin(); \
ftrace_nmi_enter(); \ ftrace_nmi_enter(); \
instrumentation_end(); \ instrumentation_end(); \
} while (0) } while (0)
#define __nmi_exit() \
do { \
BUG_ON(!in_nmi()); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
printk_nmi_exit(); \
arch_nmi_exit(); \
lockdep_on(); \
} while (0)
#define nmi_exit() \ #define nmi_exit() \
do { \ do { \
instrumentation_begin(); \ instrumentation_begin(); \
ftrace_nmi_exit(); \ ftrace_nmi_exit(); \
instrumentation_end(); \ instrumentation_end(); \
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \ rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \ lockdep_hardirq_exit(); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ __nmi_exit(); \
lockdep_on(); \
printk_nmi_exit(); \
arch_nmi_exit(); \
} while (0) } while (0)
#endif /* LINUX_HARDIRQ_H */ #endif /* LINUX_HARDIRQ_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment