Commit f87032ae authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'locking/nmi' into x86/entry

Resolve conflicts with ongoing lockdep work that fixed the NMI entry code.

Conflicts:
	arch/x86/entry/common.c
	arch/x86/include/asm/idtentry.h
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents d25c8be6 ba1f2b2e
......@@ -198,6 +198,40 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
{
bool irq_state = lockdep_hardirqs_enabled();
__nmi_enter();
lockdep_hardirqs_off(CALLER_ADDR0);
lockdep_hardirq_enter();
rcu_nmi_enter();
instrumentation_begin();
trace_hardirqs_off_finish();
ftrace_nmi_enter();
instrumentation_end();
return irq_state;
}
noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
{
instrumentation_begin();
ftrace_nmi_exit();
if (restore) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
}
instrumentation_end();
rcu_nmi_exit();
lockdep_hardirq_exit();
if (restore)
lockdep_hardirqs_on(CALLER_ADDR0);
__nmi_exit();
}
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
......
......@@ -11,6 +11,9 @@
#include <asm/irq_stack.h>
bool idtentry_enter_nmi(struct pt_regs *regs);
void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
......
......@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin();
trace_hardirqs_off_finish();
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
......@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs);
out:
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
}
......@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
bool irq_state;
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
......@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
this_cpu_write(nmi_dr7, local_db_save());
nmi_enter();
irq_state = idtentry_enter_nmi(regs);
inc_irq_stat(__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
nmi_exit();
idtentry_exit_nmi(regs, irq_state);
local_db_restore(this_cpu_read(nmi_dr7));
......
......@@ -405,7 +405,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
}
#endif
nmi_enter();
idtentry_enter_nmi(regs);
instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
......@@ -651,15 +651,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
instrumentation_end();
irqentry_exit_to_user_mode(regs);
} else {
nmi_enter();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
trace_hardirqs_off_finish();
if (!do_int3(regs))
die("int3", regs, 0);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
nmi_exit();
idtentry_exit_nmi(regs, irq_state);
}
}
......@@ -867,9 +864,8 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
nmi_enter();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
trace_hardirqs_off_finish();
/*
* If something gets miswired and we end up here for a user mode
......@@ -886,10 +882,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
handle_debug(regs, dr6, false);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
nmi_exit();
idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
......@@ -905,6 +899,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
instrumentation_begin();
handle_debug(regs, dr6, true);
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
......
......@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
#define nmi_enter() \
#define __nmi_enter() \
do { \
lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
} while (0)
#define nmi_enter() \
do { \
__nmi_enter(); \
lockdep_hardirq_enter(); \
rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
#define __nmi_exit() \
do { \
BUG_ON(!in_nmi()); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
printk_nmi_exit(); \
arch_nmi_exit(); \
lockdep_on(); \
} while (0)
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
lockdep_on(); \
printk_nmi_exit(); \
arch_nmi_exit(); \
lockdep_hardirq_exit(); \
__nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
......@@ -10,181 +10,20 @@
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
#include <linux/lockdep_types.h>
struct task_struct;
struct lockdep_map;
/* for sysctl */
extern int prove_locking;
extern int lock_stat;
#define MAX_LOCKDEP_SUBCLASSES 8UL
#include <linux/types.h>
enum lockdep_wait_type {
LD_WAIT_INV = 0, /* not checked, catch all */
LD_WAIT_FREE, /* wait free, rcu etc.. */
LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
#else
LD_WAIT_CONFIG = LD_WAIT_SPIN,
#endif
LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
LD_WAIT_MAX, /* must be last */
};
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
#include <linux/list.h>
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
/*
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-(
*/
#define XXX_LOCK_USAGE_STATES (1+2*4)
/*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes
* cached in the instance of lockdep_map
*
* Currently main class (subclass == 0) and signle depth subclass
* are cached in lockdep_map. This optimization is mainly targeting
* on rq->lock. double_rq_lock() acquires this highly competitive with
* single depth.
*/
#define NR_LOCKDEP_CACHING_CLASSES 2
/*
* A lockdep key is associated with each lock object. For static locks we use
* the lock address itself as the key. Dynamically allocated lock objects can
* have a statically or dynamically allocated key. Dynamically allocated lock
* keys must be registered before being used and must be unregistered before
* the key memory is freed.
*/
struct lockdep_subclass_key {
char __one_byte;
} __attribute__ ((__packed__));
/* hash_entry is used to keep track of dynamically allocated keys. */
struct lock_class_key {
union {
struct hlist_node hash_entry;
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
};
};
extern struct lock_class_key __lockdep_no_validate__;
struct lock_trace;
#define LOCKSTAT_POINTS 4
/*
* The lock-class itself. The order of the structure members matters.
* reinit_class() zeroes the key member and all subsequent members.
*/
struct lock_class {
/*
* class-hash:
*/
struct hlist_node hash_entry;
/*
* Entry in all_lock_classes when in use. Entry in free_lock_classes
* when not in use. Instances that are being freed are on one of the
* zapped_classes lists.
*/
struct list_head lock_entry;
/*
* These fields represent a directed graph of lock dependencies,
* to every node we attach a list of "forward" and a list of
* "backward" graph nodes.
*/
struct list_head locks_after, locks_before;
const struct lockdep_subclass_key *key;
unsigned int subclass;
unsigned int dep_gen_id;
/*
* IRQ/softirq usage tracking bits:
*/
unsigned long usage_mask;
const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
/*
* Generation counter, when doing certain classes of graph walking,
* to ensure that we check one node only once:
*/
int name_version;
const char *name;
short wait_type_inner;
short wait_type_outer;
#ifdef CONFIG_LOCK_STAT
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
#endif
} __no_randomize_layout;
#ifdef CONFIG_LOCK_STAT
struct lock_time {
s64 min;
s64 max;
s64 total;
unsigned long nr;
};
enum bounce_type {
bounce_acquired_write,
bounce_acquired_read,
bounce_contended_write,
bounce_contended_read,
nr_bounce_types,
bounce_acquired = bounce_acquired_write,
bounce_contended = bounce_contended_write,
};
struct lock_class_stats {
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
struct lock_time read_waittime;
struct lock_time write_waittime;
struct lock_time read_holdtime;
struct lock_time write_holdtime;
unsigned long bounces[nr_bounce_types];
};
struct lock_class_stats lock_stats(struct lock_class *class);
void clear_lock_stats(struct lock_class *class);
#endif
/*
* Map the lock object (the lock instance) to the lock-class object.
* This is embedded into specific lock instances:
*/
struct lockdep_map {
struct lock_class_key *key;
struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
const char *name;
short wait_type_outer; /* can be taken in this context */
short wait_type_inner; /* presents this context */
#ifdef CONFIG_LOCK_STAT
int cpu;
unsigned long ip;
#endif
};
static inline void lockdep_copy_map(struct lockdep_map *to,
struct lockdep_map *from)
{
......@@ -440,8 +279,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
struct pin_cookie { unsigned int val; };
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
......@@ -520,10 +357,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
/*
* The class key takes no space if lockdep is disabled:
*/
struct lock_class_key { };
static inline void lockdep_register_key(struct lock_class_key *key)
{
......@@ -533,11 +366,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}
/*
* The lockdep_map takes no space if lockdep is disabled:
*/
struct lockdep_map { };
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
......@@ -549,8 +377,6 @@ struct lockdep_map { };
#define lockdep_recursing(tsk) (0)
struct pin_cookie { };
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Runtime locking correctness validator
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* see Documentation/locking/lockdep-design.rst for more details.
*/
#ifndef __LINUX_LOCKDEP_TYPES_H
#define __LINUX_LOCKDEP_TYPES_H
#include <linux/types.h>
#define MAX_LOCKDEP_SUBCLASSES 8UL
enum lockdep_wait_type {
LD_WAIT_INV = 0, /* not checked, catch all */
LD_WAIT_FREE, /* wait free, rcu etc.. */
LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
#else
LD_WAIT_CONFIG = LD_WAIT_SPIN,
#endif
LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
LD_WAIT_MAX, /* must be last */
};
#ifdef CONFIG_LOCKDEP
#include <linux/list.h>
/*
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-(
*/
#define XXX_LOCK_USAGE_STATES (1+2*4)
/*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes
* cached in the instance of lockdep_map
*
* Currently main class (subclass == 0) and signle depth subclass
* are cached in lockdep_map. This optimization is mainly targeting
* on rq->lock. double_rq_lock() acquires this highly competitive with
* single depth.
*/
#define NR_LOCKDEP_CACHING_CLASSES 2
/*
* A lockdep key is associated with each lock object. For static locks we use
* the lock address itself as the key. Dynamically allocated lock objects can
* have a statically or dynamically allocated key. Dynamically allocated lock
* keys must be registered before being used and must be unregistered before
* the key memory is freed.
*/
struct lockdep_subclass_key {
char __one_byte;
} __attribute__ ((__packed__));
/* hash_entry is used to keep track of dynamically allocated keys. */
struct lock_class_key {
union {
struct hlist_node hash_entry;
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
};
};
extern struct lock_class_key __lockdep_no_validate__;
struct lock_trace;
#define LOCKSTAT_POINTS 4
/*
* The lock-class itself. The order of the structure members matters.
* reinit_class() zeroes the key member and all subsequent members.
*/
struct lock_class {
/*
* class-hash:
*/
struct hlist_node hash_entry;
/*
* Entry in all_lock_classes when in use. Entry in free_lock_classes
* when not in use. Instances that are being freed are on one of the
* zapped_classes lists.
*/
struct list_head lock_entry;
/*
* These fields represent a directed graph of lock dependencies,
* to every node we attach a list of "forward" and a list of
* "backward" graph nodes.
*/
struct list_head locks_after, locks_before;
const struct lockdep_subclass_key *key;
unsigned int subclass;
unsigned int dep_gen_id;
/*
* IRQ/softirq usage tracking bits:
*/
unsigned long usage_mask;
const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
/*
* Generation counter, when doing certain classes of graph walking,
* to ensure that we check one node only once:
*/
int name_version;
const char *name;
short wait_type_inner;
short wait_type_outer;
#ifdef CONFIG_LOCK_STAT
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
#endif
} __no_randomize_layout;
#ifdef CONFIG_LOCK_STAT
struct lock_time {
s64 min;
s64 max;
s64 total;
unsigned long nr;
};
enum bounce_type {
bounce_acquired_write,
bounce_acquired_read,
bounce_contended_write,
bounce_contended_read,
nr_bounce_types,
bounce_acquired = bounce_acquired_write,
bounce_contended = bounce_contended_write,
};
struct lock_class_stats {
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
struct lock_time read_waittime;
struct lock_time write_waittime;
struct lock_time read_holdtime;
struct lock_time write_holdtime;
unsigned long bounces[nr_bounce_types];
};
struct lock_class_stats lock_stats(struct lock_class *class);
void clear_lock_stats(struct lock_class *class);
#endif
/*
* Map the lock object (the lock instance) to the lock-class object.
* This is embedded into specific lock instances:
*/
struct lockdep_map {
struct lock_class_key *key;
struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
const char *name;
short wait_type_outer; /* can be taken in this context */
short wait_type_inner; /* presents this context */
#ifdef CONFIG_LOCK_STAT
int cpu;
unsigned long ip;
#endif
};
struct pin_cookie { unsigned int val; };
#else /* !CONFIG_LOCKDEP */
/*
* The class key takes no space if lockdep is disabled:
*/
struct lock_class_key { };
/*
* The lockdep_map takes no space if lockdep is disabled:
*/
struct lockdep_map { };
struct pin_cookie { };
#endif /* !LOCKDEP */
#endif /* __LINUX_LOCKDEP_TYPES_H */
......@@ -56,6 +56,7 @@
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/bottom_half.h>
#include <linux/lockdep.h>
#include <asm/barrier.h>
#include <asm/mmiowb.h>
......
......@@ -15,7 +15,7 @@
# include <linux/spinlock_types_up.h>
#endif
#include <linux/lockdep.h>
#include <linux/lockdep_types.h>
typedef struct raw_spinlock {
arch_spinlock_t raw_lock;
......
......@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}
if (!kcsan_interrupt_watcher)
/* Use raw to avoid lockdep recursion via IRQ flags tracing. */
raw_local_irq_save(irq_flags);
local_irq_save(irq_flags);
watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
......@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
raw_local_irq_restore(irq_flags);
local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
......
......@@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;
/*
* With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
* we do not turn off lockdep here; this could happen due to recursion
* into lockdep via KCSAN if we detect a race in utilities used by
* lockdep.
* Because we may generate reports when we're in scheduler code, the use
* of printk() could deadlock. Until such time that all printing code
* called in print_report() is scheduler-safe, accept the risk, and just
* get our message out. As such, also disable lockdep to hide the
* warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();
......
......@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)
static __always_inline void lockdep_recursion_finish(void)
{
if (WARN_ON_ONCE(--current->lockdep_recursion))
if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
current->lockdep_recursion = 0;
}
......@@ -3646,7 +3646,16 @@ static void __trace_hardirqs_on_caller(void)
*/
void lockdep_hardirqs_on_prepare(unsigned long ip)
{
if (unlikely(!debug_locks || current->lockdep_recursion))
if (unlikely(!debug_locks))
return;
/*
* NMIs do not (and cannot) track lock dependencies, nothing to do.
*/
if (unlikely(in_nmi()))
return;
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
if (unlikely(current->hardirqs_enabled)) {
......@@ -3692,7 +3701,27 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
{
struct task_struct *curr = current;
if (unlikely(!debug_locks || curr->lockdep_recursion))
if (unlikely(!debug_locks))
return;
/*
* NMIs can happen in the middle of local_irq_{en,dis}able() where the
* tracking state and hardware state are out of sync.
*
* NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
* and not rely on hardware state like normal interrupts.
*/
if (unlikely(in_nmi())) {
/*
* Skip:
* - recursion check, because NMI can hit lockdep;
* - hardware state check, because above;
* - chain_key check, see lockdep_hardirqs_on_prepare().
*/
goto skip_checks;
}
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;
if (curr->hardirqs_enabled) {
......@@ -3720,6 +3749,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
skip_checks:
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
curr->hardirq_enable_ip = ip;
......@@ -3735,7 +3765,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
if (unlikely(!debug_locks || curr->lockdep_recursion))
if (unlikely(!debug_locks))
return;
/*
* Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
* they will restore the software state. This ensures the software
* state is consistent inside NMIs as well.
*/
if (unlikely(!in_nmi() && (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)))
return;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment