Commit 2e83b879 authored by Paul E. McKenney's avatar Paul E. McKenney

srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe()

On strict load-store architectures, the use of this_cpu_inc() by
srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU.
To see this suppose that an NMI arrives in the middle of srcu_read_lock(),
just after it has read ->srcu_lock_count, but before it has written
the incremented value back to memory.  If that NMI handler also does
srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure,
then upon return from that NMI handler, the interrupted srcu_read_lock()
will overwrite the NMI handler's update to ->srcu_lock_count, but
leave unchanged the NMI handler's update by srcu_read_unlock() to
->srcu_unlock_count.

This can result in a too-short SRCU grace period, which can in turn
result in arbitrary memory corruption.

If the NMI handler instead interrupts the srcu_read_unlock(), this
can result in eternal SRCU grace periods, which is not much better.

This commit therefore creates a pair of new srcu_read_lock_nmisafe()
and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in
both NMI handlers and in process and IRQ context.  It is bad practice
to mix the existing and the new _nmisafe() primitives on the same
srcu_struct structure.  Use one set or the other, not both.

Just to underline that "bad practice" point, using srcu_read_lock() at
process level and srcu_read_lock_nmisafe() in your NMI handler will not,
repeat NOT, work.  If you do not immediately understand why this is the
case, please review the earlier paragraphs in this commit log.

[ paulmck: Apply kernel test robot feedback. ]
[ paulmck: Apply feedback from Randy Dunlap. ]
[ paulmck: Apply feedback from John Ogness. ]
[ paulmck: Apply feedback from Frederic Weisbecker. ]

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/Signed-off-by: default avatarPaul E. McKenney <paulmck@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Reviewed-by: default avatarFrederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
parent 5d0f5953
...@@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM ...@@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
config ARCH_HAVE_NMI_SAFE_CMPXCHG config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool bool
config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
bool
config HAVE_ALIGNED_STRUCT_PAGE config HAVE_ALIGNED_STRUCT_PAGE
bool bool
help help
......
...@@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); ...@@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
#else
static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
return __srcu_read_lock(ssp);
}
static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
__srcu_read_unlock(ssp, idx);
}
#endif /* CONFIG_NEED_SRCU_NMI_SAFE */
#ifdef CONFIG_SRCU #ifdef CONFIG_SRCU
void srcu_init(void); void srcu_init(void);
#else /* #ifdef CONFIG_SRCU */ #else /* #ifdef CONFIG_SRCU */
...@@ -166,6 +180,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) ...@@ -166,6 +180,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
return retval; return retval;
} }
/**
* srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
*
* Enter an SRCU read-side critical section, but in an NMI-safe manner.
* See srcu_read_lock() for more information.
*/
static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
retval = __srcu_read_lock_nmisafe(ssp);
else
retval = __srcu_read_lock(ssp);
rcu_lock_acquire(&(ssp)->dep_map);
return retval;
}
/* Used by tracing, cannot be traced and cannot invoke lockdep. */ /* Used by tracing, cannot be traced and cannot invoke lockdep. */
static inline notrace int static inline notrace int
srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
...@@ -191,6 +224,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) ...@@ -191,6 +224,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__srcu_read_unlock(ssp, idx); __srcu_read_unlock(ssp, idx);
} }
/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
* @idx: return value from corresponding srcu_read_lock().
*
* Exit an SRCU read-side critical section, but in an NMI-safe manner.
*/
static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
rcu_lock_release(&(ssp)->dep_map);
if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
__srcu_read_unlock_nmisafe(ssp, idx);
else
__srcu_read_unlock(ssp, idx);
}
/* Used by tracing, cannot be traced and cannot call lockdep. */ /* Used by tracing, cannot be traced and cannot call lockdep. */
static inline notrace void static inline notrace void
srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
......
...@@ -72,6 +72,9 @@ config TREE_SRCU ...@@ -72,6 +72,9 @@ config TREE_SRCU
help help
This option selects the full-fledged version of SRCU. This option selects the full-fledged version of SRCU.
config NEED_SRCU_NMI_SAFE
def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
config TASKS_RCU_GENERIC config TASKS_RCU_GENERIC
def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
select SRCU select SRCU
......
...@@ -615,9 +615,13 @@ static struct rcu_torture_ops rcu_busted_ops = { ...@@ -615,9 +615,13 @@ static struct rcu_torture_ops rcu_busted_ops = {
DEFINE_STATIC_SRCU(srcu_ctl); DEFINE_STATIC_SRCU(srcu_ctl);
static struct srcu_struct srcu_ctld; static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl; static struct srcu_struct *srcu_ctlp = &srcu_ctl;
static struct rcu_torture_ops srcud_ops;
static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
{ {
if (cur_ops == &srcud_ops)
return srcu_read_lock_nmisafe(srcu_ctlp);
else
return srcu_read_lock(srcu_ctlp); return srcu_read_lock(srcu_ctlp);
} }
...@@ -642,6 +646,9 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) ...@@ -642,6 +646,9 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
{ {
if (cur_ops == &srcud_ops)
srcu_read_unlock_nmisafe(srcu_ctlp, idx);
else
srcu_read_unlock(srcu_ctlp, idx); srcu_read_unlock(srcu_ctlp, idx);
} }
......
...@@ -654,6 +654,41 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) ...@@ -654,6 +654,41 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
} }
EXPORT_SYMBOL_GPL(__srcu_read_unlock); EXPORT_SYMBOL_GPL(__srcu_read_unlock);
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct, but in an NMI-safe manner using RMW atomics.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
int idx;
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
atomic_long_inc(&sdp->srcu_lock_count[idx]);
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
/*
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
*/
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
atomic_long_inc(&sdp->srcu_unlock_count[idx]);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
#endif // CONFIG_NEED_SRCU_NMI_SAFE
/* /*
* Start an SRCU grace period. * Start an SRCU grace period.
*/ */
...@@ -1090,7 +1125,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, ...@@ -1090,7 +1125,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
int ss_state; int ss_state;
check_init_srcu_struct(ssp); check_init_srcu_struct(ssp);
idx = srcu_read_lock(ssp); idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_size_state); ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL) if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, 0); sdp = per_cpu_ptr(ssp->sda, 0);
...@@ -1123,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, ...@@ -1123,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
srcu_funnel_gp_start(ssp, sdp, s, do_norm); srcu_funnel_gp_start(ssp, sdp, s, do_norm);
else if (needexp) else if (needexp)
srcu_funnel_exp_start(ssp, sdp_mynode, s); srcu_funnel_exp_start(ssp, sdp_mynode, s);
srcu_read_unlock(ssp, idx); __srcu_read_unlock_nmisafe(ssp, idx);
return s; return s;
} }
...@@ -1427,13 +1462,13 @@ void srcu_barrier(struct srcu_struct *ssp) ...@@ -1427,13 +1462,13 @@ void srcu_barrier(struct srcu_struct *ssp)
/* Initial count prevents reaching zero until all CBs are posted. */ /* Initial count prevents reaching zero until all CBs are posted. */
atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
idx = srcu_read_lock(ssp); idx = __srcu_read_lock_nmisafe(ssp);
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
else else
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
srcu_read_unlock(ssp, idx); __srcu_read_unlock_nmisafe(ssp, idx);
/* Remove the initial count, at which point reaching zero can happen. */ /* Remove the initial count, at which point reaching zero can happen. */
if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment