Commit 02219caa authored by Paul E. McKenney


Merge branches 'doc.2024.06.06a', 'fixes.2024.07.04a', 'mb.2024.06.28a', 'nocb.2024.06.03a', 'rcu-tasks.2024.06.06a', 'rcutorture.2024.06.06a' and 'srcu.2024.06.18a' into HEAD

doc.2024.06.06a: Documentation updates.
fixes.2024.07.04a: Miscellaneous fixes.
mb.2024.06.28a: Grace-period memory-barrier redundancy removal.
nocb.2024.06.03a: No-CB CPU updates.
rcu-tasks.2024.06.06a: RCU-Tasks updates.
rcutorture.2024.06.06a: Torture-test updates.
srcu.2024.06.18a: SRCU polled-grace-period updates.
@@ -149,9 +149,9 @@ This case is handled by calls to the strongly ordered
 ``atomic_add_return()`` read-modify-write atomic operation that
 is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
 time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
-The grace-period kthread invokes ``rcu_dynticks_snap()`` and
-``rcu_dynticks_in_eqs_since()`` (both of which invoke
-an ``atomic_add_return()`` of zero) to detect idle CPUs.
+The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
+(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
+(both of which rely on acquire semantics) to detect idle CPUs.
 +-----------------------------------------------------------------------+
 | **Quick Quiz**:                                                       |
......
@@ -5018,6 +5018,14 @@
 			the ->nocb_bypass queue.  The definition of "too
 			many" is supplied by this kernel boot parameter.
 
+	rcutree.nohz_full_patience_delay= [KNL]
+			On callback-offloaded (rcu_nocbs) CPUs, avoid
+			disturbing RCU unless the grace period has
+			reached the specified age in milliseconds.
+			Defaults to zero.  Large values will be capped
+			at five seconds.  All values will be rounded down
+			to the nearest value representable by jiffies.
+
 	rcutree.qhimark= [KNL]
 			Set threshold of queued RCU callbacks beyond which
 			batch limiting is disabled.
......
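As a usage illustration (not from the patch itself): booting with rcutree.nohz_full_patience_delay=1000 would ask RCU to leave a callback-offloaded nohz_full CPU undisturbed unless the current grace period is at least one second old, and any value above 5000 would be capped at five seconds, as described in the entry above.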
@@ -18868,6 +18868,7 @@ M:	Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
 M:	Joel Fernandes <joel@joelfernandes.org>
 M:	Josh Triplett <josh@joshtriplett.org>
 M:	Boqun Feng <boqun.feng@gmail.com>
+M:	Uladzislau Rezki <urezki@gmail.com>
 R:	Steven Rostedt <rostedt@goodmis.org>
 R:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 R:	Lai Jiangshan <jiangshanlai@gmail.com>
......
This diff is collapsed.
@@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void);
 #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
 void exit_tasks_rcu_start(void);
-void exit_tasks_rcu_stop(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 #define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void);
 #define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
-static inline void exit_tasks_rcu_stop(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
@@ -421,11 +419,71 @@ static inline void rcu_preempt_sleep_check(void) { }
 			 "Illegal context switch in RCU-sched read-side critical section"); \
 	} while (0)
 
+// See RCU_LOCKDEP_WARN() for an explanation of the double call to
+// debug_lockdep_rcu_enabled().
+static inline bool lockdep_assert_rcu_helper(bool c)
+{
+	return debug_lockdep_rcu_enabled() &&
+	       (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) &&
+	       debug_lockdep_rcu_enabled();
+}
+
+/**
+ * lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock() in effect.
+ */
+#define lockdep_assert_in_rcu_read_lock() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect.
+ * Note that local_bh_disable() and friends do not suffice here, instead an
+ * actual rcu_read_lock_bh() is required.
+ */
+#define lockdep_assert_in_rcu_read_lock_bh() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock_sched()
+ * in effect.  Note that preempt_disable() and friends do not suffice here,
+ * instead an actual rcu_read_lock_sched() is required.
+ */
+#define lockdep_assert_in_rcu_read_lock_sched() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader
+ *
+ * Splats if lockdep is enabled and there is no RCU reader of any
+ * type in effect.  Note that regions of code protected by things like
+ * preempt_disable, local_bh_disable(), and local_irq_disable() all qualify
+ * as RCU readers.
+ *
+ * Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY
+ * kernels that are not also built with PREEMPT_COUNT.  But if you have
+ * lockdep enabled, you might as well also enable PREEMPT_COUNT.
+ */
+#define lockdep_assert_in_rcu_reader() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \
+					       !lock_is_held(&rcu_bh_lock_map) && \
+					       !lock_is_held(&rcu_sched_lock_map) && \
+					       preemptible()))
+
 #else /* #ifdef CONFIG_PROVE_RCU */
 
 #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c))
 #define rcu_sleep_check() do { } while (0)
+#define lockdep_assert_in_rcu_read_lock() do { } while (0)
+#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0)
+#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0)
+#define lockdep_assert_in_rcu_reader() do { } while (0)
 
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 /*
......
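For context on the new assertions above, here is a minimal usage sketch. It is not part of the patch: demo_lookup(), struct demo, and the slot parameter are hypothetical, and only the lockdep_assert_in_rcu_read_lock() call comes from the new API. Under CONFIG_PROVE_RCU the assertion splats if the caller forgot rcu_read_lock(); otherwise it compiles away.

#include <linux/errno.h>
#include <linux/rcupdate.h>

/* Hypothetical RCU-protected structure used only for this sketch. */
struct demo {
	int key;
	int val;
};

/* Reader-side lookup: the caller is required to hold rcu_read_lock(). */
static int demo_lookup(struct demo __rcu **slot, int key)
{
	struct demo *p;

	lockdep_assert_in_rcu_read_lock();	/* New assertion from this series. */
	p = rcu_dereference(*slot);
	return (p && p->key == key) ? p->val : -ENOENT;
}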
@@ -57,10 +57,45 @@ void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
 void synchronize_srcu(struct srcu_struct *ssp);
+
+#define SRCU_GET_STATE_COMPLETED 0x1
+
+/**
+ * get_completed_synchronize_srcu - Return a pre-completed polled state cookie
+ *
+ * Returns a value that poll_state_synchronize_srcu() will always treat
+ * as a cookie whose grace period has already completed.
+ */
+static inline unsigned long get_completed_synchronize_srcu(void)
+{
+	return SRCU_GET_STATE_COMPLETED;
+}
+
 unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
 unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
 
+// Maximum number of unsigned long values corresponding to
+// not-yet-completed SRCU grace periods.
+#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2
+
+/**
+ * same_state_synchronize_srcu - Are two old-state values identical?
+ * @oldstate1: First old-state value.
+ * @oldstate2: Second old-state value.
+ *
+ * The two old-state values must have been obtained from either
+ * get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or
+ * get_completed_synchronize_srcu().  Returns @true if the two values are
+ * identical and @false otherwise.  This allows structures whose lifetimes
+ * are tracked by old-state values to push these values to a list header,
+ * allowing those structures to be slightly smaller.
+ */
+static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2)
+{
+	return oldstate1 == oldstate2;
+}
+
 #ifdef CONFIG_NEED_SRCU_NMI_SAFE
 int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
......
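To show how the new polled-grace-period helpers above fit together, here is a hedged sketch; my_srcu, struct tracked, and the three wrapper functions are hypothetical, while get_completed_synchronize_srcu(), start_poll_synchronize_srcu(), and poll_state_synchronize_srcu() are the APIs from this hunk.

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);		/* Hypothetical srcu_struct for this sketch. */

struct tracked {
	unsigned long gp_cookie;	/* Polled grace-period cookie. */
};

/* Newly created objects may be treated as already grace-period safe. */
static void tracked_init(struct tracked *t)
{
	t->gp_cookie = get_completed_synchronize_srcu();
}

/* Begin waiting for a new SRCU grace period on behalf of @t. */
static void tracked_retire(struct tracked *t)
{
	t->gp_cookie = start_poll_synchronize_srcu(&my_srcu);
}

/* Returns true once @t's grace period has elapsed and it may be freed. */
static bool tracked_poll(struct tracked *t)
{
	return poll_state_synchronize_srcu(&my_srcu, t->gp_cookie);
}

Two such cookies can be compared with same_state_synchronize_srcu(), which simply tests them for equality, and at most NUM_ACTIVE_SRCU_POLL_OLDSTATE distinct not-yet-completed values need to be retained.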
@@ -248,24 +248,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->pid_allocated == init_pids)
 			break;
-		/*
-		 * Release tasks_rcu_exit_srcu to avoid following deadlock:
-		 *
-		 * 1) TASK A unshare(CLONE_NEWPID)
-		 * 2) TASK A fork() twice -> TASK B (child reaper for new ns)
-		 *    and TASK C
-		 * 3) TASK B exits, kills TASK C, waits for TASK A to reap it
-		 * 4) TASK A calls synchronize_rcu_tasks()
-		 *    -> synchronize_srcu(tasks_rcu_exit_srcu)
-		 * 5) *DEADLOCK*
-		 *
-		 * It is considered safe to release tasks_rcu_exit_srcu here
-		 * because we assume the current task can not be concurrently
-		 * reaped at this point.
-		 */
-		exit_tasks_rcu_stop();
 		schedule();
-		exit_tasks_rcu_start();
 	}
 	__set_current_state(TASK_RUNNING);
......
@@ -42,6 +42,7 @@
 #include "rcu.h"
 
+MODULE_DESCRIPTION("Read-Copy Update module-based scalability-test facility");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
......
@@ -51,6 +51,7 @@
 #include "rcu.h"
 
+MODULE_DESCRIPTION("Read-Copy Update module-based torture test facility");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
@@ -390,6 +391,7 @@ struct rcu_torture_ops {
 	int extendables;
 	int slow_gps;
 	int no_pi_lock;
+	int debug_objects;
 	const char *name;
 };
@@ -577,6 +579,7 @@ static struct rcu_torture_ops rcu_ops = {
 	.irq_capable = 1,
 	.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
 	.extendables = RCUTORTURE_MAX_EXTEND,
+	.debug_objects = 1,
 	.name = "rcu"
 };
@@ -747,6 +750,7 @@ static struct rcu_torture_ops srcu_ops = {
 	.cbflood_max = 50000,
 	.irq_capable = 1,
 	.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
+	.debug_objects = 1,
 	.name = "srcu"
 };
@@ -786,6 +790,7 @@ static struct rcu_torture_ops srcud_ops = {
 	.cbflood_max = 50000,
 	.irq_capable = 1,
 	.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
+	.debug_objects = 1,
 	.name = "srcud"
 };
@@ -2626,7 +2631,7 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
 	spin_lock_irqsave(&rfp->rcu_fwd_lock, flags);
 	rfcpp = rfp->rcu_fwd_cb_tail;
 	rfp->rcu_fwd_cb_tail = &rfcp->rfc_next;
-	WRITE_ONCE(*rfcpp, rfcp);
+	smp_store_release(rfcpp, rfcp);
 	WRITE_ONCE(rfp->n_launders_cb, rfp->n_launders_cb + 1);
 	i = ((jiffies - rfp->rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV));
 	if (i >= ARRAY_SIZE(rfp->n_launders_hist))
@@ -3455,7 +3460,6 @@ rcu_torture_cleanup(void)
 		cur_ops->gp_slow_unregister(NULL);
 }
 
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 static void rcu_torture_leak_cb(struct rcu_head *rhp)
 {
 }
@@ -3473,7 +3477,6 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
 	 */
 	pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
 }
-#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 /*
  * Verify that double-free causes debug-objects to complain, but only
@@ -3482,39 +3485,43 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
  */
 static void rcu_test_debug_objects(void)
 {
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 	struct rcu_head rh1;
 	struct rcu_head rh2;
+	int idx;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) {
+		pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_%s()\n",
+			 KBUILD_MODNAME, cur_ops->name);
+		return;
+	}
+
+	if (WARN_ON_ONCE(cur_ops->debug_objects &&
+			 (!cur_ops->call || !cur_ops->cb_barrier)))
+		return;
+
 	struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
 
 	init_rcu_head_on_stack(&rh1);
 	init_rcu_head_on_stack(&rh2);
-	pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
+	pr_alert("%s: WARN: Duplicate call_%s() test starting.\n", KBUILD_MODNAME, cur_ops->name);
 
 	/* Try to queue the rh2 pair of callbacks for the same grace period. */
-	preempt_disable(); /* Prevent preemption from interrupting test. */
-	rcu_read_lock(); /* Make it impossible to finish a grace period. */
-	call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */
-	local_irq_disable(); /* Make it harder to start a new grace period. */
-	call_rcu_hurry(&rh2, rcu_torture_leak_cb);
-	call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
+	idx = cur_ops->readlock(); /* Make it impossible to finish a grace period. */
+	cur_ops->call(&rh1, rcu_torture_leak_cb); /* Start grace period. */
+	cur_ops->call(&rh2, rcu_torture_leak_cb);
+	cur_ops->call(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
 	if (rhp) {
-		call_rcu_hurry(rhp, rcu_torture_leak_cb);
-		call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
+		cur_ops->call(rhp, rcu_torture_leak_cb);
+		cur_ops->call(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
 	}
-	local_irq_enable();
-	rcu_read_unlock();
-	preempt_enable();
+	cur_ops->readunlock(idx);
 
 	/* Wait for them all to get done so we can safely return. */
-	rcu_barrier();
-	pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
+	cur_ops->cb_barrier();
+	pr_alert("%s: WARN: Duplicate call_%s() test complete.\n", KBUILD_MODNAME, cur_ops->name);
 	destroy_rcu_head_on_stack(&rh1);
 	destroy_rcu_head_on_stack(&rh2);
 	kfree(rhp);
-#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-	pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
-#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 }
 
 static void rcutorture_sync(void)
......
@@ -63,6 +63,7 @@ do { \
 #define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)
 
+MODULE_DESCRIPTION("Scalability test for object reference mechanisms");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
......
@@ -277,7 +277,8 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 	unsigned long cur_s = READ_ONCE(ssp->srcu_idx);
 
 	barrier();
-	return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
+	return cookie == SRCU_GET_STATE_COMPLETED ||
+	       ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
 }
 EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
......
@@ -667,7 +667,10 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
 			pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
 				__func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)),
 				rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed);
-			return; /* Caller forgot to stop doing call_srcu()? */
+			return; // Caller forgot to stop doing call_srcu()?
+				// Or caller invoked start_poll_synchronize_srcu()
+				// and then cleanup_srcu_struct() before that grace
+				// period ended?
 		}
 	kfree(sup->node);
 	sup->node = NULL;
@@ -845,7 +848,6 @@ static void srcu_gp_end(struct srcu_struct *ssp)
 	bool cbs;
 	bool last_lvl;
 	int cpu;
-	unsigned long flags;
 	unsigned long gpseq;
 	int idx;
 	unsigned long mask;
@@ -907,12 +909,12 @@ static void srcu_gp_end(struct srcu_struct *ssp)
 	if (!(gpseq & counter_wrap_check))
 		for_each_possible_cpu(cpu) {
 			sdp = per_cpu_ptr(ssp->sda, cpu);
-			spin_lock_irqsave_rcu_node(sdp, flags);
+			spin_lock_irq_rcu_node(sdp);
 			if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
 				sdp->srcu_gp_seq_needed = gpseq;
 			if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
 				sdp->srcu_gp_seq_needed_exp = gpseq;
-			spin_unlock_irqrestore_rcu_node(sdp, flags);
+			spin_unlock_irq_rcu_node(sdp);
 		}
 
 	/* Callback initiation done, allow grace periods after next. */
@@ -1540,7 +1542,8 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
  */
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 {
-	if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
+	if (cookie != SRCU_GET_STATE_COMPLETED &&
+	    !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
 		return false;
 	// Ensure that the end of the SRCU grace period happens before
 	// any subsequent code that the caller might execute.
......
@@ -122,7 +122,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
 		 * we are called at early boot time but this shouldn't happen.
 		 */
 	}
-	WRITE_ONCE(rsp->gp_count, rsp->gp_count + 1);
+	rsp->gp_count++;
 	spin_unlock_irq(&rsp->rss_lock);
 
 	if (gp_state == GP_IDLE) {
@@ -151,15 +151,11 @@ void rcu_sync_enter(struct rcu_sync *rsp)
  */
 void rcu_sync_exit(struct rcu_sync *rsp)
 {
-	int gpc;
-
 	WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
-	WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
 
 	spin_lock_irq(&rsp->rss_lock);
-	gpc = rsp->gp_count - 1;
-	WRITE_ONCE(rsp->gp_count, gpc);
-	if (!gpc) {
+	WARN_ON_ONCE(rsp->gp_count == 0);
+	if (!--rsp->gp_count) {
 		if (rsp->gp_state == GP_PASSED) {
 			WRITE_ONCE(rsp->gp_state, GP_EXIT);
 			rcu_sync_call(rsp);
@@ -178,10 +174,10 @@ void rcu_sync_dtor(struct rcu_sync *rsp)
 {
 	int gp_state;
 
-	WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
 	WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
 
 	spin_lock_irq(&rsp->rss_lock);
+	WARN_ON_ONCE(rsp->gp_count);
 	if (rsp->gp_state == GP_REPLAY)
 		WRITE_ONCE(rsp->gp_state, GP_EXIT);
 	gp_state = rsp->gp_state;
......
@@ -858,7 +858,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 //	not know to synchronize with this RCU Tasks grace period) have
 //	completed exiting.  The synchronize_rcu() in rcu_tasks_postgp()
 //	will take care of any tasks stuck in the non-preemptible region
-//	of do_exit() following its call to exit_tasks_rcu_stop().
+//	of do_exit() following its call to exit_tasks_rcu_finish().
 // check_all_holdout_tasks(), repeatedly until holdout list is empty:
 //	Scans the holdout list, attempting to identify a quiescent state
 //	for each task on the list.  If there is a quiescent state, the
@@ -1220,7 +1220,7 @@ void exit_tasks_rcu_start(void)
 * Remove the task from the "yet another list" because do_exit() is now
 * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
 */
-void exit_tasks_rcu_stop(void)
+void exit_tasks_rcu_finish(void)
 {
 	unsigned long flags;
 	struct rcu_tasks_percpu *rtpcp;
@@ -1231,22 +1231,12 @@ void exit_tasks_rcu_stop(void)
 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
 	list_del_init(&t->rcu_tasks_exit_list);
 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
-}
 
-/*
- * Contribute to protect against tasklist scan blind spot while the
- * task is exiting and may be removed from the tasklist. See
- * corresponding synchronize_srcu() for further details.
- */
-void exit_tasks_rcu_finish(void)
-{
-	exit_tasks_rcu_stop();
-	exit_tasks_rcu_finish_trace(current);
+	exit_tasks_rcu_finish_trace(t);
 }
 
 #else /* #ifdef CONFIG_TASKS_RCU */
 void exit_tasks_rcu_start(void) { }
-void exit_tasks_rcu_stop(void) { }
 void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
@@ -1757,6 +1747,16 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop)
 	// allow safe access to the hop list.
 	for_each_online_cpu(cpu) {
 		rcu_read_lock();
+		// Note that cpu_curr_snapshot() picks up the target
+		// CPU's current task while its runqueue is locked with
+		// an smp_mb__after_spinlock().  This ensures that either
+		// the grace-period kthread will see that task's read-side
+		// critical section or the task will see the updater's pre-GP
+		// accesses.  The trailing smp_mb() in cpu_curr_snapshot()
+		// does not currently play a role other than simplify
+		// that function's ordering semantics.  If these simplified
+		// ordering semantics continue to be redundant, that smp_mb()
+		// might be removed.
 		t = cpu_curr_snapshot(cpu);
 		if (rcu_tasks_trace_pertask_prep(t, true))
 			trc_add_holdout(t, hop);
......
@@ -96,6 +96,7 @@ static struct rcu_state rcu_state = {
 	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
 	.srs_cleanup_work = __WORK_INITIALIZER(rcu_state.srs_cleanup_work,
 		rcu_sr_normal_gp_cleanup_work),
+	.srs_cleanups_pending = ATOMIC_INIT(0),
 };
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -175,6 +176,9 @@ static int gp_init_delay;
 module_param(gp_init_delay, int, 0444);
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);
+static int nohz_full_patience_delay;
+module_param(nohz_full_patience_delay, int, 0444);
+static int nohz_full_patience_delay_jiffies;
 
 // Add delay to rcu_read_unlock() for strict grace periods.
 static int rcu_unlock_delay;
@@ -295,16 +299,6 @@ static void rcu_dynticks_eqs_online(void)
 	ct_state_inc(RCU_DYNTICKS_IDX);
 }
 
-/*
- * Snapshot the ->dynticks counter with full ordering so as to allow
- * stable comparison of this counter with past and future snapshots.
- */
-static int rcu_dynticks_snap(int cpu)
-{
-	smp_mb();  // Fundamental RCU ordering guarantee.
-	return ct_dynticks_cpu_acquire(cpu);
-}
-
 /*
  * Return true if the snapshot returned from rcu_dynticks_snap()
  * indicates that RCU is in an extended quiescent state.
@@ -321,7 +315,15 @@
  */
 static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
 {
-	return snap != rcu_dynticks_snap(rdp->cpu);
+	/*
+	 * The first failing snapshot is already ordered against the accesses
+	 * performed by the remote CPU after it exits idle.
+	 *
+	 * The second snapshot therefore only needs to order against accesses
+	 * performed by the remote CPU prior to entering idle and therefore can
+	 * rely solely on acquire semantics.
+	 */
+	return snap != ct_dynticks_cpu_acquire(rdp->cpu);
 }
 
 /*
@@ -769,7 +771,18 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-	rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu);
+	/*
+	 * Full ordering between remote CPU's post idle accesses and updater's
+	 * accesses prior to current GP (and also the started GP sequence number)
+	 * is enforced by rcu_seq_start() implicit barrier and even further by
+	 * smp_mb__after_unlock_lock() barriers chained all the way throughout the
+	 * rnp locking tree since rcu_gp_init() and up to the current leaf rnp
+	 * locking.
+	 *
+	 * Ordering between remote CPU's pre idle accesses and post grace period
+	 * updater's accesses is enforced by the below acquire semantic.
+	 */
+	rdp->dynticks_snap = ct_dynticks_cpu_acquire(rdp->cpu);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
 		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
 		rcu_gpnum_ovf(rdp->mynode, rdp);
@@ -1660,6 +1673,9 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
 		rcu_sr_put_wait_head(rcu);
 	}
 
+	/* Order list manipulations with atomic access. */
+	atomic_dec_return_release(&rcu_state.srs_cleanups_pending);
 }
 
 /*
@@ -1667,7 +1683,7 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
 */
 static void rcu_sr_normal_gp_cleanup(void)
 {
-	struct llist_node *wait_tail, *next, *rcu;
+	struct llist_node *wait_tail, *next = NULL, *rcu = NULL;
 	int done = 0;
 
 	wait_tail = rcu_state.srs_wait_tail;
@@ -1693,16 +1709,34 @@ static void rcu_sr_normal_gp_cleanup(void)
 			break;
 	}
 
-	// concurrent sr_normal_gp_cleanup work might observe this update.
-	smp_store_release(&rcu_state.srs_done_tail, wait_tail);
+	/*
+	 * Fast path, no more users to process except putting the second last
+	 * wait head if no inflight-workers. If there are in-flight workers,
+	 * they will remove the last wait head.
+	 *
+	 * Note that the ACQUIRE orders atomic access with list manipulation.
+	 */
+	if (wait_tail->next && wait_tail->next->next == NULL &&
+	    rcu_sr_is_wait_head(wait_tail->next) &&
+	    !atomic_read_acquire(&rcu_state.srs_cleanups_pending)) {
+		rcu_sr_put_wait_head(wait_tail->next);
+		wait_tail->next = NULL;
+	}
+
+	/* Concurrent sr_normal_gp_cleanup work might observe this update. */
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
+	smp_store_release(&rcu_state.srs_done_tail, wait_tail);
 
 	/*
 	 * We schedule a work in order to perform a final processing
 	 * of outstanding users(if still left) and releasing wait-heads
 	 * added by rcu_sr_normal_gp_init() call.
 	 */
-	queue_work(sync_wq, &rcu_state.srs_cleanup_work);
+	if (wait_tail->next) {
+		atomic_inc(&rcu_state.srs_cleanups_pending);
+		if (!queue_work(sync_wq, &rcu_state.srs_cleanup_work))
+			atomic_dec(&rcu_state.srs_cleanups_pending);
+	}
 }
 
 /*
@@ -1810,7 +1844,7 @@ static noinline_for_stack bool rcu_gp_init(void)
 	WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
 	/* Exclude CPU hotplug operations. */
 	rcu_for_each_leaf_node(rnp) {
-		local_irq_save(flags);
+		local_irq_disable();
 		arch_spin_lock(&rcu_state.ofl_lock);
 		raw_spin_lock_rcu_node(rnp);
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -1818,7 +1852,7 @@ static noinline_for_stack bool rcu_gp_init(void)
 			/* Nothing to do on this leaf rcu_node structure. */
 			raw_spin_unlock_rcu_node(rnp);
 			arch_spin_unlock(&rcu_state.ofl_lock);
-			local_irq_restore(flags);
+			local_irq_enable();
 			continue;
 		}
@@ -1855,7 +1889,7 @@
 		raw_spin_unlock_rcu_node(rnp);
 		arch_spin_unlock(&rcu_state.ofl_lock);
-		local_irq_restore(flags);
+		local_irq_enable();
 	}
 	rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
@@ -4313,11 +4347,15 @@ static int rcu_pending(int user)
 		return 1;
 
 	/* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */
-	if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())
+	gp_in_progress = rcu_gp_in_progress();
+	if ((user || rcu_is_cpu_rrupt_from_idle() ||
+	     (gp_in_progress &&
+	      time_before(jiffies, READ_ONCE(rcu_state.gp_start) +
+				   nohz_full_patience_delay_jiffies))) &&
+	    rcu_nohz_full_cpu())
 		return 0;
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	gp_in_progress = rcu_gp_in_progress();
 	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)
 		return 1;
@@ -4767,7 +4805,7 @@ rcu_boot_init_percpu_data(int cpu)
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
 	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(ct->dynticks_nesting != 1);
-	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)));
+	WARN_ON_ONCE(rcu_dynticks_in_eqs(ct_dynticks_cpu(cpu)));
 	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_ofl_gp_state = RCU_GP_CLEANED;
@@ -5110,11 +5148,15 @@ void rcutree_migrate_callbacks(int cpu)
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	bool needwake;
 
-	if (rcu_rdp_is_offloaded(rdp) ||
-	    rcu_segcblist_empty(&rdp->cblist))
-		return;  /* No callbacks to migrate. */
+	if (rcu_rdp_is_offloaded(rdp))
+		return;
 
 	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+	if (rcu_segcblist_empty(&rdp->cblist)) {
+		raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+		return;  /* No callbacks to migrate. */
+	}
 	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
 	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
......
@@ -223,7 +223,6 @@ struct rcu_data {
 	struct swait_queue_head nocb_state_wq; /* For offloading state changes */
 	struct task_struct *nocb_gp_kthread;
 	raw_spinlock_t nocb_lock;	/* Guard following pair of fields. */
-	atomic_t nocb_lock_contended;	/* Contention experienced. */
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
 	unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */
@@ -420,6 +419,7 @@ struct rcu_state {
 	struct llist_node *srs_done_tail; /* ready for GP users. */
 	struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
 	struct work_struct srs_cleanup_work;
+	atomic_t srs_cleanups_pending; /* srs inflight worker cleanups. */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
......
@@ -265,7 +265,12 @@ static bool sync_exp_work_done(unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(s)) {
 		trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
-		smp_mb(); /* Ensure test happens before caller kfree(). */
+		/*
+		 * Order GP completion with preceding accesses. Order also GP
+		 * completion with post GP update side accesses. Pairs with
+		 * rcu_seq_end().
+		 */
+		smp_mb();
 		return true;
 	}
 	return false;
@@ -357,7 +362,21 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
 		    !(rnp->qsmaskinitnext & mask)) {
 			mask_ofl_test |= mask;
 		} else {
-			snap = rcu_dynticks_snap(cpu);
+			/*
+			 * Full ordering between remote CPU's post idle accesses
+			 * and updater's accesses prior to current GP (and also
+			 * the started GP sequence number) is enforced by
+			 * rcu_seq_start() implicit barrier, relayed by kworkers
+			 * locking and even further by smp_mb__after_unlock_lock()
+			 * barriers chained all the way throughout the rnp locking
+			 * tree since sync_exp_reset_tree() and up to the current
+			 * leaf rnp locking.
+			 *
+			 * Ordering between remote CPU's pre idle accesses and
+			 * post grace period updater's accesses is enforced by the
+			 * below acquire semantic.
+			 */
+			snap = ct_dynticks_cpu_acquire(cpu);
 			if (rcu_dynticks_in_eqs(snap))
 				mask_ofl_test |= mask;
 			else
@@ -953,7 +972,6 @@ void synchronize_rcu_expedited(void)
 	rnp = rcu_get_root();
 	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
 		   sync_exp_work_done(s));
-	smp_mb(); /* Work actions happen before return. */
 
 	/* Let the next expedited grace period start. */
 	mutex_unlock(&rcu_state.exp_mutex);
......
This diff is collapsed.
@@ -28,8 +28,8 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
 		  !(lockdep_is_held(&rcu_state.barrier_mutex) ||
 		    (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
 		    rcu_lockdep_is_held_nocb(rdp) ||
-		    (rdp == this_cpu_ptr(&rcu_data) &&
-		     !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
+		    (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
+		     rdp == this_cpu_ptr(&rcu_data)) ||
 		    rcu_current_is_nocb_kthread(rdp)),
 		"Unsafe read of RCU_NOCB offloaded state"
 		);
@@ -93,6 +93,16 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
 	if (gp_cleanup_delay)
 		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (nohz_full_patience_delay < 0) {
+		pr_info("\tRCU NOCB CPU patience negative (%d), resetting to zero.\n", nohz_full_patience_delay);
+		nohz_full_patience_delay = 0;
+	} else if (nohz_full_patience_delay > 5 * MSEC_PER_SEC) {
+		pr_info("\tRCU NOCB CPU patience too large (%d), resetting to %ld.\n", nohz_full_patience_delay, 5 * MSEC_PER_SEC);
+		nohz_full_patience_delay = 5 * MSEC_PER_SEC;
+	} else if (nohz_full_patience_delay) {
+		pr_info("\tRCU NOCB CPU patience set to %d milliseconds.\n", nohz_full_patience_delay);
+	}
+	nohz_full_patience_delay_jiffies = msecs_to_jiffies(nohz_full_patience_delay);
 	if (!use_softirq)
 		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
......
@@ -501,7 +501,7 @@ static void print_cpu_stall_info(int cpu)
 	}
 	delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
 	falsepositive = rcu_is_gp_kthread_starving(NULL) &&
-			rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
+			rcu_dynticks_in_eqs(ct_dynticks_cpu(cpu));
 	rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j);
 	if (rcuc_starved)
 		// Print signed value, as negative values indicate a probable bug.
@@ -515,7 +515,7 @@ static void print_cpu_stall_info(int cpu)
 	       rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
			"!."[!delta],
 	       ticks_value, ticks_title,
-	       rcu_dynticks_snap(cpu) & 0xffff,
+	       ct_dynticks_cpu(cpu) & 0xffff,
 	       ct_dynticks_nesting_cpu(cpu), ct_dynticks_nmi_nesting_cpu(cpu),
 	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
 	       data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
......
@@ -4467,12 +4467,7 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
 * @cpu: The CPU on which to snapshot the task.
 *
 * Returns the task_struct pointer of the task "currently" running on
- * the specified CPU.  If the same task is running on that CPU throughout,
- * the return value will be a pointer to that task's task_struct structure.
- * If the CPU did any context switches even vaguely concurrently with the
- * execution of this function, the return value will be a pointer to the
- * task_struct structure of a randomly chosen task that was running on
- * that CPU somewhere around the time that this function was executing.
+ * the specified CPU.
 *
 * If the specified CPU was offline, the return value is whatever it
 * is, perhaps a pointer to the task_struct structure of that CPU's idle
@@ -4486,11 +4481,16 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
 */
 struct task_struct *cpu_curr_snapshot(int cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *t;
+	struct rq_flags rf;
 
-	smp_mb(); /* Pairing determined by caller's synchronization design. */
+	rq_lock_irqsave(rq, &rf);
+	smp_mb__after_spinlock(); /* Pairing determined by caller's synchronization design. */
 	t = rcu_dereference(cpu_curr(cpu));
+	rq_unlock_irqrestore(rq, &rf);
 	smp_mb(); /* Pairing determined by caller's synchronization design. */
+
 	return t;
 }
......
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
# Run bpftrace to obtain a histogram of the types of primitives used to
# initiate RCU grace periods. The count associated with rcu_gp_init()
# is the number of normal (non-expedited) grace periods.
#
# Usage: rcu-updaters.sh [ duration-in-seconds ]
#
# Note that not all kernel builds have all of these functions. In those
# that do not, this script will issue a diagnostic for each that is not
# found, but continue normally for the rest of the functions.
duration=${1}
if test -n "${duration}"
then
exitclause='interval:s:'"${duration}"' { exit(); }'
else
echo 'Hit control-C to end sample and print results.'
fi
bpftrace -e 'kprobe:kvfree_call_rcu,
kprobe:call_rcu,
kprobe:call_rcu_tasks,
kprobe:call_rcu_tasks_rude,
kprobe:call_rcu_tasks_trace,
kprobe:call_srcu,
kprobe:rcu_barrier,
kprobe:rcu_barrier_tasks,
kprobe:rcu_barrier_tasks_rude,
kprobe:rcu_barrier_tasks_trace,
kprobe:srcu_barrier,
kprobe:synchronize_rcu,
kprobe:synchronize_rcu_expedited,
kprobe:synchronize_rcu_tasks,
kprobe:synchronize_rcu_tasks_rude,
kprobe:synchronize_rcu_tasks_trace,
kprobe:synchronize_srcu,
kprobe:synchronize_srcu_expedited,
kprobe:get_state_synchronize_rcu,
kprobe:get_state_synchronize_rcu_full,
kprobe:start_poll_synchronize_rcu,
kprobe:start_poll_synchronize_rcu_expedited,
kprobe:start_poll_synchronize_rcu_full,
kprobe:start_poll_synchronize_rcu_expedited_full,
kprobe:poll_state_synchronize_rcu,
kprobe:poll_state_synchronize_rcu_full,
kprobe:cond_synchronize_rcu,
kprobe:cond_synchronize_rcu_full,
kprobe:start_poll_synchronize_srcu,
kprobe:poll_state_synchronize_srcu,
kprobe:rcu_gp_init
{ @counts[func] = count(); } '"${exitclause}"
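Usage note, per the script's own header comment: rcu-updaters.sh 30 samples for 30 seconds and then prints the histogram, while invoking it without an argument samples until interrupted with control-C. bpftrace generally must be run with root privileges, and kernels lacking some of the probed functions will simply produce a diagnostic for each missing kprobe and continue with the rest.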