Commit 4057adaf authored by Linus Torvalds

Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar:

 - updates to the handling of expedited grace periods

 - updates to reduce lock contention in the rcu_node combining tree

   [ These are in preparation for the consolidation of RCU-bh,
     RCU-preempt, and RCU-sched into a single flavor, which was
     requested by Linus in response to a security flaw whose root cause
     included confusion between the multiple flavors of RCU ]

 - torture-test updates that save their users some time and effort

 - miscellaneous fixes

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  rcu/x86: Provide early rcu_cpu_starting() callback
  torture: Make kvm-find-errors.sh find build warnings
  rcutorture: Abbreviate kvm.sh summary lines
  rcutorture: Print end-of-test state in kvm.sh summary
  rcutorture: Print end-of-test state
  torture: Fold parse-torture.sh into parse-console.sh
  torture: Add a script to edit output from failed runs
  rcu: Update list of rcu_future_grace_period() trace events
  rcu: Drop early GP request check from rcu_gp_kthread()
  rcu: Simplify and inline cpu_needs_another_gp()
  rcu: The rcu_gp_cleanup() function does not need cpu_needs_another_gp()
  rcu: Make rcu_start_this_gp() check for out-of-range requests
  rcu: Add funnel locking to rcu_start_this_gp()
  rcu: Make rcu_start_future_gp() caller select grace period
  rcu: Inline rcu_start_gp_advanced() into rcu_start_future_gp()
  rcu: Clear request other than RCU_GP_FLAG_INIT at GP end
  rcu: Cleanup, don't put ->completed into an int
  rcu: Switch __rcu_process_callbacks() to rcu_accelerate_cbs()
  rcu: Avoid __call_rcu_core() root rcu_node ->lock acquisition
  rcu: Make rcu_migrate_callbacks wake GP kthread when needed
  ...
parents 137f5ae4 52f2b34f
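Below is the combined diff for the merge. For readers following the flavor-consolidation note above: before the consolidation, RCU-sched, RCU-bh, and preemptible RCU each had their own read-side markers and update-side primitives, and a writer had to pair its grace-period primitive with the flavor its readers actually used. The following is a minimal illustrative sketch of that pairing, not code from this merge; the demo_* names are hypothetical, while the RCU APIs shown are the standard ones of this kernel era.

```c
/* Illustrative only: per-flavor read-side/update-side pairing in
 * pre-consolidation kernels (demo_* symbols are hypothetical).
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_item {
	int val;
};

static struct demo_item __rcu *demo_ptr;

static int demo_read(void)
{
	struct demo_item *p;
	int val = -1;

	rcu_read_lock();		/* classic/preemptible-RCU reader */
	p = rcu_dereference(demo_ptr);
	if (p)
		val = p->val;
	rcu_read_unlock();
	return val;
}

static void demo_update(struct demo_item *newp)
{
	struct demo_item *oldp;

	/* Assumes the caller serializes updaters. */
	oldp = rcu_dereference_protected(demo_ptr, 1);
	rcu_assign_pointer(demo_ptr, newp);
	synchronize_rcu();		/* must match rcu_read_lock() readers */
	kfree(oldp);

	/* Readers using rcu_read_lock_bh() or rcu_read_lock_sched() instead
	 * needed synchronize_rcu_bh()/synchronize_sched() (or the matching
	 * call_rcu_bh()/call_rcu_sched()), which is the mismatch risk that
	 * motivated consolidating the flavors.
	 */
}
```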
What is RCU? -- "Read, Copy, Update"
Please note that the "What is RCU?" LWN series is an excellent place
to start learning about RCU:
...
@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
+ #include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>

@@ -793,6 +794,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
+ rcu_cpu_starting(smp_processor_id());
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
...
@@ -357,7 +357,7 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct(&head->srcu);
+ cleanup_srcu_struct_quiesced(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
}
...
@@ -108,7 +108,6 @@ void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
- void rcu_cpu_starting(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
#ifdef CONFIG_RCU_STALL_COMMON

@@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
/**
- * cond_resched_rcu_qs - Report potential quiescent states to RCU
+ * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
* report potential quiescent states to RCU-tasks even if the cond_resched()
* machinery were to be shut off, as some advocate for PREEMPT kernels.
*/
- #define cond_resched_rcu_qs() \
+ #define cond_resched_tasks_rcu_qs() \
do { \
if (!cond_resched()) \
rcu_note_voluntary_context_switch_lite(current); \
...
@@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }
#define rcutree_offline_cpu NULL
#define rcutree_dead_cpu NULL
#define rcutree_dying_cpu NULL
+ static inline void rcu_cpu_starting(unsigned int cpu) { }
#endif /* __LINUX_RCUTINY_H */
@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
void rcu_barrier(void);
void rcu_barrier_bh(void);
void rcu_barrier_sched(void);
+ bool rcu_eqs_special_set(int cpu);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
unsigned long get_state_synchronize_sched(void);

@@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);
int rcutree_offline_cpu(unsigned int cpu);
int rcutree_dead_cpu(unsigned int cpu);
int rcutree_dying_cpu(unsigned int cpu);
+ void rcu_cpu_starting(unsigned int cpu);
#endif /* __LINUX_RCUTREE_H */
@@ -1661,7 +1661,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);

@@ -1681,13 +1680,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
- extern int __cond_resched_softirq(void);
- #define cond_resched_softirq() ({ \
- ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
- __cond_resched_softirq(); \
- })
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
...
@@ -69,11 +69,45 @@ struct srcu_struct { };
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
- void cleanup_srcu_struct(struct srcu_struct *sp);
+ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
+ /**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+ static inline void cleanup_srcu_struct(struct srcu_struct *sp)
+ {
+ _cleanup_srcu_struct(sp, false);
+ }
+ /**
+ * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory. Also,
+ * all grace-period processing must have completed.
+ *
+ * "Completed" means that the last synchronize_srcu() and
+ * synchronize_srcu_expedited() calls must have returned before the call
+ * to cleanup_srcu_struct_quiesced(). It also means that the callback
+ * from the last call_srcu() must have been invoked before the call to
+ * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
+ * with this last. Violating these rules will get you a WARN_ON() splat
+ * (with high probability, anyway), and will also cause the srcu_struct
+ * to be leaked.
+ */
+ static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
+ {
+ _cleanup_srcu_struct(sp, true);
+ }
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
...
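The kernel-doc above spells out when the quiesced variant is safe. As a rough illustration (hypothetical demo_* names; this is not code from the merge), a user that has stopped posting SRCU work and has waited for its callbacks can use the quiesced form:

```c
/* Illustrative only: tearing down an srcu_struct once all SRCU activity
 * has provably completed (demo_* symbols are hypothetical).
 */
#include <linux/srcu.h>

static struct srcu_struct demo_srcu;

static int demo_init(void)
{
	return init_srcu_struct(&demo_srcu);
}

static void demo_exit(void)
{
	/*
	 * Ensure all prior call_srcu() callbacks have been invoked and all
	 * synchronize_srcu() calls have returned before the quiesced cleanup;
	 * if that cannot be guaranteed, use plain cleanup_srcu_struct(),
	 * which flushes the remaining work itself.
	 */
	srcu_barrier(&demo_srcu);
	cleanup_srcu_struct_quiesced(&demo_srcu);
}
```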
@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
);
/*
- * Tracepoint for future grace-period events, including those for no-callbacks
- * CPUs. The caller should pull the data from the rcu_node structure,
- * other than rcuname, which comes from the rcu_state structure, and event,
- * which is one of the following:
+ * Tracepoint for future grace-period events. The caller should pull
+ * the data from the rcu_node structure, other than rcuname, which comes
+ * from the rcu_state structure, and event, which is one of the following:
*
- * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startleaf": Request a grace period based on leaf-node data.
+ * "Prestarted": Someone beat us to the request
* "Startedleaf": Leaf-node start proved sufficient.
* "Startedleafroot": Leaf-node start proved sufficient after checking root.
* "Startedroot": Requested a nocb grace period based on root-node data.
+ * "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
* "ResumeWait": Resume waiting after signal.
* "EndWait": Complete wait.
* "Cleanup": Clean up rcu_node structure after previous GP.
- * "CleanupMore": Clean up, and another no-CB GP is needed.
+ * "CleanupMore": Clean up, and another GP is needed.
*/
TRACE_EVENT(rcu_future_grace_period,
...
@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
}
}
+ /* Returns first leaf rcu_node of the specified RCU flavor. */
+ #define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
+ /* Is this rcu_node a leaf? */
+ #define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.

@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
- for ((rnp) = &(rsp)->node[0]; \
- (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+ for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rsp, rnp); (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state

@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
- for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+ for ((rnp) = rcu_first_leaf_node(rsp); \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/*

@@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);
void rcu_bh_force_quiescent_state(void);
void rcu_sched_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
+ extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* #else #ifdef CONFIG_TINY_RCU */
#ifdef CONFIG_RCU_NOCB_CPU
...
@@ -403,24 +403,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
return true;
}
- /*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq". We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
- bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq)
- {
- int i;
- for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
- if (rsclp->tails[i - 1] != rsclp->tails[i] &&
- ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
- return true;
- return false;
- }
/*
* Merge the source rcu_segcblist structure into the destination
* rcu_segcblist structure, then initialize the source. Any pending
...

@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
- bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
*/
static void rcu_perf_wait_shutdown(void)
{
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
...
@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
static void srcu_torture_cleanup(void)
{
+ static DEFINE_TORTURE_RANDOM(rand);
+ if (torture_random(&rand) & 0x800)
cleanup_srcu_struct(&srcu_ctld);
+ else
+ cleanup_srcu_struct_quiesced(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}

@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
{
+ int flags = 0;
+ unsigned long gpnum = 0;
+ unsigned long completed = 0;
int i;
rcutorture_record_test_transition();

@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
fakewriter_tasks = NULL;
}
+ rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
+ srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+ &flags, &gpnum, &completed);
+ pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
+ cur_ops->name, gpnum, completed, flags);
torture_stop_kthread(rcu_torture_stats, stats_task);
torture_stop_kthread(rcu_torture_fqs, fqs_task);
for (i = 0; i < ncbflooders; i++)
...
@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
- void cleanup_srcu_struct(struct srcu_struct *sp)
+ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
+ if (quiesced)
+ WARN_ON(work_pending(&sp->srcu_work));
+ else
flush_work(&sp->srcu_work);
WARN_ON(sp->srcu_gp_running);
WARN_ON(sp->srcu_gp_waiting);
WARN_ON(sp->srcu_cb_head);
WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
}
- EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Removes the count for the old reader from the appropriate element of
...
@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
return SRCU_INTERVAL;
}
- /**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
- void cleanup_srcu_struct(struct srcu_struct *sp)
+ /* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
+ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
int cpu;
if (WARN_ON(!srcu_get_delay(sp)))
- return; /* Leakage unless caller handles error. */
+ return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(sp)))
- return; /* Leakage unless caller handles error. */
+ return; /* Just leak it! */
- flush_delayed_work(&sp->work);
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&sp->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&sp->work);
+ }
for_each_possible_cpu(cpu)
- flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ }
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(sp))) {
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));

@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
free_percpu(sp->sda);
sp->sda = NULL;
}
- EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
...
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
static ulong jiffies_till_sched_qs = HZ / 10;
module_param(jiffies_till_sched_qs, ulong, 0444);
- static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(void);

@@ -710,44 +708,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
return &rsp->node[0];
}
- /*
- * Is there any need for future grace periods?
- * Interrupts must be disabled. If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
- static int rcu_future_needs_gp(struct rcu_state *rsp)
- {
- struct rcu_node *rnp = rcu_get_root(rsp);
- int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
- int *fp = &rnp->need_future_gp[idx];
- lockdep_assert_irqs_disabled();
- return READ_ONCE(*fp);
- }
- /*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
- static bool
- cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
- {
- lockdep_assert_irqs_disabled();
- if (rcu_gp_in_progress(rsp))
- return false; /* No, a grace period is already in progress. */
- if (rcu_future_needs_gp(rsp))
- return true; /* Yes, a no-CBs CPU needs one. */
- if (!rcu_segcblist_is_enabled(&rdp->cblist))
- return false; /* No, this is a no-CBs (or offline) CPU. */
- if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
- return true; /* Yes, CPU has newly registered callbacks. */
- if (rcu_segcblist_future_gp_needed(&rdp->cblist,
- READ_ONCE(rsp->completed)))
- return true; /* Yes, CBs for future grace period. */
- return false; /* No grace period needed. */
- }
/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
}
/*
- * Has this CPU encountered a cond_resched_rcu_qs() since the
- * beginning of the grace period? For this to be the case,
- * the CPU has to have noticed the current grace period. This
- * might not be the case for nohz_full CPUs looping in the kernel.
+ * Has this CPU encountered a cond_resched() since the beginning
+ * of the grace period? For this to be the case, the CPU has to
+ * have noticed the current grace period. This might not be the
+ * case for nohz_full CPUs looping in the kernel.
*/
jtsq = jiffies_till_sched_qs;
ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1641,6 +1601,21 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
return rnp->completed + 1;
+ /*
+ * If the current rcu_node structure believes that RCU is
+ * idle, and if the rcu_state structure does not yet reflect
+ * the start of a new grace period, then the next grace period
+ * will suffice. The memory barrier is needed to accurately
+ * sample the rsp->gpnum, and pairs with the second lock
+ * acquisition in rcu_gp_init(), which is augmented with
+ * smp_mb__after_unlock_lock() for this purpose.
+ */
+ if (rnp->gpnum == rnp->completed) {
+ smp_mb(); /* See above block comment. */
+ if (READ_ONCE(rsp->gpnum) == rnp->completed)
+ return rnp->completed + 1;
+ }
/*
* Otherwise, wait for a possible partial grace period and
* then the subsequent full grace period.
@@ -1648,11 +1623,8 @@
return rnp->completed + 2;
}
- /*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
- static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ /* Trace-event wrapper function for trace_rcu_future_grace_period. */
+ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
unsigned long c, const char *s)
{
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
/*
- * Start some future grace period, as needed to handle newly arrived
+ * Start the specified grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field. Returns true if there
+ * rcu_node structure's ->need_future_gp[] field. Returns true if there
* is reason to awaken the grace-period kthread.
*
- * The caller must hold the specified rcu_node structure's ->lock.
+ * The caller must hold the specified rcu_node structure's ->lock, which
+ * is why the caller is responsible for waking the grace-period kthread.
*/
- static bool __maybe_unused
- rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long *c_out)
+ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c)
{
- unsigned long c;
bool ret = false;
- struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
- raw_lockdep_assert_held_rcu_node(rnp);
- /*
- * Pick up grace-period number for new callbacks. If this
- * grace period is already marked as needed, return to the caller.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp);
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
- if (rnp->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
- goto out;
- }
- /*
- * If either this rcu_node structure or the root rcu_node structure
- * believe that a grace period is in progress, then we must wait
- * for the one following, which is in "c". Because our request
- * will be noticed at the end of the current grace period, we don't
- * need to explicitly start one. We only do the lockless check
- * of rnp_root's fields if the current rcu_node structure thinks
- * there is no grace period in flight, and because we hold rnp->lock,
- * the only possible change is when rnp_root's two fields are
- * equal, in which case rnp_root->gpnum might be concurrently
- * incremented. But that is OK, as it will just result in our
- * doing some extra useless work.
- */
- if (rnp->gpnum != rnp->completed ||
- READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
- rnp->need_future_gp[c & 0x1]++;
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
- goto out;
- }
+ struct rcu_state *rsp = rdp->rsp;
+ struct rcu_node *rnp_root;
/*
- * There might be no grace period in progress. If we don't already
- * hold it, acquire the root rcu_node structure's lock in order to
- * start one (if needed).
+ * Use funnel locking to either acquire the root rcu_node
+ * structure's lock or bail out if the need for this grace period
+ * has already been recorded -- or has already started. If there
+ * is already a grace period in progress in a non-leaf node, no
+ * recording is needed because the end of the grace period will
+ * scan the leaf rcu_node structures. Note that rnp->lock must
+ * not be released.
*/
- if (rnp != rnp_root)
- raw_spin_lock_rcu_node(rnp_root);
- /*
- * Get a new grace-period number. If there really is no grace
- * period in progress, it will be smaller than the one we obtained
- * earlier. Adjust callbacks as needed.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp_root);
- if (!rcu_is_nocb_cpu(rdp->cpu))
- (void)rcu_segcblist_accelerate(&rdp->cblist, c);
- /*
- * If the needed for the required grace period is already
- * recorded, trace and leave.
- */
- if (rnp_root->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
- goto unlock_out;
- }
+ raw_lockdep_assert_held_rcu_node(rnp);
+ trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+ for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
+ if (rnp_root != rnp)
+ raw_spin_lock_rcu_node(rnp_root);
+ WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
+ need_future_gp_mask(), c));
+ if (need_future_gp_element(rnp_root, c) ||
+ ULONG_CMP_GE(rnp_root->gpnum, c) ||
+ (rnp != rnp_root &&
+ rnp_root->gpnum != rnp_root->completed)) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+ goto unlock_out;
+ }
+ need_future_gp_element(rnp_root, c) = true;
+ if (rnp_root != rnp && rnp_root->parent != NULL)
+ raw_spin_unlock_rcu_node(rnp_root);
+ if (!rnp_root->parent)
+ break; /* At root, and perhaps also leaf. */
+ }
- /* Record the need for the future grace period. */
- rnp_root->need_future_gp[c & 0x1]++;
- /* If a grace period is not already in progress, start one. */
+ /* If GP already in progress, just leave, otherwise start one. */
if (rnp_root->gpnum != rnp_root->completed) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
- } else {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
- ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
+ goto unlock_out;
}
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+ if (!rsp->gp_kthread) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+ goto unlock_out;
+ }
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+ ret = true; /* Caller must wake GP kthread. */
unlock_out:
if (rnp != rnp_root)
raw_spin_unlock_rcu_node(rnp_root);
- out:
- if (c_out != NULL)
- *c_out = c;
return ret;
}
@@ -1758,15 +1701,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* Clean up any old requests for the just-ended grace period. Also return
* whether any additional grace periods have been requested.
*/
- static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- int c = rnp->completed;
- int needmore;
+ unsigned long c = rnp->completed;
+ bool needmore;
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- rnp->need_future_gp[c & 0x1] = 0;
- needmore = rnp->need_future_gp[(c + 1) & 0x1];
- trace_rcu_future_gp(rnp, rdp, c,
+ need_future_gp_element(rnp, c) = false;
+ needmore = need_any_future_gp(rnp);
+ trace_rcu_this_gp(rnp, rdp, c,
needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
}
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
+ unsigned long c;
bool ret = false;
raw_lockdep_assert_held_rcu_node(rnp);

@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
* accelerating callback invocation to an earlier grace-period
* number.
*/
- if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
- ret = rcu_start_future_gp(rnp, rdp, NULL);
+ c = rcu_cbs_completed(rsp, rnp);
+ if (rcu_segcblist_accelerate(&rdp->cblist, c))
+ ret = rcu_start_this_gp(rnp, rdp, c);
/* Trace depending on how much we were able to accelerate. */
if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq_rcu_node(rnp);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
}
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
bool needgp = false;
- int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
struct swait_queue_head *sq;

@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
if (rnp == rdp->mynode)
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
- nocb += rcu_future_gp_cleanup(rsp, rnp);
+ needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
sq = rcu_nocb_gp_get(rnp);
raw_spin_unlock_irq_rcu_node(rnp);
rcu_nocb_gp_cleanup(sq);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
- rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
WRITE_ONCE(rsp->completed, rsp->gpnum);
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->gp_state = RCU_GP_IDLE;
+ /* Check for GP requests since above loop. */
rdp = this_cpu_ptr(rsp->rda);
+ if (need_any_future_gp(rnp)) {
+ trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+ TPS("CleanupMore"));
+ needgp = true;
+ }
/* Advance CBs to reduce false positives below. */
- needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
- if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+ if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
- trace_rcu_grace_period(rsp->name,
- READ_ONCE(rsp->gpnum),
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
TPS("newreq"));
}
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
raw_spin_unlock_irq_rcu_node(rnp);
}
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,

@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsend"));
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
ret = 0; /* Force full wait till next FQS. */
j = jiffies_till_next_fqs;

@@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
} else {
/* Deal with stray signal. */
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
@@ -2282,71 +2230,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
}
- /*
- * Start a new RCU grace period if warranted, re-initializing the hierarchy
- * in preparation for detecting the next grace period. The caller must hold
- * the root node's ->lock and hard irqs must be disabled.
- *
- * Note that it is legal for a dying CPU (which is marked as offline) to
- * invoke this function. This can happen when the dying CPU reports its
- * quiescent state.
- *
- * Returns true if the grace-period kthread must be awakened.
- */
- static bool
- rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp)
- {
- raw_lockdep_assert_held_rcu_node(rnp);
- if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
- /*
- * Either we have not yet spawned the grace-period
- * task, this CPU does not need another grace period,
- * or a grace period is already in progress.
- * Either way, don't start a new grace period.
- */
- return false;
- }
- WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
- trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
- TPS("newreq"));
- /*
- * We can't do wakeups while holding the rnp->lock, as that
- * could cause possible deadlocks with the rq->lock. Defer
- * the wakeup to our caller.
- */
- return true;
- }
- /*
- * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
- * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
- * is invoked indirectly from rcu_advance_cbs(), which would result in
- * endless recursion -- or would do so if it wasn't for the self-deadlock
- * that is encountered beforehand.
- *
- * Returns true if the grace-period kthread needs to be awakened.
- */
- static bool rcu_start_gp(struct rcu_state *rsp)
- {
- struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- struct rcu_node *rnp = rcu_get_root(rsp);
- bool ret = false;
- /*
- * If there is no grace period in progress right now, any
- * callbacks we have up to this point will be satisfied by the
- * next grace period. Also, advancing the callbacks reduces the
- * probability of false positives from cpu_needs_another_gp()
- * resulting in pointless grace periods. So, advance callbacks
- * then start the grace period!
- */
- ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
- ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
- return ret;
- }
/*
* Report a full set of quiescent states to the specified rcu_state data
* structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
@@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
return;
}
WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
- WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+ WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
rcu_preempt_blocked_readers_cgp(rnp));
rnp->qsmask &= ~mask;
trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)
unsigned long flags;
bool needwake;
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
+ struct rcu_node *rnp;
WARN_ON_ONCE(!rdp->beenonline);
/* Update RCU state based on any recent quiescent states. */
rcu_check_quiescent_state(rsp, rdp);
- /* Does this CPU require a not-yet-started grace period? */
- local_irq_save(flags);
- if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
- needwake = rcu_start_gp(rsp);
- raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
- if (needwake)
- rcu_gp_kthread_wake(rsp);
- } else {
- local_irq_restore(flags);
- }
+ /* No grace period and unregistered callbacks? */
+ if (!rcu_gp_in_progress(rsp) &&
+ rcu_segcblist_is_enabled(&rdp->cblist)) {
+ local_irq_save(flags);
+ if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
+ local_irq_restore(flags);
+ } else {
+ rnp = rdp->mynode;
+ raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
+ needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
+ }
+ }
/* If there are callbacks ready, invoke them. */ /* If there are callbacks ready, invoke them. */
...@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, ...@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
/* Start a new grace period if one not already started. */ /* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) { if (!rcu_gp_in_progress(rsp)) {
struct rcu_node *rnp_root = rcu_get_root(rsp); struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp_root); raw_spin_lock_rcu_node(rnp);
needwake = rcu_start_gp(rsp); needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock_rcu_node(rnp_root); raw_spin_unlock_rcu_node(rnp);
if (needwake) if (needwake)
rcu_gp_kthread_wake(rsp); rcu_gp_kthread_wake(rsp);
} else { } else {
...@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) ...@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
return 1; return 1;
/* Has RCU gone idle with this CPU needing another grace period? */ /* Has RCU gone idle with this CPU needing another grace period? */
if (cpu_needs_another_gp(rsp, rdp)) if (!rcu_gp_in_progress(rsp) &&
rcu_segcblist_is_enabled(&rdp->cblist) &&
!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
return 1; return 1;
/* Has another RCU grace period completed? */ /* Has another RCU grace period completed? */
@@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)
return 0;
}
+ static DEFINE_PER_CPU(int, rcu_cpu_started);
/*
* Mark the specified CPU as being online so that subsequent grace periods
* (both expedited and normal) will wait on it. Note that this means that

@@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)
struct rcu_node *rnp;
struct rcu_state *rsp;
+ if (per_cpu(rcu_cpu_started, cpu))
+ return;
+ per_cpu(rcu_cpu_started, cpu) = 1;
for_each_rcu_flavor(rsp) {
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;

@@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)
preempt_enable();
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_idle_cpu(cpu, rsp);
+ per_cpu(rcu_cpu_started, cpu) = 0;
}
/* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
struct rcu_data *my_rdp;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+ bool needwake;
if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
return; /* No callbacks to migrate. */

@@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
return;
}
raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
- rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
- rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+ /* Leverage recent GPs and set GP for new callbacks. */
+ needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
+ rcu_advance_cbs(rsp, rnp_root, my_rdp);
rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
!rcu_segcblist_n_cbs(&my_rdp->cblist));
raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
!rcu_segcblist_empty(&rdp->cblist),
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
init_swait_queue_head(&rsp->gp_wq);
init_swait_queue_head(&rsp->expedited_wq);
- rnp = rsp->level[rcu_num_lvls - 1];
+ rnp = rcu_first_leaf_node(rsp);
for_each_possible_cpu(i) {
while (i > rnp->grphi)
rnp++;

@@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
}
struct workqueue_struct *rcu_gp_wq;
+ struct workqueue_struct *rcu_par_gp_wq;
void __init rcu_init(void)
{

@@ -4199,6 +4103,8 @@ void __init rcu_init(void)
/* Create workqueue for expedited GPs and for Tree SRCU. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
+ rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+ WARN_ON(!rcu_par_gp_wq);
}
#include "tree_exp.h"
...
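The rcu_start_this_gp() rewrite above is the "funnel locking" change called out in the shortlog: each requester walks from its leaf rcu_node toward the root, taking one node's lock at a time, and bails out as soon as it finds its grace-period request already recorded at some level. A rough userspace analogue of that walk is sketched below; it assumes pthread mutexes and a small static tree, and it is not the kernel's code.

```c
/* Illustrative userspace analogue of funnel locking: record a request at
 * the leaf and walk toward the root, bailing out early if another thread
 * already recorded the same request. Not the kernel's code.
 */
#include <pthread.h>
#include <stdbool.h>

struct node {
	pthread_mutex_t lock;
	struct node *parent;	/* NULL at the root */
	bool requested[4];	/* indexed by request number & 3 */
};

/* Caller holds leaf->lock, mirroring rcu_start_this_gp()'s contract.
 * Returns true if this caller is the one that must kick off the work.
 */
static bool funnel_record_request(struct node *leaf, unsigned long req)
{
	struct node *np;

	for (np = leaf; ; np = np->parent) {
		if (np != leaf)
			pthread_mutex_lock(&np->lock);
		if (np->requested[req & 3]) {
			/* Someone beat us to it; no need to go higher. */
			if (np != leaf)
				pthread_mutex_unlock(&np->lock);
			return false;
		}
		np->requested[req & 3] = true;
		if (np != leaf && np->parent)
			pthread_mutex_unlock(&np->lock);
		if (!np->parent)
			break;		/* Reached the root. */
	}
	/* Root lock is still held here unless the leaf is the root. */
	if (np != leaf)
		pthread_mutex_unlock(&np->lock);
	return true;		/* This caller starts the work. */
}
```

As in the kernel version, contention concentrates on the lower levels of the tree, and the root lock is taken only by the first requester to reach it.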
@@ -58,6 +58,14 @@ struct rcu_dynticks {
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
+ /* Communicate arguments to a workqueue handler. */
+ struct rcu_exp_work {
+ smp_call_func_t rew_func;
+ struct rcu_state *rew_rsp;
+ unsigned long rew_s;
+ struct work_struct rew_work;
+ };
/* RCU's kthread states for tracing. */
#define RCU_KTHREAD_STOPPED 0
#define RCU_KTHREAD_RUNNING 1

@@ -150,15 +158,32 @@ struct rcu_node {
struct swait_queue_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
- int need_future_gp[2];
- /* Counts of upcoming no-CB GP requests. */
+ u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
unsigned long exp_seq_rq;
wait_queue_head_t exp_wq[4];
+ struct rcu_exp_work rew;
+ bool exp_need_flush; /* Need to flush workitem? */
} ____cacheline_internodealigned_in_smp;
+ /* Accessors for ->need_future_gp[] array. */
+ #define need_future_gp_mask() \
+ (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
+ #define need_future_gp_element(rnp, c) \
+ ((rnp)->need_future_gp[(c) & need_future_gp_mask()])
+ #define need_any_future_gp(rnp) \
+ ({ \
+ int __i; \
+ bool __nonzero = false; \
+ \
+ for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
+ __nonzero = __nonzero || \
+ READ_ONCE((rnp)->need_future_gp[__i]); \
+ __nonzero; \
+ })
/*
* Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
* are indexed relative to this interval rather than the global CPU ID space.

@@ -224,10 +249,6 @@ struct rcu_data {
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
- atomic_long_t exp_workdone0; /* # done by workqueue. */
- atomic_long_t exp_workdone1; /* # done by others #1. */
- atomic_long_t exp_workdone2; /* # done by others #2. */
- atomic_long_t exp_workdone3; /* # done by others #3. */
int exp_dynticks_snap; /* Double-check need for IPI. */
/* 6) Callback offloading. */

@@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
- bool rcu_eqs_special_set(int cpu);
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);

@@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
- static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */

@@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
- static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
...
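The new accessors above index the four-entry ->need_future_gp[] array by the low-order bits of the requested grace-period number, so a few consecutive outstanding requests land in distinct slots. A tiny standalone illustration of just that indexing idea (plain C, not kernel code):

```c
#include <stdio.h>

/* Mirror of the bookkeeping idea: slot = request_number & (array_size - 1). */
int main(void)
{
	unsigned char need_future_gp[4] = { 0 };
	unsigned long c;

	for (c = 100; c < 104; c++)
		need_future_gp[c & 3] = 1;	/* each request gets its own slot */

	for (c = 0; c < 4; c++)
		printf("slot %lu: %u\n", c, need_future_gp[c]);
	return 0;
}
```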
...@@ -20,6 +20,8 @@ ...@@ -20,6 +20,8 @@
* Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/ */
#include <linux/lockdep.h>
/* /*
* Record the start of an expedited grace period. * Record the start of an expedited grace period.
*/ */
...@@ -154,14 +156,34 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) ...@@ -154,14 +156,34 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
* for the current expedited grace period. Works only for preemptible * for the current expedited grace period. Works only for preemptible
 * RCU -- other RCU implementations use other means. * RCU -- other RCU implementations use other means.
* *
 * Caller must hold the rcu_state's exp_mutex. * Caller must hold the specified rcu_node structure's ->lock.
*/ */
static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{ {
raw_lockdep_assert_held_rcu_node(rnp);
return rnp->exp_tasks == NULL && return rnp->exp_tasks == NULL &&
READ_ONCE(rnp->expmask) == 0; READ_ONCE(rnp->expmask) == 0;
} }
/*
* Like sync_rcu_preempt_exp_done(), but this function assumes the caller
* doesn't hold the rcu_node's ->lock, and will acquire and release the lock
 * itself.
*/
static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
{
unsigned long flags;
bool ret;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
ret = sync_rcu_preempt_exp_done(rnp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ret;
}
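The two functions above illustrate a common kernel idiom: a predicate that asserts (via lockdep) that its caller holds the protecting lock, paired with an *_unlocked() wrapper that acquires and releases that lock around the call. Below is a minimal sketch of the same idiom with an ordinary spinlock; the foo_* names are illustrative only, not kernel APIs.
#include <linux/spinlock.h>
#include <linux/lockdep.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(foo_lock);
static int foo_count;

/* Caller must hold foo_lock. */
static bool foo_is_idle(void)
{
	lockdep_assert_held(&foo_lock);
	return foo_count == 0;
}

/* Like foo_is_idle(), but acquires and releases foo_lock itself. */
static bool foo_is_idle_unlocked(void)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&foo_lock, flags);
	ret = foo_is_idle();
	spin_unlock_irqrestore(&foo_lock, flags);
	return ret;
}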
/* /*
* Report the exit from RCU read-side critical section for the last task * Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU * that queued itself during or before the current expedited preemptible-RCU
...@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) ...@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion * recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!) * iteratively!)
* *
* Caller must hold the rcu_state's exp_mutex and the specified rcu_node * Caller must hold the specified rcu_node structure's ->lock.
* structure's ->lock.
*/ */
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake, unsigned long flags) bool wake, unsigned long flags)
...@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, ...@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
/* /*
* Report expedited quiescent state for specified node. This is a * Report expedited quiescent state for specified node. This is a
* lock-acquisition wrapper function for __rcu_report_exp_rnp(). * lock-acquisition wrapper function for __rcu_report_exp_rnp().
*
* Caller must hold the rcu_state's exp_mutex.
*/ */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
struct rcu_node *rnp, bool wake) struct rcu_node *rnp, bool wake)
...@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, ...@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
/* /*
* Report expedited quiescent state for multiple CPUs, all covered by the * Report expedited quiescent state for multiple CPUs, all covered by the
* specified leaf rcu_node structure. Caller must hold the rcu_state's * specified leaf rcu_node structure.
* exp_mutex.
*/ */
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long mask, bool wake) unsigned long mask, bool wake)
...@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, ...@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
} }
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
unsigned long s)
{ {
if (rcu_exp_gp_seq_done(rsp, s)) { if (rcu_exp_gp_seq_done(rsp, s)) {
trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */ /* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */ smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(stat);
return true; return true;
} }
return false; return false;
...@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) ...@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* promoting locality and is not strictly needed for correctness. * promoting locality and is not strictly needed for correctness.
*/ */
for (; rnp != NULL; rnp = rnp->parent) { for (; rnp != NULL; rnp = rnp->parent) {
if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) if (sync_exp_work_done(rsp, s))
return true; return true;
/* Work not done, either wait here or go up. */ /* Work not done, either wait here or go up. */
...@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) ...@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp->grplo, rnp->grphi, rnp->grplo, rnp->grphi,
TPS("wait")); TPS("wait"));
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(rsp, sync_exp_work_done(rsp, s));
&rdp->exp_workdone2, s));
return true; return true;
} }
rnp->exp_seq_rq = s; /* Followers can wait on us. */ rnp->exp_seq_rq = s; /* Followers can wait on us. */
...@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) ...@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
} }
mutex_lock(&rsp->exp_mutex); mutex_lock(&rsp->exp_mutex);
fastpath: fastpath:
if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { if (sync_exp_work_done(rsp, s)) {
mutex_unlock(&rsp->exp_mutex); mutex_unlock(&rsp->exp_mutex);
return true; return true;
} }
...@@ -362,23 +377,23 @@ static void sync_sched_exp_online_cleanup(int cpu) ...@@ -362,23 +377,23 @@ static void sync_sched_exp_online_cleanup(int cpu)
} }
/* /*
* Select the nodes that the upcoming expedited grace period needs * Select the CPUs within the specified rcu_node that the upcoming
* to wait for. * expedited grace period needs to wait for.
*/ */
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
smp_call_func_t func)
{ {
int cpu; int cpu;
unsigned long flags; unsigned long flags;
smp_call_func_t func;
unsigned long mask_ofl_test; unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi; unsigned long mask_ofl_ipi;
int ret; int ret;
struct rcu_node *rnp; struct rcu_exp_work *rewp =
container_of(wp, struct rcu_exp_work, rew_work);
struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
struct rcu_state *rsp = rewp->rew_rsp;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); func = rewp->rew_func;
sync_exp_reset_tree(rsp);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* Each pass checks a CPU for identity, offline, and idle. */ /* Each pass checks a CPU for identity, offline, and idle. */
...@@ -404,8 +419,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, ...@@ -404,8 +419,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
/* /*
* Need to wait for any blocked tasks as well. Note that * Need to wait for any blocked tasks as well. Note that
* additional blocking tasks will also block the expedited * additional blocking tasks will also block the expedited GP
* GP until such time as the ->expmask bits are cleared. * until such time as the ->expmask bits are cleared.
*/ */
if (rcu_preempt_has_tasks(rnp)) if (rcu_preempt_has_tasks(rnp))
rnp->exp_tasks = rnp->blkd_tasks.next; rnp->exp_tasks = rnp->blkd_tasks.next;
...@@ -448,7 +463,43 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, ...@@ -448,7 +463,43 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
mask_ofl_test |= mask_ofl_ipi; mask_ofl_test |= mask_ofl_ipi;
if (mask_ofl_test) if (mask_ofl_test)
rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
}
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
*/
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
smp_call_func_t func)
{
struct rcu_node *rnp;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
sync_exp_reset_tree(rsp);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
/* Schedule work for each leaf rcu_node structure. */
rcu_for_each_leaf_node(rsp, rnp) {
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
rnp->rew.rew_func = func;
rnp->rew.rew_rsp = rsp;
if (!READ_ONCE(rcu_par_gp_wq) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
/* No workqueues yet. */
sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
continue;
}
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
rnp->exp_need_flush = true;
} }
/* Wait for workqueue jobs (if any) to complete. */
rcu_for_each_leaf_node(rsp, rnp)
if (rnp->exp_need_flush)
flush_work(&rnp->rew.rew_work);
} }
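With this change, expedited CPU selection fans out as one workqueue item per leaf rcu_node, queued near that leaf's CPUs via queue_work_on() and then flushed. The following is a hedged, self-contained sketch of that fan-out-and-flush idiom; struct leaf_work, process_one_leaf(), and the CPU choice are illustrative stand-ins, not kernel APIs.
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct leaf_work {
	struct work_struct work;
	int leaf_id;			/* illustrative per-leaf state */
	bool need_flush;
};

static void process_one_leaf(struct work_struct *wp)
{
	struct leaf_work *lw = container_of(wp, struct leaf_work, work);

	/* Runs in parallel with the work items for the other leaves. */
	pr_info("processing leaf %d\n", lw->leaf_id);
}

/* Queue one work item per leaf, then wait for all of them to finish. */
static void process_all_leaves(struct leaf_work *leaves, int n,
			       struct workqueue_struct *wq)
{
	int i;

	for (i = 0; i < n; i++) {
		leaves[i].need_flush = false;
		INIT_WORK(&leaves[i].work, process_one_leaf);
		/* Prefer a CPU associated with this leaf; here simply CPU i,
		   assumed to exist and be online for this sketch. */
		queue_work_on(i, wq, &leaves[i].work);
		leaves[i].need_flush = true;
	}
	for (i = 0; i < n; i++)
		if (leaves[i].need_flush)
			flush_work(&leaves[i].work);
}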
static void synchronize_sched_expedited_wait(struct rcu_state *rsp) static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
...@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) ...@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
for (;;) { for (;;) {
ret = swait_event_timeout( ret = swait_event_timeout(
rsp->expedited_wq, rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root), sync_rcu_preempt_exp_done_unlocked(rnp_root),
jiffies_stall); jiffies_stall);
if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
return; return;
WARN_ON(ret < 0); /* workqueues should not be signaled. */ WARN_ON(ret < 0); /* workqueues should not be signaled. */
if (rcu_cpu_stall_suppress) if (rcu_cpu_stall_suppress)
...@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) ...@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rcu_for_each_node_breadth_first(rsp, rnp) { rcu_for_each_node_breadth_first(rsp, rnp) {
if (rnp == rnp_root) if (rnp == rnp_root)
continue; /* printed unconditionally */ continue; /* printed unconditionally */
if (sync_rcu_preempt_exp_done(rnp)) if (sync_rcu_preempt_exp_done_unlocked(rnp))
continue; continue;
pr_cont(" l=%u:%d-%d:%#lx/%c", pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi, rnp->level, rnp->grplo, rnp->grphi,
...@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) ...@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
mutex_unlock(&rsp->exp_wake_mutex); mutex_unlock(&rsp->exp_wake_mutex);
} }
/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
smp_call_func_t rew_func;
struct rcu_state *rew_rsp;
unsigned long rew_s;
struct work_struct rew_work;
};
/* /*
* Common code to drive an expedited grace period forward, used by * Common code to drive an expedited grace period forward, used by
* workqueues and mid-boot-time tasks. * workqueues and mid-boot-time tasks.
...@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, ...@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
rnp = rcu_get_root(rsp); rnp = rcu_get_root(rsp);
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); sync_exp_work_done(rsp, s));
smp_mb(); /* Workqueue actions happen before return. */ smp_mb(); /* Workqueue actions happen before return. */
/* Let the next expedited grace period start. */ /* Let the next expedited grace period start. */
......
...@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) ...@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
raw_lockdep_assert_held_rcu_node(rnp); raw_lockdep_assert_held_rcu_node(rnp);
WARN_ON_ONCE(rdp->mynode != rnp); WARN_ON_ONCE(rdp->mynode != rnp);
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
/* /*
* Decide where to queue the newly blocked task. In theory, * Decide where to queue the newly blocked task. In theory,
...@@ -383,6 +383,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) ...@@ -383,6 +383,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return rnp->gp_tasks != NULL; return rnp->gp_tasks != NULL;
} }
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
/*
* Preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = READ_ONCE(t->rcu_read_lock_nesting);
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
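For context, on preemptible-RCU kernels rcu_read_lock() and rcu_read_unlock() map onto the two functions above. A minimal, hedged reader-side sketch follows; struct foo and gp are illustrative and not part of this patch.
#include <linux/rcupdate.h>

struct foo {
	int a;
};

static struct foo __rcu *gp;		/* illustrative RCU-protected pointer */

static int read_a(void)
{
	struct foo *p;
	int ret = -1;

	rcu_read_lock();		/* -> __rcu_read_lock() on PREEMPT_RCU */
	p = rcu_dereference(gp);	/* fetch with dependency ordering */
	if (p)
		ret = p->a;
	rcu_read_unlock();		/* -> __rcu_read_unlock() on PREEMPT_RCU */
	return ret;
}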
/* /*
* Advance a ->blkd_tasks-list pointer to the next entry, instead * Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list. * returning NULL if at the end of the list.
...@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t) ...@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)
rnp = t->rcu_blocked_node; rnp = t->rcu_blocked_node;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
WARN_ON_ONCE(rnp != t->rcu_blocked_node); WARN_ON_ONCE(rnp != t->rcu_blocked_node);
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = sync_rcu_preempt_exp_done(rnp); empty_exp = sync_rcu_preempt_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
...@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void) ...@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)
t->rcu_read_unlock_special.b.need_qs = true; t->rcu_read_unlock_special.b.need_qs = true;
} }
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void)
{
rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
}
#endif /* #ifdef CONFIG_RCU_BOOST */
/** /**
* call_rcu() - Queue an RCU callback for invocation after a grace period. * call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates. * @head: structure to be used for queueing the RCU updates.
...@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void) ...@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)
{ {
rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
rcu_preempt_do_callbacks(); rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
} }
static void rcu_cpu_kthread_setup(unsigned int cpu) static void rcu_cpu_kthread_setup(unsigned int cpu)
...@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self, ...@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
cond_resched_rcu_qs(); cond_resched_tasks_rcu_qs();
} }
/* Unconditionally decrement: no need to wake ourselves up. */ /* Unconditionally decrement: no need to wake ourselves up. */
...@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) ...@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
swake_up_all(sq); swake_up_all(sq);
} }
/*
* Set the root rcu_node structure's ->need_future_gp field
* based on the sum of those of all rcu_node structures. This does
* double-count the root rcu_node structure's requests, but this
* is necessary to handle the possibility of a rcu_nocb_kthread()
* having awakened during the time that the rcu_node structures
* were being updated for the end of the previous grace period.
*/
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
}
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{ {
return &rnp->nocb_gp_wq[rnp->completed & 0x1]; return &rnp->nocb_gp_wq[rnp->completed & 0x1];
...@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, ...@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("WakeOvf")); TPS("WakeOvf"));
} else { } else {
wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred")); TPS("WakeOvfIsDeferred"));
} }
rdp->qlen_last_fqs_check = LONG_MAX / 2; rdp->qlen_last_fqs_check = LONG_MAX / 2;
...@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) ...@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
needwake = rcu_start_future_gp(rnp, rdp, &c); c = rcu_cbs_completed(rdp->rsp, rnp);
needwake = rcu_start_this_gp(rnp, rdp, c);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags); raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake) if (needwake)
rcu_gp_kthread_wake(rdp->rsp); rcu_gp_kthread_wake(rdp->rsp);
...@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) ...@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
* Wait for the grace period. Do so interruptibly to avoid messing * Wait for the grace period. Do so interruptibly to avoid messing
* up the load average. * up the load average.
*/ */
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait")); trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) { for (;;) {
swait_event_interruptible( swait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1], rnp->nocb_gp_wq[c & 0x1],
...@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) ...@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
if (likely(d)) if (likely(d))
break; break;
WARN_ON(signal_pending(current)); WARN_ON(signal_pending(current));
trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
} }
trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
smp_mb(); /* Ensure that CB invocation happens after GP end. */ smp_mb(); /* Ensure that CB invocation happens after GP end. */
} }
...@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg) ...@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)
cl++; cl++;
c++; c++;
local_bh_enable(); local_bh_enable();
cond_resched_rcu_qs(); cond_resched_tasks_rcu_qs();
list = next; list = next;
} }
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
...@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) ...@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
void __init rcu_init_nohz(void) void __init rcu_init_nohz(void)
{ {
int cpu; int cpu;
bool need_rcu_nocb_mask = true; bool need_rcu_nocb_mask = false;
struct rcu_state *rsp; struct rcu_state *rsp;
#if defined(CONFIG_NO_HZ_FULL) #if defined(CONFIG_NO_HZ_FULL)
...@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void) ...@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)
#endif /* #if defined(CONFIG_NO_HZ_FULL) */ #endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
cpumask_and(rcu_nocb_mask, cpu_possible_mask, cpumask_and(rcu_nocb_mask, cpu_possible_mask,
rcu_nocb_mask); rcu_nocb_mask);
} }
...@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) ...@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{ {
} }
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
}
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{ {
return NULL; return NULL;
...@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) ...@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
} }
/* /*
* Bind the grace-period kthread for the sysidle flavor of RCU to the * Bind the RCU grace-period kthreads to the housekeeping CPU.
* timekeeping CPU.
*/ */
static void rcu_bind_gp_kthread(void) static void rcu_bind_gp_kthread(void)
{ {
......
...@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode); ...@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);
#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ #endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
#ifdef CONFIG_PREEMPT_RCU
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
/*
* Preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = READ_ONCE(t->rcu_read_lock_nesting);
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key; static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map = struct lockdep_map rcu_lock_map =
...@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); ...@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
* grace period has elapsed, in other words after all currently * grace period has elapsed, in other words after all currently
* executing rcu-tasks read-side critical sections have elapsed. These * executing rcu-tasks read-side critical sections have elapsed. These
* read-side critical sections are delimited by calls to schedule(), * read-side critical sections are delimited by calls to schedule(),
* cond_resched_rcu_qs(), idle execution, userspace execution, calls * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
* *
* This is a very specialized primitive, intended only for a few uses in * This is a very specialized primitive, intended only for a few uses in
......
...@@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock) ...@@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock)
} }
EXPORT_SYMBOL(__cond_resched_lock); EXPORT_SYMBOL(__cond_resched_lock);
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
local_bh_enable();
preempt_schedule_common();
local_bh_disable();
return 1;
}
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
/** /**
* yield - yield the current processor to other threads. * yield - yield the current processor to other threads.
* *
......
...@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt) ...@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)
} }
/* /*
* Special-case - softirqs can safely be enabled in * Special-case - softirqs can safely be enabled by __do_softirq(),
* cond_resched_softirq(), or by __do_softirq(),
* without processing still-pending softirqs: * without processing still-pending softirqs:
*/ */
void _local_bh_enable(void) void _local_bh_enable(void)
......
...@@ -574,7 +574,7 @@ void stutter_wait(const char *title) ...@@ -574,7 +574,7 @@ void stutter_wait(const char *title)
{ {
int spt; int spt;
cond_resched_rcu_qs(); cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test); spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) { for (; spt; spt = READ_ONCE(stutter_pause_test)) {
if (spt == 1) { if (spt == 1) {
......
...@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg) ...@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)
* wants to run, schedule in, but if the CPU is idle, * wants to run, schedule in, but if the CPU is idle,
* we'll keep burning cycles. * we'll keep burning cycles.
* *
* Note the _rcu_qs() version of cond_resched() will * Note the tasks_rcu_qs() version of cond_resched() will
* notify synchronize_rcu_tasks() that this thread has * notify synchronize_rcu_tasks() that this thread has
* passed a quiescent state for rcu_tasks. Otherwise * passed a quiescent state for rcu_tasks. Otherwise
* this thread will never voluntarily schedule which would * this thread will never voluntarily schedule which would
* block synchronize_rcu_tasks() indefinitely. * block synchronize_rcu_tasks() indefinitely.
*/ */
cond_resched(); cond_resched_tasks_rcu_qs();
} }
return 0; return 0;
......
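The comment above is the reason for the switch: only the *_tasks_rcu_qs() form tells Tasks RCU that an always-runnable kthread has passed a quiescent state. Below is a hedged sketch of the resulting pattern; do_unit_of_work() is a hypothetical stand-in for the benchmark body.
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

static void do_unit_of_work(void)
{
	/* Hypothetical stand-in for the real workload. */
}

static int busy_kthread(void *unused)
{
	while (!kthread_should_stop()) {
		do_unit_of_work();
		/*
		 * Report a Tasks-RCU quiescent state (and reschedule if
		 * needed) even though this loop never voluntarily sleeps.
		 */
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}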
#!/bin/sh
#
# Invoke a text editor on all console.log files for all runs with diagnostics,
# that is, on all such files having a console.log.diags counterpart.
# Note that both console.log.diags and console.log are passed to the
# editor (currently defaulting to "vi"), allowing the user to get an
# idea of what to search for in the console.log file.
#
# Usage: kvm-find-errors.sh directory
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
rundir="${1}"
if test -z "$rundir" -o ! -d "$rundir"
then
echo Usage: $0 directory
exit 1
fi
editor=${EDITOR-vi}
# Find builds with errors
files=
for i in ${rundir}/*/Make.out
do
if egrep -q "error:|warning:" < $i
then
egrep "error:|warning:" < $i > $i.diags
files="$files $i.diags $i"
fi
done
if test -n "$files"
then
$editor $files
else
echo No build errors or warnings.
fi
if grep -q -e "--buildonly" < ${rundir}/log
then
echo Build-only run, no console logs to check.
fi
# Find console logs with errors
files=
for i in ${rundir}/*/console.log
do
if test -r $i.diags
then
files="$files $i.diags $i"
fi
done
if test -n "$files"
then
$editor $files
else
echo No errors in console logs.
fi
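A hedged usage sketch for the new script: the run-directory name below is the example from the header comment, and the script is assumed to live with the other rcutorture helpers under tools/testing/selftests/rcutorture/bin/.
# Open diagnostics (build and console) from a finished run in your editor.
EDITOR=emacs tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
	tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27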
...@@ -34,11 +34,15 @@ fi ...@@ -34,11 +34,15 @@ fi
configfile=`echo $i | sed -e 's/^.*\///'` configfile=`echo $i | sed -e 's/^.*\///'`
ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
if test -z "$ngps" if test -z "$ngps"
then then
echo "$configfile -------" echo "$configfile ------- " $stopstate
else else
title="$configfile ------- $ngps grace periods" title="$configfile ------- $ngps GPs"
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
if test -z "$dur" if test -z "$dur"
then then
...@@ -46,9 +50,9 @@ else ...@@ -46,9 +50,9 @@ else
else else
ngpsps=`awk -v ngps=$ngps -v dur=$dur ' ngpsps=`awk -v ngps=$ngps -v dur=$dur '
BEGIN { print ngps / dur }' < /dev/null` BEGIN { print ngps / dur }' < /dev/null`
title="$title ($ngpsps per second)" title="$title ($ngpsps/s)"
fi fi
echo $title echo $title $stopstate
nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
if test -z "$nclosecalls" if test -z "$nclosecalls"
then then
......
...@@ -48,10 +48,6 @@ do ...@@ -48,10 +48,6 @@ do
cat $i/Make.oldconfig.err cat $i/Make.oldconfig.err
fi fi
parse-build.sh $i/Make.out $configfile parse-build.sh $i/Make.out $configfile
if test "$TORTURE_SUITE" != rcuperf
then
parse-torture.sh $i/console.log $configfile
fi
parse-console.sh $i/console.log $configfile parse-console.sh $i/console.log $configfile
if test -r $i/Warnings if test -r $i/Warnings
then then
......
...@@ -267,5 +267,4 @@ then ...@@ -267,5 +267,4 @@ then
echo Unknown PID, cannot kill qemu command echo Unknown PID, cannot kill qemu command
fi fi
parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title parse-console.sh $resdir/console.log $title
...@@ -24,57 +24,146 @@ ...@@ -24,57 +24,146 @@
# #
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
T=${TMPDIR-/tmp}/parse-console.sh.$$
file="$1" file="$1"
title="$2" title="$2"
trap 'rm -f $T.seq $T.diags' 0
. functions.sh . functions.sh
# Check for presence and readability of console output file
if test -f "$file" -a -r "$file"
then
:
else
echo $title unreadable console output file: $file
exit 1
fi
if grep -Pq '\x00' < $file if grep -Pq '\x00' < $file
then then
print_warning Console output contains nul bytes, old qemu still running? print_warning Console output contains nul bytes, old qemu still running?
fi fi
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags cat /dev/null > $file.diags
if test -s $1.diags
# Check for proper termination, except that rcuperf runs don't indicate this.
if test "$TORTURE_SUITE" != rcuperf
then then
print_warning Assertion failure in $file $title # check for abject failure
# cat $1.diags
if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
then
nerrs=`grep --binary-files=text '!!!' $file |
tail -1 |
awk '
{
for (i=NF-8;i<=NF;i++)
sum+=$i;
}
END { print sum }'`
print_bug $title FAILURE, $nerrs instances
exit
fi
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
awk '
BEGIN {
ver = 0;
badseq = 0;
}
{
if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
badseqno1 = ver;
badseqno2 = $5;
badseqnr = NR;
badseq = 1;
}
ver = $5
}
END {
if (badseq) {
if (badseqno1 == badseqno2 && badseqno2 == ver)
print "GP HANG at " ver " torture stat " badseqnr;
else
print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
}
}' > $T.seq
if grep -q SUCCESS $file
then
if test -s $T.seq
then
print_warning $title `cat $T.seq`
echo " " $file
exit 2
fi
else
if grep -q "_HOTPLUG:" $file
then
print_warning HOTPLUG FAILURES $title `cat $T.seq`
echo " " $file
exit 3
fi
echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
if test -s $T.seq
then
print_warning $title `cat $T.seq`
fi
exit 2
fi
fi | tee -a $file.diags
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
grep -v 'ODEBUG: ' |
grep -v 'Warning: unable to open an initial console' > $T.diags
if test -s $T.diags
then
print_warning "Assertion failure in $file $title"
# cat $T.diags
summary="" summary=""
n_badness=`grep -c Badness $1` n_badness=`grep -c Badness $file`
if test "$n_badness" -ne 0 if test "$n_badness" -ne 0
then then
summary="$summary Badness: $n_badness" summary="$summary Badness: $n_badness"
fi fi
n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0 if test "$n_warn" -ne 0
then then
summary="$summary Warnings: $n_warn" summary="$summary Warnings: $n_warn"
fi fi
n_bugs=`egrep -c 'BUG|Oops:' $1` n_bugs=`egrep -c 'BUG|Oops:' $file`
if test "$n_bugs" -ne 0 if test "$n_bugs" -ne 0
then then
summary="$summary Bugs: $n_bugs" summary="$summary Bugs: $n_bugs"
fi fi
n_calltrace=`grep -c 'Call Trace:' $1` n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0 if test "$n_calltrace" -ne 0
then then
summary="$summary Call Traces: $n_calltrace" summary="$summary Call Traces: $n_calltrace"
fi fi
n_lockdep=`grep -c =========== $1` n_lockdep=`grep -c =========== $file`
if test "$n_badness" -ne 0 if test "$n_badness" -ne 0
then then
summary="$summary lockdep: $n_badness" summary="$summary lockdep: $n_badness"
fi fi
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0 if test "$n_stalls" -ne 0
then then
summary="$summary Stalls: $n_stalls" summary="$summary Stalls: $n_stalls"
fi fi
n_starves=`grep -c 'rcu_.*kthread starved for' $1` n_starves=`grep -c 'rcu_.*kthread starved for' $file`
if test "$n_starves" -ne 0 if test "$n_starves" -ne 0
then then
summary="$summary Starves: $n_starves" summary="$summary Starves: $n_starves"
fi fi
print_warning Summary: $summary print_warning Summary: $summary
else cat $T.diags >> $file.diags
rm $1.diags fi
if ! test -s $file.diags
then
rm -f $file.diags
fi fi
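parse-console.sh is normally run by kvm-recheck.sh (see above), but it can also be invoked by hand on a single console log. A hedged sketch, with an illustrative result path and scenario name:
cd tools/testing/selftests/rcutorture/bin
./parse-console.sh ../res/2018.02.25-14:27:27/TREE01/console.log TREE01
# Any findings are also left in .../TREE01/console.log.diags for later review.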
#!/bin/bash
#
# Check the console output from a torture run for goodness.
# The "file" is a pathname on the local system, and "title" is
# a text string for error-message purposes.
#
# The file must contain torture output, but can be interspersed
# with other dmesg text, as in console-log output.
#
# Usage: parse-torture.sh file title
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
T=${TMPDIR-/tmp}/parse-torture.sh.$$
file="$1"
title="$2"
trap 'rm -f $T.seq' 0
. functions.sh
# check for presence of torture output file.
if test -f "$file" -a -r "$file"
then
:
else
echo $title unreadable torture output file: $file
exit 1
fi
# check for abject failure
if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
then
nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
print_bug $title FAILURE, $nerrs instances
echo " " $url
exit
fi
grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
awk '
BEGIN {
ver = 0;
badseq = 0;
}
{
if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
badseqno1 = ver;
badseqno2 = $5;
badseqnr = NR;
badseq = 1;
}
ver = $5
}
END {
if (badseq) {
if (badseqno1 == badseqno2 && badseqno2 == ver)
print "GP HANG at " ver " torture stat " badseqnr;
else
print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
}
}' > $T.seq
if grep -q SUCCESS $file
then
if test -s $T.seq
then
print_warning $title $title `cat $T.seq`
echo " " $file
exit 2
fi
else
if grep -q "_HOTPLUG:" $file
then
print_warning HOTPLUG FAILURES $title `cat $T.seq`
echo " " $file
exit 3
fi
echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
if test -s $T.seq
then
print_warning $title `cat $T.seq`
fi
exit 2
fi