Commit 7a968bb2 authored by Paul E. McKenney

Merge branches 'consolidate.2019.01.26a' and 'fwd.2019.01.26a' into HEAD

consolidate.2019.01.26a: RCU flavor consolidation cleanups.
fwd.2019.01.26a: RCU grace-period forward-progress fixes.
parents 6ba7d681 13dc7d0c
......@@ -3721,6 +3721,11 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
rcutree.sysrq_rcu= [KNL]
Commandeer a sysrq key to dump out Tree RCU's
rcu_node tree with an eye towards determining
why a new grace period has not yet started.
rcuperf.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
......
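A hypothetical usage example (not taken from the patch): the sysrq_rcu
module parameter added below is read-only (0444), so it is set on the boot
command line, after which the commandeered 'y' key can be exercised in the
usual sysrq ways, for instance:

	rcutree.sysrq_rcu=1		(kernel boot command line)
	echo y > /proc/sysrq-trigger	(at run time)

which ends up dumping the rcu_node tree via show_rcu_gp_kthreads().
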
......@@ -62,6 +62,7 @@
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include "tree.h"
#include "rcu.h"
......@@ -115,6 +116,9 @@ int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/* panic() on RCU Stall sysctl. */
int sysctl_panic_on_rcu_stall __read_mostly;
/* Commandeer a sysrq key to dump RCU's tree. */
static bool sysrq_rcu;
module_param(sysrq_rcu, bool, 0444);
/*
* The rcu_scheduler_active variable is initialized to the value
......@@ -502,6 +506,14 @@ unsigned long rcu_exp_batches_completed(void)
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
/*
* Return the root node of the rcu_state structure.
*/
static struct rcu_node *rcu_get_root(void)
{
return &rcu_state.node[0];
}
/*
* Convert a ->gp_state value to a character string.
*/
......@@ -519,19 +531,30 @@ void show_rcu_gp_kthreads(void)
{
int cpu;
unsigned long j;
unsigned long ja;
unsigned long jr;
unsigned long jw;
struct rcu_data *rdp;
struct rcu_node *rnp;
j = jiffies - READ_ONCE(rcu_state.gp_activity);
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n",
j = jiffies;
ja = j - READ_ONCE(rcu_state.gp_activity);
jr = j - READ_ONCE(rcu_state.gp_req_activity);
jw = j - READ_ONCE(rcu_state.gp_wake_time);
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
rcu_state.gp_state, rcu_state.gp_kthread->state, j);
rcu_state.gp_state,
rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
(long)READ_ONCE(rcu_state.gp_seq),
(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
READ_ONCE(rcu_state.gp_flags));
rcu_for_each_node_breadth_first(rnp) {
if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
continue;
pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
rnp->grplo, rnp->grphi, rnp->gp_seq,
rnp->gp_seq_needed);
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
(long)rnp->gp_seq_needed);
if (!rcu_is_leaf_node(rnp))
continue;
for_each_leaf_node_possible_cpu(rnp, cpu) {
......@@ -540,14 +563,35 @@ void show_rcu_gp_kthreads(void)
ULONG_CMP_GE(rcu_state.gp_seq,
rdp->gp_seq_needed))
continue;
pr_info("\tcpu %d ->gp_seq_needed %lu\n",
cpu, rdp->gp_seq_needed);
pr_info("\tcpu %d ->gp_seq_needed %ld\n",
cpu, (long)rdp->gp_seq_needed);
}
}
/* sched_show_task(rcu_state.gp_kthread); */
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
/* Dump grace-period-request information due to commandeered sysrq. */
static void sysrq_show_rcu(int key)
{
show_rcu_gp_kthreads();
}
static struct sysrq_key_op sysrq_rcudump_op = {
.handler = sysrq_show_rcu,
.help_msg = "show-rcu(y)",
.action_msg = "Show RCU tree",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
static int __init rcu_sysrq_init(void)
{
if (sysrq_rcu)
return register_sysrq_key('y', &sysrq_rcudump_op);
return 0;
}
early_initcall(rcu_sysrq_init);
/*
* Send along grace-period-related data for rcutorture diagnostics.
*/
......@@ -565,14 +609,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
/*
* Return the root node of the rcu_state structure.
*/
static struct rcu_node *rcu_get_root(void)
{
return &rcu_state.node[0];
}
/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
......@@ -1169,7 +1205,7 @@ static void rcu_check_gp_kthread_starvation(void)
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
rcu_state.gp_flags,
READ_ONCE(rcu_state.gp_flags),
gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
if (gpk) {
......@@ -1545,17 +1581,28 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
}
/*
* Awaken the grace-period kthread. Don't do a self-awaken, and don't
* bother awakening when there is nothing for the grace-period kthread
* to do (as in several CPUs raced to awaken, and we lost), and finally
* don't try to awaken a kthread that has not yet been created.
* Awaken the grace-period kthread. Don't do a self-awaken (unless in
* an interrupt or softirq handler), and don't bother awakening when there
* is nothing for the grace-period kthread to do (as in several CPUs raced
* to awaken, and we lost), and finally don't try to awaken a kthread that
* has not yet been created. If all those checks are passed, track some
* debug information and awaken.
*
* So why do the self-wakeup when in an interrupt or softirq handler
* in the grace-period kthread's context? Because the kthread might have
* been interrupted just as it was going to sleep, and just after the final
* pre-sleep check of the awaken condition. In this case, a wakeup really
* is required, and is therefore supplied.
*/
static void rcu_gp_kthread_wake(void)
{
if (current == rcu_state.gp_kthread ||
if ((current == rcu_state.gp_kthread &&
!in_interrupt() && !in_serving_softirq()) ||
!READ_ONCE(rcu_state.gp_flags) ||
!rcu_state.gp_kthread)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
swake_up_one(&rcu_state.gp_wq);
}
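
The interleaving that the new in_interrupt()/in_serving_softirq() test
allows for can be pictured roughly as follows (an illustrative timeline,
not taken from the patch):

	GP kthread:  does its final pre-sleep check, sees nothing to do,
	             and is about to block
	interrupt:   fires on that CPU, needs a new grace period, and calls
	             rcu_gp_kthread_wake(); current is the GP kthread, but
	             in_interrupt() is true, so the wakeup is not filtered out
	GP kthread:  resumes and calls schedule(); the wakeup from the
	             interrupt has already marked it runnable, so it returns
	             promptly and re-checks, noticing the new request instead
	             of sleeping indefinitely
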
......@@ -1699,7 +1746,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
zero_cpu_stall_ticks(rdp);
}
rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)
if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
rdp->gp_seq_needed = rnp->gp_seq_needed;
WRITE_ONCE(rdp->gpwrap, false);
rcu_gpnum_ovf(rnp, rdp);
......@@ -1927,7 +1974,7 @@ static void rcu_gp_fqs_loop(void)
if (!ret) {
rcu_state.jiffies_force_qs = jiffies + j;
WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
jiffies + 3 * j);
jiffies + (j ? 3 * j : 2));
}
trace_rcu_grace_period(rcu_state.name,
READ_ONCE(rcu_state.gp_seq),
......@@ -2646,16 +2693,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
__func__, (long)READ_ONCE(rcu_state.gp_seq),
(long)READ_ONCE(rnp_root->gp_seq_needed),
j - rcu_state.gp_req_activity, j - rcu_state.gp_activity,
rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name,
rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL);
WARN_ON(1);
if (rnp_root != rnp)
raw_spin_unlock_rcu_node(rnp_root);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
show_rcu_gp_kthreads();
}
/*
......
......@@ -230,7 +230,13 @@ struct rcu_data {
/* Leader CPU takes GP-end wakeups. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* 6) Diagnostic data, including RCU CPU stall warnings. */
/* 6) RCU priority boosting. */
struct task_struct *rcu_cpu_kthread_task;
/* rcuc per-CPU kthread or NULL. */
unsigned int rcu_cpu_kthread_status;
char rcu_cpu_has_work;
/* 7) Diagnostic data, including RCU CPU stall warnings. */
unsigned int softirq_snap; /* Snapshot of softirq activity. */
/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
struct irq_work rcu_iw; /* Check for non-irq activity. */
......@@ -299,6 +305,8 @@ struct rcu_state {
struct swait_queue_head gp_wq; /* Where GP task waits. */
short gp_flags; /* Commands for GP task. */
short gp_state; /* GP kthread sleep state. */
unsigned long gp_wake_time; /* Last GP kthread wake. */
unsigned long gp_wake_seq; /* ->gp_seq at ^^^. */
/* End of fields guarded by root rcu_node's lock. */
......@@ -398,13 +406,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
int rcu_dynticks_snap(struct rcu_data *rdp);
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
#endif /* #ifdef CONFIG_RCU_BOOST */
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
static void rcu_qs(void);
......
......@@ -34,17 +34,7 @@
#include "../time/tick-internal.h"
#ifdef CONFIG_RCU_BOOST
#include "../locking/rtmutex_common.h"
/*
* Control variables for per-CPU and per-rcu_node kthreads.
*/
static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);
#else /* #ifdef CONFIG_RCU_BOOST */
/*
......@@ -1243,11 +1233,11 @@ static void invoke_rcu_callbacks_kthread(void)
unsigned long flags;
local_irq_save(flags);
__this_cpu_write(rcu_cpu_has_work, 1);
if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
current != __this_cpu_read(rcu_cpu_kthread_task)) {
rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
__this_cpu_read(rcu_cpu_kthread_status));
__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
__this_cpu_read(rcu_data.rcu_cpu_kthread_status));
}
local_irq_restore(flags);
}
......@@ -1258,7 +1248,7 @@ static void invoke_rcu_callbacks_kthread(void)
*/
static bool rcu_is_callbacks_kthread(void)
{
return __this_cpu_read(rcu_cpu_kthread_task) == current;
return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
}
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
......@@ -1315,12 +1305,12 @@ static void rcu_cpu_kthread_setup(unsigned int cpu)
static void rcu_cpu_kthread_park(unsigned int cpu)
{
per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}
static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
return __this_cpu_read(rcu_cpu_has_work);
return __this_cpu_read(rcu_data.rcu_cpu_has_work);
}
/*
......@@ -1330,15 +1320,14 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu)
*/
static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
int spincnt;
for (spincnt = 0; spincnt < 10; spincnt++) {
trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
this_cpu_inc(rcu_cpu_kthread_loops);
local_irq_disable();
work = *workp;
*workp = 0;
......@@ -1390,7 +1379,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
}
static struct smp_hotplug_thread rcu_cpu_thread_spec = {
.store = &rcu_cpu_kthread_task,
.store = &rcu_data.rcu_cpu_kthread_task,
.thread_should_run = rcu_cpu_kthread_should_run,
.thread_fn = rcu_cpu_kthread,
.thread_comm = "rcuc/%u",
......@@ -1407,7 +1396,7 @@ static void __init rcu_spawn_boost_kthreads(void)
int cpu;
for_each_possible_cpu(cpu)
per_cpu(rcu_cpu_has_work, cpu) = 0;
per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
return;
rcu_for_each_leaf_node(rnp)
......@@ -1773,22 +1762,24 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
/*
* Offload callback processing from the boot-time-specified set of CPUs
* specified by rcu_nocb_mask. For each CPU in the set, there is a
* kthread created that pulls the callbacks from the corresponding CPU,
* waits for a grace period to elapse, and invokes the callbacks.
* The no-CBs CPUs do a wake_up() on their kthread when they insert
* a callback into any empty list, unless the rcu_nocb_poll boot parameter
* has been specified, in which case each kthread actively polls its
* CPU. (Which isn't so great for energy efficiency, but which does
* reduce RCU's overhead on that CPU.)
* specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads
* created that pull the callbacks from the corresponding CPU, wait for
* a grace period to elapse, and invoke the callbacks. These kthreads
* are organized into leaders, which manage incoming callbacks, wait for
* grace periods, and awaken followers, and the followers, which only
* invoke callbacks. Each leader is its own follower. The no-CBs CPUs
* do a wake_up() on their kthread when they insert a callback into any
* empty list, unless the rcu_nocb_poll boot parameter has been specified,
* in which case each kthread actively polls its CPU. (Which isn't so great
* for energy efficiency, but which does reduce RCU's overhead on that CPU.)
*
* This is intended to be used in conjunction with Frederic Weisbecker's
* adaptive-idle work, which would seriously reduce OS jitter on CPUs
* running CPU-bound user-mode computations.
*
* Offloading of callback processing could also in theory be used as
* an energy-efficiency measure because CPUs with no RCU callbacks
* queued are more aggressive about entering dyntick-idle mode.
* Offloading of callbacks can also be used as an energy-efficiency
* measure because CPUs with no RCU callbacks queued are more aggressive
* about entering dyntick-idle mode.
*/
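
To make the enqueue/wakeup rule in the comment above concrete, here is a
standalone userspace sketch in C (pthreads rather than kernel primitives,
with the follower side folded into the leader for brevity; every name in
it is hypothetical and it is not the kernel's implementation):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stddef.h>
	#include <unistd.h>

	struct cb {
		struct cb *next;
		void (*func)(struct cb *);
	};

	static struct cb *cb_list;		/* callbacks handed to the leader */
	static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t leader_wq = PTHREAD_COND_INITIALIZER;
	static bool nocb_poll;			/* stand-in for the rcu_nocb_poll parameter */

	/* Post a callback; wake the leader only on an empty->nonempty transition. */
	static void enqueue_cb(struct cb *c, void (*func)(struct cb *))
	{
		bool was_empty;

		c->func = func;
		pthread_mutex_lock(&cb_lock);
		was_empty = (cb_list == NULL);
		c->next = cb_list;
		cb_list = c;
		pthread_mutex_unlock(&cb_lock);
		if (was_empty && !nocb_poll)
			pthread_cond_signal(&leader_wq);
	}

	/* Leader: sleep until woken (or poll), wait out a "grace period", invoke. */
	static void *leader_kthread(void *arg)
	{
		struct cb *list, *c;

		(void)arg;
		for (;;) {
			pthread_mutex_lock(&cb_lock);
			while (!nocb_poll && cb_list == NULL)
				pthread_cond_wait(&leader_wq, &cb_lock);
			list = cb_list;			/* adopt the whole list */
			cb_list = NULL;
			pthread_mutex_unlock(&cb_lock);
			sleep(1);			/* stand-in for a grace period */
			while ((c = list) != NULL) {	/* followers would do this part */
				list = c->next;
				c->func(c);
			}
		}
		return NULL;
	}

	static void sample_cb(struct cb *c)
	{
		(void)c;	/* a real callback would free memory, etc. */
	}

	int main(void)
	{
		static struct cb c;
		pthread_t tid;

		pthread_create(&tid, NULL, leader_kthread, NULL);
		enqueue_cb(&c, sample_cb);	/* list was empty, so this wakes the leader */
		sleep(3);			/* let the leader run the callback */
		return 0;
	}

The one point mirrored from the comment above is the empty->nonempty test:
a CPU posting into a non-empty list knows the leader already has pending
work, so no wakeup is needed.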
......@@ -1892,10 +1883,7 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}
/*
* Does the specified CPU need an RCU callback for this invocation
* of rcu_barrier()?
*/
/* Does rcu_barrier need to queue an RCU callback on the specified CPU? */
static bool rcu_nocb_cpu_needs_barrier(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
......@@ -1911,8 +1899,8 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu)
* callbacks would be posted. In the worst case, the first
* barrier in rcu_barrier() suffices (but the caller cannot
* necessarily rely on this, not a substitute for the caller
* getting the concurrency design right!). There must also be
* a barrier between the following load an posting of a callback
* getting the concurrency design right!). There must also be a
* barrier between the following load and posting of a callback
* (if a callback is in fact needed). This is associated with an
* atomic_inc() in the caller.
*/
......