Commit c06aed0e authored by Paul E. McKenney

rcu: Compute jiffies_till_sched_qs from other kernel parameters

The jiffies_till_sched_qs value is used to determine how old a grace period
must be before RCU enlists the help of the scheduler to force a quiescent
state on the holdout CPU.  Currently, this defaults to HZ/10 regardless of
system size and may be set only at boot time.  This can be a problem for
very large systems, because if the values of the jiffies_till_first_fqs
and jiffies_till_next_fqs kernel parameters are left at their defaults,
they are calculated to increase as the number of CPUs actually configured
on the system increases.  Thus, on a sufficiently large system, RCU would
enlist the help of the scheduler before the grace-period kthread had a
chance to scan for idle CPUs, which wastes CPU time.

This commit therefore allows jiffies_till_sched_qs to be set, if desired,
but if left as default, computes it as jiffies_till_first_fqs plus twice
jiffies_till_next_fqs, thus allowing three force-quiescent-state scans
for idle CPUs.  This scales with the number of CPUs, providing sensible
default values.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 74de6960
...@@ -3595,7 +3595,14 @@ ...@@ -3595,7 +3595,14 @@
Set required age in jiffies for a Set required age in jiffies for a
given grace period before RCU starts given grace period before RCU starts
soliciting quiescent-state help from soliciting quiescent-state help from
rcu_note_context_switch(). rcu_note_context_switch(). If not specified, the
kernel will calculate a value based on the most
recent settings of rcutree.jiffies_till_first_fqs
and rcutree.jiffies_till_next_fqs.
This calculated value may be viewed in
rcutree.jiffies_to_sched_qs. Any attempt to
set rcutree.jiffies_to_sched_qs will be
cheerfully overwritten.
rcutree.jiffies_till_first_fqs= [KNL] rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to Set delay from grace-period initialization to
......
...@@ -396,13 +396,47 @@ static ulong jiffies_till_first_fqs = ULONG_MAX; ...@@ -396,13 +396,47 @@ static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX; static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads; static bool rcu_kick_kthreads;
/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 *
 * jiffies_till_sched_qs holds the requested value; ULONG_MAX is the
 * "not specified" sentinel that adjust_jiffies_till_sched_qs() tests for.
 * jiffies_to_sched_qs holds the value that adjust_jiffies_till_sched_qs()
 * stores and that rcu_implicit_dynticks_qs() reads.
 */
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
static ulong jiffies_to_sched_qs; /* Effective value: the above if specified, otherwise calculated. */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
/*
 * Recompute jiffies_to_sched_qs, the grace-period age at which RCU starts
 * taking active measures to force quiescent states.
 *
 * Make sure that we give the grace-period kthread time to detect any
 * idle CPUs before taking active measures to force quiescent states.
 * However, don't go below 100 milliseconds, adjusted upwards for really
 * large systems.
 *
 * If jiffies_till_sched_qs was explicitly specified (is not ULONG_MAX),
 * that value is used verbatim.  Otherwise the value is
 * jiffies_till_first_fqs + 2 * jiffies_till_next_fqs, bounded below by
 * HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV, and the result is logged.
 *
 * Called from the jiffies_till_first_fqs and jiffies_till_next_fqs
 * parameter setters and from rcu_init_geometry().
 */
static void adjust_jiffies_till_sched_qs(void)
{
	unsigned long j;
	unsigned long min_delay;

	/* If jiffies_till_sched_qs was specified, respect the request. */
	if (jiffies_till_sched_qs != ULONG_MAX) {
		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
		return;
	}

	/* Otherwise, one first-FQS delay plus two subsequent-FQS delays. */
	j = READ_ONCE(jiffies_till_first_fqs) +
	    2 * READ_ONCE(jiffies_till_next_fqs);

	/* Lower bound: HZ / 10 jiffies, scaled up with the number of CPUs. */
	min_delay = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	if (j < min_delay)
		j = min_delay;

	/* j is unsigned long, so print it with %lu rather than %ld. */
	pr_info("RCU calculated value of scheduler-enlistment delay is %lu jiffies.\n", j);
	WRITE_ONCE(jiffies_to_sched_qs, j);
}
static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp) static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
{ {
ulong j; ulong j;
int ret = kstrtoul(val, 0, &j); int ret = kstrtoul(val, 0, &j);
if (!ret) if (!ret) {
WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
adjust_jiffies_till_sched_qs();
}
return ret; return ret;
} }
...@@ -411,8 +445,10 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param ...@@ -411,8 +445,10 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
ulong j; ulong j;
int ret = kstrtoul(val, 0, &j); int ret = kstrtoul(val, 0, &j);
if (!ret) if (!ret) {
WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
adjust_jiffies_till_sched_qs();
}
return ret; return ret;
} }
...@@ -430,13 +466,6 @@ module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_fi ...@@ -430,13 +466,6 @@ module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_fi
module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644); module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
module_param(rcu_kick_kthreads, bool, 0644); module_param(rcu_kick_kthreads, bool, 0644);
/*
* How long the grace period must be before we start recruiting
* quiescent-state help from rcu_note_context_switch().
*/
static ulong jiffies_till_sched_qs = HZ / 10;
module_param(jiffies_till_sched_qs, ulong, 0444);
static void force_qs_rnp(int (*f)(struct rcu_data *rdp)); static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
static void force_quiescent_state(void); static void force_quiescent_state(void);
static int rcu_pending(void); static int rcu_pending(void);
...@@ -1041,16 +1070,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) ...@@ -1041,16 +1070,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
/* /*
* A CPU running for an extended time within the kernel can * A CPU running for an extended time within the kernel can
* delay RCU grace periods: (1) At age jiffies_till_sched_qs, * delay RCU grace periods: (1) At age jiffies_to_sched_qs,
* set .rcu_urgent_qs, (2) At age 2*jiffies_till_sched_qs, set * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set
* both .rcu_need_heavy_qs and .rcu_urgent_qs. Note that the * both .rcu_need_heavy_qs and .rcu_urgent_qs. Note that the
* unsynchronized assignments to the per-CPU rcu_need_heavy_qs * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
* variable are safe because the assignments are repeated if this * variable are safe because the assignments are repeated if this
* CPU failed to pass through a quiescent state. This code * CPU failed to pass through a quiescent state. This code
* also checks .jiffies_resched in case jiffies_till_sched_qs * also checks .jiffies_resched in case jiffies_to_sched_qs
* is set way high. * is set way high.
*/ */
jtsq = jiffies_till_sched_qs; jtsq = READ_ONCE(jiffies_to_sched_qs);
ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
if (!READ_ONCE(*rnhqp) && if (!READ_ONCE(*rnhqp) &&
...@@ -1236,7 +1265,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) ...@@ -1236,7 +1265,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
gpa = READ_ONCE(rcu_state.gp_activity); gpa = READ_ONCE(rcu_state.gp_activity);
pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
rcu_state.name, j - gpa, j, gpa, rcu_state.name, j - gpa, j, gpa,
jiffies_till_next_fqs, READ_ONCE(jiffies_till_next_fqs),
rcu_get_root()->qsmask); rcu_get_root()->qsmask);
/* In this case, the current CPU might be at fault. */ /* In this case, the current CPU might be at fault. */
sched_show_task(current); sched_show_task(current);
...@@ -1874,7 +1903,7 @@ static void rcu_gp_fqs_loop(void) ...@@ -1874,7 +1903,7 @@ static void rcu_gp_fqs_loop(void)
struct rcu_node *rnp = rcu_get_root(); struct rcu_node *rnp = rcu_get_root();
first_gp_fqs = true; first_gp_fqs = true;
j = jiffies_till_first_fqs; j = READ_ONCE(jiffies_till_first_fqs);
ret = 0; ret = 0;
for (;;) { for (;;) {
if (!ret) { if (!ret) {
...@@ -1908,7 +1937,7 @@ static void rcu_gp_fqs_loop(void) ...@@ -1908,7 +1937,7 @@ static void rcu_gp_fqs_loop(void)
cond_resched_tasks_rcu_qs(); cond_resched_tasks_rcu_qs();
WRITE_ONCE(rcu_state.gp_activity, jiffies); WRITE_ONCE(rcu_state.gp_activity, jiffies);
ret = 0; /* Force full wait till next FQS. */ ret = 0; /* Force full wait till next FQS. */
j = jiffies_till_next_fqs; j = READ_ONCE(jiffies_till_next_fqs);
} else { } else {
/* Deal with stray signal. */ /* Deal with stray signal. */
cond_resched_tasks_rcu_qs(); cond_resched_tasks_rcu_qs();
...@@ -3579,6 +3608,8 @@ static void __init rcu_init_geometry(void) ...@@ -3579,6 +3608,8 @@ static void __init rcu_init_geometry(void)
jiffies_till_first_fqs = d; jiffies_till_first_fqs = d;
if (jiffies_till_next_fqs == ULONG_MAX) if (jiffies_till_next_fqs == ULONG_MAX)
jiffies_till_next_fqs = d; jiffies_till_next_fqs = d;
if (jiffies_till_sched_qs == ULONG_MAX)
adjust_jiffies_till_sched_qs();
/* If the compile-time values are accurate, just leave. */ /* If the compile-time values are accurate, just leave. */
if (rcu_fanout_leaf == RCU_FANOUT_LEAF && if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
......
...@@ -105,6 +105,8 @@ static void __init rcu_bootup_announce_oddness(void) ...@@ -105,6 +105,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs); pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
if (jiffies_till_next_fqs != ULONG_MAX) if (jiffies_till_next_fqs != ULONG_MAX)
pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs); pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
if (jiffies_till_sched_qs != ULONG_MAX)
pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
if (rcu_kick_kthreads) if (rcu_kick_kthreads)
pr_info("\tKick kthreads if too-long grace period.\n"); pr_info("\tKick kthreads if too-long grace period.\n");
if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment