Commit 3fbfbf7a authored by Paul E. McKenney, committed by Paul E. McKenney

rcu: Add callback-free CPUs

RCU callback execution can add significant OS jitter and also can
degrade both scheduling latency and, in asymmetric multiprocessors,
energy efficiency.  This commit therefore adds the ability for selected
CPUs ("rcu_nocbs=" boot parameter) to have their callbacks offloaded
to kthreads.  If the "rcu_nocb_poll" boot parameter is also specified,
these kthreads will do polling, removing the need for the offloaded
CPUs to do wakeups.  At least one CPU must be doing normal callback
processing: currently CPU 0 cannot be selected as a no-CBs CPU.
In addition, attempts to offline the last normal-CBs CPU will fail.
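For example (illustrative CPU list), a CONFIG_RCU_NOCB_CPU=y kernel booted with

    rcu_nocbs=1-7 rcu_nocb_poll

offloads callback invocation for CPUs 1-7 to kthreads rcuo1 through rcuo7,
which poll for work, while CPU 0 retains normal callback processing.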

This feature was inspired by Jim Houston's and Joe Korty's JRCU, and
this commit includes fixes to problems located by Fengguang Wu's
kbuild test robot.

[ paulmck: Added gfp.h include file as suggested by Fengguang Wu. ]
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent aac1cda3
@@ -2394,6 +2394,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
See Documentation/blockdev/ramdisk.txt.
rcu_nocbs= [KNL,BOOT]
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.
Invocation of these CPUs' RCU callbacks will
be offloaded to "rcuoN" kthreads created for
that purpose. This reduces OS jitter on the
offloaded CPUs, which can be useful for HPC and
real-time workloads. It can also improve energy
efficiency for asymmetric multiprocessors.
rcu_nocb_poll [KNL,BOOT]
Rather than requiring that offloaded CPUs
(specified by rcu_nocbs= above) explicitly
awaken the corresponding "rcuoN" kthreads,
make these kthreads poll for callbacks.
This improves the real-time response for the
offloaded CPUs by relieving them of the need to
wake up the corresponding kthread, but degrades
energy efficiency by requiring that the kthreads
periodically wake up to do the polling.
rcutree.blimit= [KNL,BOOT]
Set maximum number of finished RCU callbacks to process
in one batch.
...
@@ -549,6 +549,7 @@ TRACE_EVENT(rcu_torture_read,
* "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
* "Inc1": rcu_barrier_callback() piggyback check counter incremented.
* "Offline": rcu_barrier_callback() found offline CPU
* "OnlineNoCB": rcu_barrier_callback() found online no-CBs CPU.
* "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
* "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
* "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
...
@@ -654,6 +654,28 @@ config RCU_BOOST_DELAY
Accept the default if unsure.
config RCU_NOCB_CPU
bool "Offload RCU callback processing from boot-selected CPUs"
depends on TREE_RCU || TREE_PREEMPT_RCU
default n
help
Use this option to reduce OS jitter for aggressive HPC or
real-time workloads. It can also be used to offload RCU
callback invocation to energy-efficient CPUs in battery-powered
asymmetric multiprocessors.
This option offloads callback invocation from the set of
CPUs specified at boot time by the rcu_nocbs parameter.
For each such CPU, a kthread ("rcuoN") will be created to
invoke callbacks, where the "N" is the CPU being offloaded.
Nothing prevents this kthread from running on the specified
CPUs, but (1) the kthreads may be preempted between each
callback, and (2) affinity or cgroups can be used to force
the kthreads to run on whatever set of CPUs is desired.
Say Y here if you want reduced OS jitter on selected CPUs.
Say N here if you are unsure.
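As a sketch of the affinity point in the help text above, a userspace program
could pin an rcuoN kthread to a housekeeping CPU with sched_setaffinity().
The kthread's PID (found with ps), the choice of CPU 0, and the helper name
pin_rcuo() are all assumptions for illustration:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

/* Pin the kthread with the given PID to CPU 0 (illustrative policy). */
static int pin_rcuo(pid_t pid)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	return sched_setaffinity(pid, sizeof(set), &set);
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <rcuoN-pid>\n", argv[0]);
		return 1;
	}
	if (pin_rcuo((pid_t)atoi(argv[1]))) {
		perror("sched_setaffinity");
		return 1;
	}
	return 0;
}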
endmenu # "RCU Subsystem"
config IKCONFIG
...
@@ -303,7 +303,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
rdp->nxttail[RCU_DONE_TAIL] != NULL;
}
/*
@@ -312,8 +313,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
struct rcu_head **ntp;

ntp = rdp->nxttail[RCU_DONE_TAIL +
(ACCESS_ONCE(rsp->completed) != rdp->completed)];
return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
!rcu_gp_in_progress(rsp);
}
@@ -1123,6 +1127,7 @@ static void init_callback_list(struct rcu_data *rdp)
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
init_nocb_callback_list(rdp);
}
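init_nocb_callback_list() is defined in a portion of the patch not shown
here. A plausible sketch, assuming it marks a no-CBs CPU by NULLing its
->nxttail[] pointers, which is the sentinel that the modified
cpu_has_callbacks_ready_to_invoke() and cpu_needs_another_gp() above (and
__call_rcu() below) test for; rcu_nocb_mask is assumed to hold the
rcu_nocbs= CPU list:

/* Sketch only, not the verbatim patch. */
static void init_nocb_callback_list(struct rcu_data *rdp)
{
	int i;

	if (rcu_nocb_mask == NULL ||
	    !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
		return;			/* Normal CPU: leave list usable. */
	for (i = 0; i < RCU_NEXT_SIZE; i++)
		rdp->nxttail[i] = NULL;	/* Callbacks are offloaded. */
}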
/*
@@ -1633,6 +1638,10 @@ static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_data *rdp)
{
/* No-CBs CPUs do not have orphanable callbacks. */
if (is_nocb_cpu(rdp->cpu))
return;
/*
* Orphan the callbacks. First adjust the counts. This is safe
* because _rcu_barrier() excludes CPU-hotplug operations, so it
@@ -1684,6 +1693,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
int i;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
/* No-CBs CPUs are handled specially. */
if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
return;
/* Do the accounting first. */
rdp->qlen_lazy += rsp->qlen_lazy;
rdp->qlen += rsp->qlen;
@@ -2162,9 +2175,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
}
}
/*
* Helper function for call_rcu() and friends. The cpu argument will
* normally be -1, indicating "currently running CPU". It may specify
* a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
* is expected to specify a CPU.
*/
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp, int cpu, bool lazy)
{
unsigned long flags;
struct rcu_data *rdp;
@@ -2184,9 +2203,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rdp = this_cpu_ptr(rsp->rda);
/* Add the callback to our list. */
if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
int offline;
if (cpu != -1)
rdp = per_cpu_ptr(rsp->rda, cpu);
offline = !__call_rcu_nocb(rdp, head, lazy);
WARN_ON_ONCE(offline);
/* _call_rcu() is illegal on offline CPU; leak the callback. */
local_irq_restore(flags);
return;
}
@@ -2215,7 +2239,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
*/
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
@@ -2224,7 +2248,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
@@ -2676,9 +2700,17 @@ static void _rcu_barrier(struct rcu_state *rsp)
* When that callback is invoked, we will know that all of the
* corresponding CPU's preceding callbacks have been invoked.
*/
for_each_possible_cpu(cpu) {
if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
continue;
rdp = per_cpu_ptr(rsp->rda, cpu);
if (is_nocb_cpu(cpu)) {
_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
rsp->n_barrier_done);
atomic_inc(&rsp->barrier_cpu_count);
__call_rcu(&rdp->barrier_head, rcu_barrier_callback,
rsp, cpu, 0);
} else if (ACCESS_ONCE(rdp->qlen)) {
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
rsp->n_barrier_done);
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -2752,6 +2784,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
#endif
rdp->cpu = cpu;
rdp->rsp = rsp;
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -2833,6 +2866,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;
int ret = NOTIFY_OK;
trace_rcu_utilization("Start CPU hotplug");
switch (action) {
@@ -2846,7 +2880,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_boost_kthread_setaffinity(rnp, -1);
break;
case CPU_DOWN_PREPARE:
if (nocb_cpu_expendable(cpu))
rcu_boost_kthread_setaffinity(rnp, cpu);
else
ret = NOTIFY_BAD;
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
@@ -2870,7 +2907,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
break;
}
trace_rcu_utilization("End CPU hotplug");
return ret;
}
/*
@@ -2890,6 +2927,7 @@ static int __init rcu_spawn_gp_kthread(void)
raw_spin_lock_irqsave(&rnp->lock, flags);
rsp->gp_kthread = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rcu_spawn_nocb_kthreads(rsp);
}
return 0;
}
@@ -3085,6 +3123,7 @@ void __init rcu_init(void)
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
rcu_init_nocb();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
...
@@ -317,6 +317,18 @@ struct rcu_data {
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
struct rcu_head *nocb_head; /* CBs waiting for kthread. */
struct rcu_head **nocb_tail;
atomic_long_t nocb_q_count; /* # CBs waiting for kthread */
atomic_long_t nocb_q_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int cpu;
struct rcu_state *rsp;
};
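These fields suggest a lock-free queue: an enqueuer atomically claims
->nocb_tail with xchg() and then links the callback through the old tail, so
no lock is needed even for cross-CPU enqueues from _rcu_barrier(). A
condensed sketch of such an enqueue path (an assumption: the helper name and
the exact wakeup condition follow from the fields above rather than from the
visible diff, and rcu_nocb_poll is assumed to be the flag behind the boot
parameter):

static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
				    struct rcu_head *rhp,
				    struct rcu_head **rhtp,
				    int rhcount, int rhcount_lazy)
{
	struct rcu_head **old_rhpp;

	/* Claim the tail, then make the old tail point at the new CB. */
	old_rhpp = xchg(&rdp->nocb_tail, rhtp);
	ACCESS_ONCE(*old_rhpp) = rhp;
	atomic_long_add(rhcount, &rdp->nocb_q_count);
	atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);

	/* Wake the rcuoN kthread unless it polls or was already busy. */
	if (!rcu_nocb_poll && rdp->nocb_kthread &&
	    old_rhpp == &rdp->nocb_head)
		wake_up(&rdp->nocb_wq);
}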
@@ -369,6 +381,12 @@ struct rcu_state {
struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head));
#ifdef CONFIG_RCU_NOCB_CPU
void (*call_remote)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
/* call_rcu() flavor, but for */
/* placing on remote CPU. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* The following fields are guarded by the root rcu_node's lock. */
@@ -439,6 +457,8 @@ struct rcu_state {
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
extern struct list_head rcu_struct_flavors;
/* Sequence through rcu_state structures for each RCU flavor. */
#define for_each_rcu_flavor(rsp) \
list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
@@ -515,5 +535,32 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool is_nocb_cpu(int cpu);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp);
static bool nocb_cpu_expendable(int cpu);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void init_nocb_callback_list(struct rcu_data *rdp);
static void __init rcu_init_nocb(void);
#endif /* #ifndef RCU_TREE_NONCORE */
#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_NOCB_CPU
/* Sum up queue lengths for tracing. */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
*qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
{
*ql = 0;
*qll = 0;
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
#endif /* #ifdef CONFIG_RCU_TRACE */
[A further diff, containing the definitions of the no-CBs functions declared above, is collapsed in this view.]
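Since that implementation is not visible here, the following condensed
sketch (an approximation built from the declarations and fields above, not
the verbatim patch) shows the shape of each rcuoN kthread: detach the queue,
wait for a grace period via the ->call_remote flavor added above, then
invoke the batch:

static int rcu_nocb_kthread(void *arg)
{
	struct rcu_data *rdp = arg;
	struct rcu_head *list, *next;

	for (;;) {
		/* Unless polling, sleep until callbacks are queued. */
		if (!rcu_nocb_poll)
			wait_event_interruptible(rdp->nocb_wq,
						 ACCESS_ONCE(rdp->nocb_head));
		list = ACCESS_ONCE(rdp->nocb_head);
		if (!list) {
			schedule_timeout_interruptible(1);	/* Poll. */
			continue;
		}

		/* Detach all queued callbacks, then wait for a GP. */
		ACCESS_ONCE(rdp->nocb_head) = NULL;
		xchg(&rdp->nocb_tail, &rdp->nocb_head);
		wait_rcu_gp(rdp->rsp->call_remote);

		/* Invoke the detached callbacks one at a time. */
		while (list) {
			/* Real code must also wait for in-flight */
			/* enqueues to finish linking ->next; elided. */
			next = ACCESS_ONCE(list->next);
			local_bh_disable();
			/* Simplified: real code uses __rcu_reclaim() */
			/* and updates the nocb_p_count fields. */
			list->func(list);
			local_bh_enable();
			list = next;
		}
	}
	return 0;
}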
@@ -113,6 +113,8 @@ static char convert_kthread_status(unsigned int kthread_status)
static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
long ql, qll;
if (!rdp->beenonline)
return;
seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
@@ -126,8 +128,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
seq_printf(m, " of=%lu", rdp->offline_fqs);
rcu_nocb_q_lengths(rdp, &ql, &qll);
qll += rdp->qlen_lazy;
ql += rdp->qlen;
seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
rdp->qlen_lazy, rdp->qlen, qll, ql,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
rdp->nxttail[RCU_NEXT_TAIL]], rdp->nxttail[RCU_NEXT_TAIL]],
".R"[rdp->nxttail[RCU_WAIT_TAIL] != ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
...