Commit 3add00be authored by Boqun Feng

Merge branches 'rcu-doc.2024.02.14a', 'rcu-nocb.2024.02.14a', 'rcu-exp.2024.02.14a', 'rcu-tasks.2024.02.26a' and 'rcu-misc.2024.02.14a' into rcu.2024.02.26a
......@@ -5047,6 +5047,11 @@
this kernel boot parameter, forcibly setting it
to zero.
rcutree.enable_rcu_lazy= [KNL]
To save power, batch RCU callbacks and flush after
delay, memory pressure or callback list growing too
big.
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
......
......@@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
}
extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
......
......@@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t);
do { \
int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
\
if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \
if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \
likely(!___rttq_nesting)) { \
rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \
rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \
} else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
!READ_ONCE((t)->trc_reader_special.b.blocked)) { \
rcu_tasks_trace_qs_blkd(t); \
......
......@@ -858,6 +858,8 @@ struct task_struct {
u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list;
int rcu_tasks_exit_cpu;
struct list_head rcu_tasks_exit_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
......
......@@ -147,6 +147,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
.rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0,
......
......@@ -1976,6 +1976,7 @@ static inline void rcu_copy_process(struct task_struct *p)
p->rcu_tasks_holdout = false;
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1;
INIT_LIST_HEAD(&p->rcu_tasks_exit_list);
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
p->trc_reader_nesting = 0;
......
......@@ -314,6 +314,19 @@ config RCU_LAZY
To save power, batch RCU callbacks and flush after delay, memory
pressure, or callback list growing too big.
Requires rcu_nocbs=all to be set.
Use rcutree.enable_rcu_lazy=0 to turn it off at boot time.
config RCU_LAZY_DEFAULT_OFF
bool "Turn RCU lazy invocation off by default"
depends on RCU_LAZY
default n
help
Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it off by
default. The boot-time parameter rcutree.enable_rcu_lazy=1 can be used
to switch it back on.
config RCU_DOUBLE_CHECK_CB_TIME
bool "RCU callback-batch backup time check"
depends on RCU_EXPERT
......
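As a rough illustration of how the two Kconfig options and the boot parameter fit together, consider the sketch below. It is a simplified guess, not the exact tree.c change in this merge: the enable_rcu_lazy parameter name and the __call_rcu_common() helper are assumed here for illustration.

#ifdef CONFIG_RCU_LAZY
/* Default tracks CONFIG_RCU_LAZY_DEFAULT_OFF; rcutree.enable_rcu_lazy= overrides it at boot. */
static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF);
module_param(enable_rcu_lazy, bool, 0444);
#else
#define enable_rcu_lazy		false
#endif

void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	/* Queue lazily only when both the config and the boot parameter allow it. */
	__call_rcu_common(head, func, enable_rcu_lazy);
}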
......@@ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void);
struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
#endif // # ifdef CONFIG_TASKS_RUDE_RCU
#ifdef CONFIG_TASKS_RCU_GENERIC
void tasks_cblist_init_generic(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void tasks_cblist_init_generic(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
#define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2
......@@ -543,11 +549,11 @@ enum rcutorture_type {
};
#if defined(CONFIG_RCU_LAZY)
unsigned long rcu_lazy_get_jiffies_till_flush(void);
void rcu_lazy_set_jiffies_till_flush(unsigned long j);
unsigned long rcu_get_jiffies_lazy_flush(void);
void rcu_set_jiffies_lazy_flush(unsigned long j);
#else
static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
static inline unsigned long rcu_get_jiffies_lazy_flush(void) { return 0; }
static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { }
#endif
#if defined(CONFIG_TREE_RCU)
......@@ -623,12 +629,7 @@ int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker;
extern struct kthread_worker *rcu_exp_par_gp_kworker;
#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* CONFIG_RCU_EXP_KTHREAD */
void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */
......
......@@ -764,9 +764,9 @@ kfree_scale_init(void)
if (kfree_by_call_rcu) {
/* do a test to check the timeout. */
orig_jif = rcu_lazy_get_jiffies_till_flush();
orig_jif = rcu_get_jiffies_lazy_flush();
rcu_lazy_set_jiffies_till_flush(2 * HZ);
rcu_set_jiffies_lazy_flush(2 * HZ);
rcu_barrier();
jif_start = jiffies;
......@@ -775,7 +775,7 @@ kfree_scale_init(void)
smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
rcu_lazy_set_jiffies_till_flush(orig_jif);
rcu_set_jiffies_lazy_flush(orig_jif);
if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
......
......@@ -1368,9 +1368,13 @@ rcu_torture_writer(void *arg)
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
unsigned long stallsdone = jiffies;
bool stutter_waited;
unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
// If a new stall test is added, this must be adjusted.
if (stall_cpu_holdoff + stall_gp_kthread + stall_cpu)
stallsdone += (stall_cpu_holdoff + stall_gp_kthread + stall_cpu + 60) * HZ;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
......@@ -1576,11 +1580,11 @@ rcu_torture_writer(void *arg)
!atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
boot_ended)
boot_ended &&
time_after(jiffies, stallsdone))
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free) &&
rcu_access_pointer(rcu_torture_current) !=
&rcu_tortures[i]) {
rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) {
tracing_off();
show_rcu_gp_kthreads();
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
......@@ -2441,7 +2445,8 @@ static struct notifier_block rcu_torture_stall_block = {
/*
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
* induces a CPU stall for the time specified by stall_cpu.
* induces a CPU stall for the time specified by stall_cpu. If a new
* stall test is added, stallsdone in rcu_torture_writer() must be adjusted.
*/
static int rcu_torture_stall(void *args)
{
......
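A worked example of the stallsdone arithmetic above, using made-up module-parameter values rather than anything from the commit:

/* stall_cpu_holdoff = 10, stall_gp_kthread = 0, stall_cpu = 14 (illustrative only) */
stallsdone = jiffies + (10 + 0 + 14 + 60) * HZ;	/* forward-progress complaints deferred ~84 s */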
......@@ -1234,11 +1234,20 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
/*
* The snapshot for acceleration must be taken _before_ the read of the
* current gp sequence used for advancing, otherwise advancing may fail
* and acceleration may then fail too.
* It's crucial to capture the snapshot 's' for acceleration before
* reading the current gp_seq that is used for advancing. This is
* essential because if the acceleration snapshot is taken after a
* failed advancement attempt, there's a risk that a grace period may
* conclude and a new one may start in the interim. If the snapshot is
* captured after this sequence of events, the acceleration snapshot 's'
* could be excessively advanced, leading to acceleration failure.
* In such a scenario, an 'acceleration leak' can occur, where new
* callbacks become indefinitely stuck in the RCU_NEXT_TAIL segment.
* Also note that encountering advancing failures is a normal
* occurrence when the grace period for RCU_WAIT_TAIL is in progress.
*
* This could happen if:
* To see this, consider the following events which occur if
* rcu_seq_snap() were to be called after advance:
*
* 1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the
* RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8).
......@@ -1264,6 +1273,13 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp) {
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
/*
* Acceleration can never fail because the base current gp_seq
* used for acceleration is <= the value of gp_seq used for
* advancing. This means that RCU_NEXT_TAIL segment will
* always be able to be emptied by the acceleration into the
* RCU_NEXT_READY_TAIL or RCU_WAIT_TAIL segments.
*/
WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
}
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
......
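Condensed, the ordering that the new comments argue for looks roughly like the sketch below, a simplification of srcu_gp_start_if_needed() with locking and error paths elided; field access details may not match the file exactly:

s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);		/* 1: snapshot taken first, for acceleration */
if (rhp)
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));	/* 2: advance against current gp_seq */
WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));	/* 3: s <= gp_seq read in 2, so this cannot fail */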
......@@ -24,22 +24,6 @@ void rcu_sync_init(struct rcu_sync *rsp)
init_waitqueue_head(&rsp->gp_wait);
}
/**
* rcu_sync_enter_start - Force readers onto slow path for multiple updates
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* Must be called after rcu_sync_init() and before first use.
*
* Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
* pairs turn into NO-OPs.
*/
void rcu_sync_enter_start(struct rcu_sync *rsp)
{
rsp->gp_count++;
rsp->gp_state = GP_PASSED;
}
static void rcu_sync_func(struct rcu_head *rhp);
static void rcu_sync_call(struct rcu_sync *rsp)
......
......@@ -261,4 +261,5 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests();
tasks_cblist_init_generic();
}
......@@ -21,14 +21,10 @@
#include "rcu_segcblist.h"
/* Communicate arguments to a workqueue handler. */
/* Communicate arguments to a kthread worker handler. */
struct rcu_exp_work {
unsigned long rew_s;
#ifdef CONFIG_RCU_EXP_KTHREAD
struct kthread_work rew_work;
#else
struct work_struct rew_work;
#endif /* CONFIG_RCU_EXP_KTHREAD */
};
/* RCU's kthread states for tracing. */
......@@ -72,6 +68,9 @@ struct rcu_node {
/* Online CPUs for next expedited GP. */
/* Any CPU that has ever been online will */
/* have its bit set. */
struct kthread_worker *exp_kworker;
/* Workers performing per node expedited GP */
/* initialization. */
unsigned long cbovldmask;
/* CPUs experiencing callback overload. */
unsigned long ffmask; /* Fully functional CPUs. */
......@@ -113,7 +112,7 @@ struct rcu_node {
/* side effect, not as a lock. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
struct mutex boost_kthread_mutex;
struct mutex kthread_mutex;
/* Exclusion for thread spawning and affinity */
/* manipulation. */
struct task_struct *boost_kthread_task;
......@@ -467,10 +466,9 @@ static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j, bool lazy);
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags,
bool lazy);
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
rcu_callback_t func, unsigned long flags, bool lazy);
static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
......
......@@ -198,10 +198,9 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
}
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake) {
smp_mb(); /* EGP done before wake_up(). */
if (wake)
swake_up_one_online(&rcu_state.expedited_wq);
}
break;
}
mask = rnp->grpmask;
......@@ -419,7 +418,6 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
static void rcu_exp_sel_wait_wake(unsigned long s);
#ifdef CONFIG_RCU_EXP_KTHREAD
static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{
struct rcu_exp_work *rewp =
......@@ -428,9 +426,14 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
__sync_rcu_exp_select_node_cpus(rewp);
}
static inline bool rcu_gp_par_worker_started(void)
static inline bool rcu_exp_worker_started(void)
{
return !!READ_ONCE(rcu_exp_gp_kworker);
}
static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{
return !!READ_ONCE(rcu_exp_par_gp_kworker);
return !!READ_ONCE(rnp->exp_kworker);
}
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
......@@ -441,7 +444,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
* another work item on the same kthread worker can result in
* deadlock.
*/
kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
}
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
......@@ -466,64 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew
kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
}
static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
}
#else /* !CONFIG_RCU_EXP_KTHREAD */
static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
{
struct rcu_exp_work *rewp =
container_of(wp, struct rcu_exp_work, rew_work);
__sync_rcu_exp_select_node_cpus(rewp);
}
static inline bool rcu_gp_par_worker_started(void)
{
return !!READ_ONCE(rcu_par_gp_wq);
}
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{
int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
/* If all offline, queue the work on an unbound CPU. */
if (unlikely(cpu > rnp->grphi - rnp->grplo))
cpu = WORK_CPU_UNBOUND;
else
cpu += rnp->grplo;
queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
}
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{
flush_work(&rnp->rew.rew_work);
}
/*
* Work-queue handler to drive an expedited grace period forward.
*/
static void wait_rcu_exp_gp(struct work_struct *wp)
{
struct rcu_exp_work *rewp;
rewp = container_of(wp, struct rcu_exp_work, rew_work);
rcu_exp_sel_wait_wake(rewp->rew_s);
}
static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{
INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
queue_work(rcu_gp_wq, &rew->rew_work);
}
static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
destroy_work_on_stack(&rew->rew_work);
}
#endif /* CONFIG_RCU_EXP_KTHREAD */
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
......@@ -541,7 +486,7 @@ static void sync_rcu_exp_select_cpus(void)
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
if (!rcu_gp_par_worker_started() ||
if (!rcu_exp_par_worker_started(rnp) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
rcu_is_last_leaf_node(rnp)) {
/* No worker started yet or last leaf, do direct call. */
......@@ -956,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT);
unsigned long flags;
struct rcu_exp_work rew;
struct rcu_node *rnp;
......@@ -996,7 +940,7 @@ void synchronize_rcu_expedited(void)
return; /* Someone else did our work for us. */
/* Ensure that load happens before action based on it. */
if (unlikely(boottime)) {
if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) {
/* Direct call during scheduler init and early_initcalls(). */
rcu_exp_sel_wait_wake(s);
} else {
......@@ -1013,9 +957,6 @@ void synchronize_rcu_expedited(void)
/* Let the next expedited grace period start. */
mutex_unlock(&rcu_state.exp_mutex);
if (likely(!boottime))
synchronize_rcu_expedited_destroy_work(&rew);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
......
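For context, the kthread_worker pattern that these hunks consolidate on looks roughly like the sketch below. The my_exp_* names are illustrative stand-ins rather than the symbols used in tree_exp.h; the point is only the create/init/queue/flush API that replaces the earlier workqueue variant:

#include <linux/kthread.h>

static struct kthread_worker *my_exp_kworker;

static void my_exp_handler(struct kthread_work *wp)
{
	struct rcu_exp_work *rewp = container_of(wp, struct rcu_exp_work, rew_work);

	rcu_exp_sel_wait_wake(rewp->rew_s);	/* drive the expedited GP, as in the hunk above */
}

static void my_exp_worker_start(void)
{
	my_exp_kworker = kthread_create_worker(0, "my_exp_gp_kworker");
	WARN_ON_ONCE(IS_ERR_OR_NULL(my_exp_kworker));
}

static void my_exp_queue_and_wait(struct rcu_exp_work *rew)
{
	kthread_init_work(&rew->rew_work, my_exp_handler);
	kthread_queue_work(my_exp_kworker, &rew->rew_work);
	kthread_flush_work(&rew->rew_work);	/* wait for completion; no destroy step, unlike INIT_WORK_ONSTACK() */
}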
......@@ -256,6 +256,7 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
return __wake_nocb_gp(rdp_gp, rdp, force, flags);
}
#ifdef CONFIG_RCU_LAZY
/*
* LAZY_FLUSH_JIFFIES decides the maximum amount of time that
* can elapse before lazy callbacks are flushed. Lazy callbacks
......@@ -264,21 +265,20 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
* left unsubmitted to RCU after those many jiffies.
*/
#define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES;
#ifdef CONFIG_RCU_LAZY
// To be called only from test code.
void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
void rcu_set_jiffies_lazy_flush(unsigned long jif)
{
jiffies_till_flush = jif;
jiffies_lazy_flush = jif;
}
EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush);
unsigned long rcu_lazy_get_jiffies_till_flush(void)
unsigned long rcu_get_jiffies_lazy_flush(void)
{
return jiffies_till_flush;
return jiffies_lazy_flush;
}
EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush);
#endif
/*
......@@ -299,7 +299,7 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
*/
if (waketype == RCU_NOCB_WAKE_LAZY &&
rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush());
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
......@@ -482,7 +482,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// flush ->nocb_bypass to ->cblist.
if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
(ncbs && bypass_is_lazy &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
ncbs >= qhimark) {
rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
......@@ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// 2. Both of these conditions are met:
// a. The bypass list previously had only lazy CBs, and:
// b. The new CB is non-lazy.
if (ncbs && (!bypass_is_lazy || lazy)) {
local_irq_restore(flags);
} else {
if (!ncbs || (bypass_is_lazy && !lazy)) {
// No-CBs GP kthread might be indefinitely asleep, if so, wake.
rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
......@@ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
} else {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstBQnoWake"));
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
}
}
return true; // Callback already enqueued.
......@@ -566,11 +564,12 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
long lazy_len;
long len;
struct task_struct *t;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
// If we are being polled or there is no kthread, just leave.
t = READ_ONCE(rdp->nocb_gp_kthread);
if (rcu_nocb_poll || !t) {
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeNotPoll"));
return;
......@@ -583,17 +582,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rdp->qlen_last_fqs_check = len;
// Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
}
......@@ -610,20 +609,32 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
smp_mb(); /* Enqueue before timer_pending(). */
if ((rdp->nocb_cb_sleep ||
!rcu_segcblist_ready_cbs(&rdp->cblist)) &&
!timer_pending(&rdp->nocb_timer)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
!timer_pending(&rdp_gp->nocb_timer)) {
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred"));
} else {
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
} else {
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
}
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
rcu_callback_t func, unsigned long flags, bool lazy)
{
bool was_alldone;
if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
/* Not enqueued on bypass but locked, do regular enqueue */
rcutree_enqueue(rdp, head, func);
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
}
}
static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
bool *wake_state)
{
......@@ -723,7 +734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
lazy_ncbs = READ_ONCE(rdp->lazy_len);
if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
bypass_ncbs > 2 * qhimark)) {
flush_bypass = true;
} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
......@@ -779,7 +790,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
needwake = rdp->nocb_cb_sleep;
WRITE_ONCE(rdp->nocb_cb_sleep, false);
smp_mb(); /* CB invocation -after- GP end. */
} else {
needwake = false;
}
......@@ -933,8 +943,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
// VVV Ensure CB invocation follows _sleep test.
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
if (READ_ONCE(rdp->nocb_cb_sleep)) {
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
......@@ -1383,7 +1392,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
rcu_nocb_unlock_irqrestore(rdp, flags);
continue;
}
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
rcu_nocb_try_flush_bypass(rdp, jiffies);
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count;
......@@ -1768,10 +1777,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
return true;
}
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags, bool lazy)
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
rcu_callback_t func, unsigned long flags, bool lazy)
{
return false;
WARN_ON_ONCE(1); /* Should be dead code! */
}
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
......
......@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
struct sched_param sp;
struct task_struct *t;
mutex_lock(&rnp->boost_kthread_mutex);
if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
goto out;
if (rnp->boost_kthread_task)
return;
t = kthread_create(rcu_boost_kthread, (void *)rnp,
"rcub/%d", rnp_index);
if (WARN_ON_ONCE(IS_ERR(t)))
goto out;
return;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
......@@ -1210,48 +1209,11 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
out:
mutex_unlock(&rnp->boost_kthread_mutex);
}
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
* held, so the value of rnp->qsmaskinit will be stable.
*
* We don't include outgoingcpu in the affinity set, use -1 if there is
* no outgoing CPU. If there are no CPUs left in the affinity set,
* this function allows the kthread to execute on any CPU.
*
* Any future concurrent calls are serialized via ->boost_kthread_mutex.
*/
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
{
struct task_struct *t = rnp->boost_kthread_task;
unsigned long mask;
cpumask_var_t cm;
int cpu;
if (!t)
return;
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
mutex_lock(&rnp->boost_kthread_mutex);
mask = rcu_rnp_online_cpus(rnp);
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
if (cpumask_empty(cm)) {
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
if (outgoingcpu >= 0)
cpumask_clear_cpu(outgoingcpu, cm);
}
set_cpus_allowed_ptr(t, cm);
mutex_unlock(&rnp->boost_kthread_mutex);
free_cpumask_var(cm);
return READ_ONCE(rnp->boost_kthread_task);
}
#else /* #ifdef CONFIG_RCU_BOOST */
......@@ -1270,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
}
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
{
return NULL;
}
#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
......