Commit 3add00be authored by Boqun Feng's avatar Boqun Feng

Merge branches 'rcu-doc.2024.02.14a', 'rcu-nocb.2024.02.14a',...

Merge branches 'rcu-doc.2024.02.14a', 'rcu-nocb.2024.02.14a', 'rcu-exp.2024.02.14a', 'rcu-tasks.2024.02.26a' and 'rcu-misc.2024.02.14a' into rcu.2024.02.26a
...@@ -5047,6 +5047,11 @@ ...@@ -5047,6 +5047,11 @@
this kernel boot parameter, forcibly setting it this kernel boot parameter, forcibly setting it
to zero. to zero.
rcutree.enable_rcu_lazy= [KNL]
To save power, batch RCU callbacks and flush after
delay, memory pressure or callback list growing too
big.
rcuscale.gp_async= [KNL] rcuscale.gp_async= [KNL]
Measure performance of asynchronous Measure performance of asynchronous
grace-period primitives such as call_rcu(). grace-period primitives such as call_rcu().
......
...@@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) ...@@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
} }
extern void rcu_sync_init(struct rcu_sync *); extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *);
......
...@@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); ...@@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t);
do { \ do { \
int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
\ \
if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \
likely(!___rttq_nesting)) { \ likely(!___rttq_nesting)) { \
rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \
} else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
!READ_ONCE((t)->trc_reader_special.b.blocked)) { \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \
rcu_tasks_trace_qs_blkd(t); \ rcu_tasks_trace_qs_blkd(t); \
......
...@@ -858,6 +858,8 @@ struct task_struct { ...@@ -858,6 +858,8 @@ struct task_struct {
u8 rcu_tasks_idx; u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu; int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list; struct list_head rcu_tasks_holdout_list;
int rcu_tasks_exit_cpu;
struct list_head rcu_tasks_exit_list;
#endif /* #ifdef CONFIG_TASKS_RCU */ #endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU #ifdef CONFIG_TASKS_TRACE_RCU
......
...@@ -147,6 +147,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { ...@@ -147,6 +147,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.rcu_tasks_holdout = false, .rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1, .rcu_tasks_idle_cpu = -1,
.rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif #endif
#ifdef CONFIG_TASKS_TRACE_RCU #ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0, .trc_reader_nesting = 0,
......
...@@ -1976,6 +1976,7 @@ static inline void rcu_copy_process(struct task_struct *p) ...@@ -1976,6 +1976,7 @@ static inline void rcu_copy_process(struct task_struct *p)
p->rcu_tasks_holdout = false; p->rcu_tasks_holdout = false;
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1; p->rcu_tasks_idle_cpu = -1;
INIT_LIST_HEAD(&p->rcu_tasks_exit_list);
#endif /* #ifdef CONFIG_TASKS_RCU */ #endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU #ifdef CONFIG_TASKS_TRACE_RCU
p->trc_reader_nesting = 0; p->trc_reader_nesting = 0;
......
...@@ -314,6 +314,19 @@ config RCU_LAZY ...@@ -314,6 +314,19 @@ config RCU_LAZY
To save power, batch RCU callbacks and flush after delay, memory To save power, batch RCU callbacks and flush after delay, memory
pressure, or callback list growing too big. pressure, or callback list growing too big.
Requires rcu_nocbs=all to be set.
Use rcutree.enable_rcu_lazy=0 to turn it off at boot time.
config RCU_LAZY_DEFAULT_OFF
bool "Turn RCU lazy invocation off by default"
depends on RCU_LAZY
default n
help
Allows building the kernel with CONFIG_RCU_LAZY=y while keeping lazy
invocation off by default. The boot-time parameter
rcutree.enable_rcu_lazy=1 can be used to switch it back on.
config RCU_DOUBLE_CHECK_CB_TIME config RCU_DOUBLE_CHECK_CB_TIME
bool "RCU callback-batch backup time check" bool "RCU callback-batch backup time check"
depends on RCU_EXPERT depends on RCU_EXPERT
......
...@@ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void); ...@@ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void);
struct task_struct *get_rcu_tasks_rude_gp_kthread(void); struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
#endif // # ifdef CONFIG_TASKS_RUDE_RCU #endif // # ifdef CONFIG_TASKS_RUDE_RCU
#ifdef CONFIG_TASKS_RCU_GENERIC
void tasks_cblist_init_generic(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void tasks_cblist_init_generic(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
#define RCU_SCHEDULER_INACTIVE 0 #define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1 #define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2 #define RCU_SCHEDULER_RUNNING 2
...@@ -543,11 +549,11 @@ enum rcutorture_type { ...@@ -543,11 +549,11 @@ enum rcutorture_type {
}; };
#if defined(CONFIG_RCU_LAZY) #if defined(CONFIG_RCU_LAZY)
unsigned long rcu_lazy_get_jiffies_till_flush(void); unsigned long rcu_get_jiffies_lazy_flush(void);
void rcu_lazy_set_jiffies_till_flush(unsigned long j); void rcu_set_jiffies_lazy_flush(unsigned long j);
#else #else
static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } static inline unsigned long rcu_get_jiffies_lazy_flush(void) { return 0; }
static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { }
#endif #endif
#if defined(CONFIG_TREE_RCU) #if defined(CONFIG_TREE_RCU)
...@@ -623,12 +629,7 @@ int rcu_get_gp_kthreads_prio(void); ...@@ -623,12 +629,7 @@ int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j); void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void); void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq; extern struct workqueue_struct *rcu_gp_wq;
#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker; extern struct kthread_worker *rcu_exp_gp_kworker;
extern struct kthread_worker *rcu_exp_par_gp_kworker;
#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* CONFIG_RCU_EXP_KTHREAD */
void rcu_gp_slow_register(atomic_t *rgssp); void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp); void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */ #endif /* #else #ifdef CONFIG_TINY_RCU */
......
...@@ -764,9 +764,9 @@ kfree_scale_init(void) ...@@ -764,9 +764,9 @@ kfree_scale_init(void)
if (kfree_by_call_rcu) { if (kfree_by_call_rcu) {
/* do a test to check the timeout. */ /* do a test to check the timeout. */
orig_jif = rcu_lazy_get_jiffies_till_flush(); orig_jif = rcu_get_jiffies_lazy_flush();
rcu_lazy_set_jiffies_till_flush(2 * HZ); rcu_set_jiffies_lazy_flush(2 * HZ);
rcu_barrier(); rcu_barrier();
jif_start = jiffies; jif_start = jiffies;
...@@ -775,7 +775,7 @@ kfree_scale_init(void) ...@@ -775,7 +775,7 @@ kfree_scale_init(void)
smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1); smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
rcu_lazy_set_jiffies_till_flush(orig_jif); rcu_set_jiffies_lazy_flush(orig_jif);
if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
......
...@@ -1368,9 +1368,13 @@ rcu_torture_writer(void *arg) ...@@ -1368,9 +1368,13 @@ rcu_torture_writer(void *arg)
struct rcu_torture *rp; struct rcu_torture *rp;
struct rcu_torture *old_rp; struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand); static DEFINE_TORTURE_RANDOM(rand);
unsigned long stallsdone = jiffies;
bool stutter_waited; bool stutter_waited;
unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE]; unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
// If a new stall test is added, this must be adjusted.
if (stall_cpu_holdoff + stall_gp_kthread + stall_cpu)
stallsdone += (stall_cpu_holdoff + stall_gp_kthread + stall_cpu + 60) * HZ;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite) if (!can_expedite)
pr_alert("%s" TORTURE_FLAG pr_alert("%s" TORTURE_FLAG
...@@ -1576,11 +1580,11 @@ rcu_torture_writer(void *arg) ...@@ -1576,11 +1580,11 @@ rcu_torture_writer(void *arg)
!atomic_read(&rcu_fwd_cb_nodelay) && !atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps && !cur_ops->slow_gps &&
!torture_must_stop() && !torture_must_stop() &&
boot_ended) boot_ended &&
time_after(jiffies, stallsdone))
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free) && if (list_empty(&rcu_tortures[i].rtort_free) &&
rcu_access_pointer(rcu_torture_current) != rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) {
&rcu_tortures[i]) {
tracing_off(); tracing_off();
show_rcu_gp_kthreads(); show_rcu_gp_kthreads();
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count); WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
...@@ -2441,7 +2445,8 @@ static struct notifier_block rcu_torture_stall_block = { ...@@ -2441,7 +2445,8 @@ static struct notifier_block rcu_torture_stall_block = {
/* /*
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
* induces a CPU stall for the time specified by stall_cpu. * induces a CPU stall for the time specified by stall_cpu. If a new
* stall test is added, stallsdone in rcu_torture_writer() must be adjusted.
*/ */
static int rcu_torture_stall(void *args) static int rcu_torture_stall(void *args)
{ {
......
...@@ -1234,11 +1234,20 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, ...@@ -1234,11 +1234,20 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp) if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
/* /*
* The snapshot for acceleration must be taken _before_ the read of the * It's crucial to capture the snapshot 's' for acceleration before
* current gp sequence used for advancing, otherwise advancing may fail * reading the current gp_seq that is used for advancing. This is
* and acceleration may then fail too. * essential because if the acceleration snapshot is taken after a
* failed advancement attempt, there's a risk that a grace period may
* conclude and a new one may start in the interim. If the snapshot is
* captured after this sequence of events, the acceleration snapshot 's'
* could be excessively advanced, leading to acceleration failure.
* In such a scenario, an 'acceleration leak' can occur, where new
* callbacks become indefinitely stuck in the RCU_NEXT_TAIL segment.
* Also note that encountering advancing failures is a normal
* occurrence when the grace period for RCU_WAIT_TAIL is in progress.
* *
* This could happen if: * To see this, consider the following events which occur if
* rcu_seq_snap() were to be called after advance:
* *
* 1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the * 1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the
* RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8). * RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8).
...@@ -1264,6 +1273,13 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, ...@@ -1264,6 +1273,13 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp) { if (rhp) {
rcu_segcblist_advance(&sdp->srcu_cblist, rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
/*
* Acceleration can never fail because the base current gp_seq
* used for acceleration is <= the value of gp_seq used for
* advancing. This means that RCU_NEXT_TAIL segment will
* always be able to be emptied by the acceleration into the
* RCU_NEXT_READY_TAIL or RCU_WAIT_TAIL segments.
*/
WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s)); WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
} }
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
......
...@@ -24,22 +24,6 @@ void rcu_sync_init(struct rcu_sync *rsp) ...@@ -24,22 +24,6 @@ void rcu_sync_init(struct rcu_sync *rsp)
init_waitqueue_head(&rsp->gp_wait); init_waitqueue_head(&rsp->gp_wait);
} }
/**
* rcu_sync_enter_start - Force readers onto slow path for multiple updates
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* Must be called after rcu_sync_init() and before first use.
*
* Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
* pairs turn into NO-OPs.
*
* No lock is taken and nothing is waited for in this function; it only
* updates the two fields below.
*/
void rcu_sync_enter_start(struct rcu_sync *rsp)
{
// Take a count that this function itself never drops.
rsp->gp_count++;
// Set the state to GP_PASSED directly rather than waiting for a grace period.
rsp->gp_state = GP_PASSED;
}
static void rcu_sync_func(struct rcu_head *rhp); static void rcu_sync_func(struct rcu_head *rhp);
static void rcu_sync_call(struct rcu_sync *rsp) static void rcu_sync_call(struct rcu_sync *rsp)
......
...@@ -32,6 +32,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp); ...@@ -32,6 +32,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @rtp_irq_work: IRQ work queue for deferred wakeups. * @rtp_irq_work: IRQ work queue for deferred wakeups.
* @barrier_q_head: RCU callback for barrier operation. * @barrier_q_head: RCU callback for barrier operation.
* @rtp_blkd_tasks: List of tasks blocked as readers. * @rtp_blkd_tasks: List of tasks blocked as readers.
* @rtp_exit_list: List of tasks in the latter portion of do_exit().
* @cpu: CPU number corresponding to this entry. * @cpu: CPU number corresponding to this entry.
* @rtpp: Pointer to the rcu_tasks structure. * @rtpp: Pointer to the rcu_tasks structure.
*/ */
...@@ -46,6 +47,7 @@ struct rcu_tasks_percpu { ...@@ -46,6 +47,7 @@ struct rcu_tasks_percpu {
struct irq_work rtp_irq_work; struct irq_work rtp_irq_work;
struct rcu_head barrier_q_head; struct rcu_head barrier_q_head;
struct list_head rtp_blkd_tasks; struct list_head rtp_blkd_tasks;
struct list_head rtp_exit_list;
int cpu; int cpu;
struct rcu_tasks *rtpp; struct rcu_tasks *rtpp;
}; };
...@@ -144,8 +146,6 @@ static struct rcu_tasks rt_name = \ ...@@ -144,8 +146,6 @@ static struct rcu_tasks rt_name = \
} }
#ifdef CONFIG_TASKS_RCU #ifdef CONFIG_TASKS_RCU
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */ /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused); static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
...@@ -240,7 +240,6 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp) ...@@ -240,7 +240,6 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
static void cblist_init_generic(struct rcu_tasks *rtp) static void cblist_init_generic(struct rcu_tasks *rtp)
{ {
int cpu; int cpu;
unsigned long flags;
int lim; int lim;
int shift; int shift;
...@@ -266,15 +265,15 @@ static void cblist_init_generic(struct rcu_tasks *rtp) ...@@ -266,15 +265,15 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
WARN_ON_ONCE(!rtpcp); WARN_ON_ONCE(!rtpcp);
if (cpu) if (cpu)
raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock)); raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
local_irq_save(flags); // serialize initialization
if (rcu_segcblist_empty(&rtpcp->cblist)) if (rcu_segcblist_empty(&rtpcp->cblist))
rcu_segcblist_init(&rtpcp->cblist); rcu_segcblist_init(&rtpcp->cblist);
local_irq_restore(flags);
INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq); INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
rtpcp->cpu = cpu; rtpcp->cpu = cpu;
rtpcp->rtpp = rtp; rtpcp->rtpp = rtp;
if (!rtpcp->rtp_blkd_tasks.next) if (!rtpcp->rtp_blkd_tasks.next)
INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks); INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
if (!rtpcp->rtp_exit_list.next)
INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
} }
pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name, pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
...@@ -851,10 +850,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) ...@@ -851,10 +850,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// number of voluntary context switches, and add that task to the // number of voluntary context switches, and add that task to the
// holdout list. // holdout list.
// rcu_tasks_postscan(): // rcu_tasks_postscan():
// Invoke synchronize_srcu() to ensure that all tasks that were // Gather per-CPU lists of tasks in do_exit() to ensure that all
// in the process of exiting (and which thus might not know to // tasks that were in the process of exiting (and which thus might
// synchronize with this RCU Tasks grace period) have completed // not know to synchronize with this RCU Tasks grace period) have
// exiting. // completed exiting. The synchronize_rcu() in rcu_tasks_postgp()
// will take care of any tasks stuck in the non-preemptible region
// of do_exit() following its call to exit_tasks_rcu_stop().
// check_all_holdout_tasks(), repeatedly until holdout list is empty: // check_all_holdout_tasks(), repeatedly until holdout list is empty:
// Scans the holdout list, attempting to identify a quiescent state // Scans the holdout list, attempting to identify a quiescent state
// for each task on the list. If there is a quiescent state, the // for each task on the list. If there is a quiescent state, the
...@@ -867,8 +868,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) ...@@ -867,8 +868,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// with interrupts disabled. // with interrupts disabled.
// //
// For each exiting task, the exit_tasks_rcu_start() and // For each exiting task, the exit_tasks_rcu_start() and
// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU // exit_tasks_rcu_finish() functions add and remove, respectively, the
// read-side critical sections waited for by rcu_tasks_postscan(). // current task to a per-CPU list of tasks that rcu_tasks_postscan() must
// wait on. This is necessary because rcu_tasks_postscan() must wait on
// tasks that have already been removed from the global list of tasks.
// //
// Pre-grace-period update-side code is ordered before the grace period // Pre-grace-period update-side code is ordered before the grace period
// via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code // via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code
...@@ -932,9 +935,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop) ...@@ -932,9 +935,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
} }
} }
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
/* Processing between scanning taskslist and draining the holdout list. */ /* Processing between scanning taskslist and draining the holdout list. */
static void rcu_tasks_postscan(struct list_head *hop) static void rcu_tasks_postscan(struct list_head *hop)
{ {
int cpu;
int rtsi = READ_ONCE(rcu_task_stall_info); int rtsi = READ_ONCE(rcu_task_stall_info);
if (!IS_ENABLED(CONFIG_TINY_RCU)) { if (!IS_ENABLED(CONFIG_TINY_RCU)) {
...@@ -948,9 +955,9 @@ static void rcu_tasks_postscan(struct list_head *hop) ...@@ -948,9 +955,9 @@ static void rcu_tasks_postscan(struct list_head *hop)
* this, divide the fragile exit path part in two intersecting * this, divide the fragile exit path part in two intersecting
* read side critical sections: * read side critical sections:
* *
* 1) An _SRCU_ read side starting before calling exit_notify(), * 1) A task_struct list addition before calling exit_notify(),
* which may remove the task from the tasklist, and ending after * which may remove the task from the tasklist, with the
* the final preempt_disable() call in do_exit(). * removal after the final preempt_disable() call in do_exit().
* *
* 2) An _RCU_ read side starting with the final preempt_disable() * 2) An _RCU_ read side starting with the final preempt_disable()
* call in do_exit() and ending with the final call to schedule() * call in do_exit() and ending with the final call to schedule()
...@@ -959,7 +966,37 @@ static void rcu_tasks_postscan(struct list_head *hop) ...@@ -959,7 +966,37 @@ static void rcu_tasks_postscan(struct list_head *hop)
* This handles the part 1). And postgp will handle part 2) with a * This handles the part 1). And postgp will handle part 2) with a
* call to synchronize_rcu(). * call to synchronize_rcu().
*/ */
synchronize_srcu(&tasks_rcu_exit_srcu);
for_each_possible_cpu(cpu) {
unsigned long j = jiffies + 1;
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
struct task_struct *t;
struct task_struct *t1;
struct list_head tmp;
raw_spin_lock_irq_rcu_node(rtpcp);
list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) {
if (list_empty(&t->rcu_tasks_holdout_list))
rcu_tasks_pertask(t, hop);
// RT kernels need frequent pauses, otherwise
// pause at least once per pair of jiffies.
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j))
continue;
// Keep our place in the list while pausing.
// Nothing else traverses this list, so adding a
// bare list_head is OK.
list_add(&tmp, &t->rcu_tasks_exit_list);
raw_spin_unlock_irq_rcu_node(rtpcp);
cond_resched(); // For CONFIG_PREEMPT=n kernels
raw_spin_lock_irq_rcu_node(rtpcp);
t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list);
list_del(&tmp);
j = jiffies + 1;
}
raw_spin_unlock_irq_rcu_node(rtpcp);
}
if (!IS_ENABLED(CONFIG_TINY_RCU)) if (!IS_ENABLED(CONFIG_TINY_RCU))
del_timer_sync(&tasks_rcu_exit_srcu_stall_timer); del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
...@@ -1027,7 +1064,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp) ...@@ -1027,7 +1064,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
* *
* In addition, this synchronize_rcu() waits for exiting tasks * In addition, this synchronize_rcu() waits for exiting tasks
* to complete their final preempt_disable() region of execution, * to complete their final preempt_disable() region of execution,
* cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
* enforcing the whole region before tasklist removal until * enforcing the whole region before tasklist removal until
* the final schedule() with TASK_DEAD state to be an RCU TASKS * the final schedule() with TASK_DEAD state to be an RCU TASKS
* read side critical section. * read side critical section.
...@@ -1035,9 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp) ...@@ -1035,9 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
synchronize_rcu(); synchronize_rcu();
} }
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused) static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
{ {
#ifndef CONFIG_TINY_RCU #ifndef CONFIG_TINY_RCU
...@@ -1118,7 +1151,6 @@ module_param(rcu_tasks_lazy_ms, int, 0444); ...@@ -1118,7 +1151,6 @@ module_param(rcu_tasks_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_kthread(void) static int __init rcu_spawn_tasks_kthread(void)
{ {
cblist_init_generic(&rcu_tasks);
rcu_tasks.gp_sleep = HZ / 10; rcu_tasks.gp_sleep = HZ / 10;
rcu_tasks.init_fract = HZ / 10; rcu_tasks.init_fract = HZ / 10;
if (rcu_tasks_lazy_ms >= 0) if (rcu_tasks_lazy_ms >= 0)
...@@ -1147,25 +1179,48 @@ struct task_struct *get_rcu_tasks_gp_kthread(void) ...@@ -1147,25 +1179,48 @@ struct task_struct *get_rcu_tasks_gp_kthread(void)
EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread); EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
/* /*
* Contribute to protect against tasklist scan blind spot while the * Protect against tasklist scan blind spot while the task is exiting and
* task is exiting and may be removed from the tasklist. See * may be removed from the tasklist. Do this by adding the task to yet
* corresponding synchronize_srcu() for further details. * another list.
*
* Note that the task will remove itself from this list, so there is no
* need for get_task_struct(), except in the case where rcu_tasks_pertask()
* adds it to the holdout list, in which case rcu_tasks_pertask() supplies
* the needed get_task_struct().
*/ */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu) void exit_tasks_rcu_start(void)
{ {
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); unsigned long flags;
struct rcu_tasks_percpu *rtpcp;
struct task_struct *t = current;
WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
preempt_disable();
rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
t->rcu_tasks_exit_cpu = smp_processor_id();
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
if (!rtpcp->rtp_exit_list.next)
INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
preempt_enable();
} }
/* /*
* Contribute to protect against tasklist scan blind spot while the * Remove the task from the "yet another list" because do_exit() is now
* task is exiting and may be removed from the tasklist. See * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
* corresponding synchronize_srcu() for further details.
*/ */
void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu) void exit_tasks_rcu_stop(void)
{ {
unsigned long flags;
struct rcu_tasks_percpu *rtpcp;
struct task_struct *t = current; struct task_struct *t = current;
__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx); WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu);
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
list_del_init(&t->rcu_tasks_exit_list);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
} }
/* /*
...@@ -1282,7 +1337,6 @@ module_param(rcu_tasks_rude_lazy_ms, int, 0444); ...@@ -1282,7 +1337,6 @@ module_param(rcu_tasks_rude_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_rude_kthread(void) static int __init rcu_spawn_tasks_rude_kthread(void)
{ {
cblist_init_generic(&rcu_tasks_rude);
rcu_tasks_rude.gp_sleep = HZ / 10; rcu_tasks_rude.gp_sleep = HZ / 10;
if (rcu_tasks_rude_lazy_ms >= 0) if (rcu_tasks_rude_lazy_ms >= 0)
rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms); rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms);
...@@ -1914,7 +1968,6 @@ module_param(rcu_tasks_trace_lazy_ms, int, 0444); ...@@ -1914,7 +1968,6 @@ module_param(rcu_tasks_trace_lazy_ms, int, 0444);
static int __init rcu_spawn_tasks_trace_kthread(void) static int __init rcu_spawn_tasks_trace_kthread(void)
{ {
cblist_init_generic(&rcu_tasks_trace);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) { if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
rcu_tasks_trace.gp_sleep = HZ / 10; rcu_tasks_trace.gp_sleep = HZ / 10;
rcu_tasks_trace.init_fract = HZ / 10; rcu_tasks_trace.init_fract = HZ / 10;
...@@ -2086,6 +2139,24 @@ late_initcall(rcu_tasks_verify_schedule_work); ...@@ -2086,6 +2139,24 @@ late_initcall(rcu_tasks_verify_schedule_work);
static void rcu_tasks_initiate_self_tests(void) { } static void rcu_tasks_initiate_self_tests(void) { }
#endif /* #else #ifdef CONFIG_PROVE_RCU */ #endif /* #else #ifdef CONFIG_PROVE_RCU */
/*
 * Run cblist_init_generic() for each RCU Tasks flavor that is configured in
 * (Tasks, Tasks Rude, Tasks Trace).
 *
 * The caller is expected to have interrupts disabled (checked by lockdep),
 * and a warning is emitted if more than one CPU is online at the time of
 * the call.
 */
void __init tasks_cblist_init_generic(void)
{
lockdep_assert_irqs_disabled();
WARN_ON(num_online_cpus() > 1);
// Each flavor's rcu_tasks structure is only referenced under its own
// config option, hence the preprocessor guards rather than runtime checks.
#ifdef CONFIG_TASKS_RCU
cblist_init_generic(&rcu_tasks);
#endif
#ifdef CONFIG_TASKS_RUDE_RCU
cblist_init_generic(&rcu_tasks_rude);
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
cblist_init_generic(&rcu_tasks_trace);
#endif
}
void __init rcu_init_tasks_generic(void) void __init rcu_init_tasks_generic(void)
{ {
#ifdef CONFIG_TASKS_RCU #ifdef CONFIG_TASKS_RCU
......
...@@ -261,4 +261,5 @@ void __init rcu_init(void) ...@@ -261,4 +261,5 @@ void __init rcu_init(void)
{ {
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests(); rcu_early_boot_tests();
tasks_cblist_init_generic();
} }
...@@ -145,7 +145,7 @@ static int rcu_scheduler_fully_active __read_mostly; ...@@ -145,7 +145,7 @@ static int rcu_scheduler_fully_active __read_mostly;
static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags); unsigned long gps, unsigned long flags);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static struct task_struct *rcu_boost_task(struct rcu_node *rnp);
static void invoke_rcu_core(void); static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp); static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu); static void sync_sched_exp_online_cleanup(int cpu);
...@@ -2145,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp) ...@@ -2145,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
* Extract the list of ready callbacks, disabling IRQs to prevent * Extract the list of ready callbacks, disabling IRQs to prevent
* races with call_rcu() from interrupt handlers. Leave the * races with call_rcu() from interrupt handlers. Leave the
* callback counts, as rcu_barrier() needs to be conservative. * callback counts, as rcu_barrier() needs to be conservative.
*
* Callbacks execution is fully ordered against preceding grace period
* completion (materialized by rnp->gp_seq update) thanks to the
* smp_mb__after_unlock_lock() upon node locking required for callbacks
* advancing. In NOCB mode this ordering is then further relayed through
* the nocb locking that protects both callbacks advancing and extraction.
*/ */
rcu_nocb_lock_irqsave(rdp, flags); rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
...@@ -2591,12 +2597,26 @@ static int __init rcu_spawn_core_kthreads(void) ...@@ -2591,12 +2597,26 @@ static int __init rcu_spawn_core_kthreads(void)
return 0; return 0;
} }
/*
 * Queue @head on @rdp's segmented callback list and emit the tracepoints
 * that record the enqueue.
 *
 * @func is not stored here; it only selects which tracepoint fires: the
 * kvfree variant when __is_kvfree_rcu_offset() accepts its value, the
 * plain callback variant otherwise.
 */
static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func)
{
	struct rcu_segcblist *rsclp = &rdp->cblist;
	unsigned long offset = (unsigned long)func;

	rcu_segcblist_enqueue(rsclp, head);

	/* The callback count is sampled after the enqueue, so it includes @head. */
	if (!__is_kvfree_rcu_offset(offset))
		trace_rcu_callback(rcu_state.name, head,
				   rcu_segcblist_n_cbs(rsclp));
	else
		trace_rcu_kvfree_callback(rcu_state.name, head, offset,
					  rcu_segcblist_n_cbs(rsclp));

	trace_rcu_segcb_stats(rsclp, TPS("SegCBQueued"));
}
/* /*
* Handle any core-RCU processing required by a call_rcu() invocation. * Handle any core-RCU processing required by a call_rcu() invocation.
*/ */
static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, static void call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
unsigned long flags) rcu_callback_t func, unsigned long flags)
{ {
rcutree_enqueue(rdp, head, func);
/* /*
* If called from an extended quiescent state, invoke the RCU * If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness. * core in order to force a re-evaluation of RCU's idleness.
...@@ -2692,7 +2712,6 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) ...@@ -2692,7 +2712,6 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
unsigned long flags; unsigned long flags;
bool lazy; bool lazy;
struct rcu_data *rdp; struct rcu_data *rdp;
bool was_alldone;
/* Misaligned rcu_head! */ /* Misaligned rcu_head! */
WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
...@@ -2729,30 +2748,18 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) ...@@ -2729,30 +2748,18 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
} }
check_cb_ovld(rdp); check_cb_ovld(rdp);
if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
return; // Enqueued onto ->nocb_bypass, so just leave.
// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
rcu_segcblist_enqueue(&rdp->cblist, head);
if (__is_kvfree_rcu_offset((unsigned long)func))
trace_rcu_kvfree_callback(rcu_state.name, head,
(unsigned long)func,
rcu_segcblist_n_cbs(&rdp->cblist));
else
trace_rcu_callback(rcu_state.name, head,
rcu_segcblist_n_cbs(&rdp->cblist));
trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); if (unlikely(rcu_rdp_is_offloaded(rdp)))
call_rcu_nocb(rdp, head, func, flags, lazy);
/* Go handle any RCU core processing required. */ else
if (unlikely(rcu_rdp_is_offloaded(rdp))) { call_rcu_core(rdp, head, func, flags);
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
} else {
__call_rcu_core(rdp, head, flags);
local_irq_restore(flags); local_irq_restore(flags);
}
} }
#ifdef CONFIG_RCU_LAZY #ifdef CONFIG_RCU_LAZY
static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF);
module_param(enable_rcu_lazy, bool, 0444);
/** /**
* call_rcu_hurry() - Queue RCU callback for invocation after grace period, and * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
* flush all lazy callbacks (including the new one) to the main ->cblist while * flush all lazy callbacks (including the new one) to the main ->cblist while
...@@ -2778,6 +2785,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) ...@@ -2778,6 +2785,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
__call_rcu_common(head, func, false); __call_rcu_common(head, func, false);
} }
EXPORT_SYMBOL_GPL(call_rcu_hurry); EXPORT_SYMBOL_GPL(call_rcu_hurry);
#else
#define enable_rcu_lazy false
#endif #endif
/** /**
...@@ -2826,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); ...@@ -2826,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
*/ */
void call_rcu(struct rcu_head *head, rcu_callback_t func) void call_rcu(struct rcu_head *head, rcu_callback_t func)
{ {
__call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); __call_rcu_common(head, func, enable_rcu_lazy);
} }
EXPORT_SYMBOL_GPL(call_rcu); EXPORT_SYMBOL_GPL(call_rcu);
...@@ -4394,6 +4403,66 @@ rcu_boot_init_percpu_data(int cpu) ...@@ -4394,6 +4403,66 @@ rcu_boot_init_percpu_data(int cpu)
rcu_boot_init_nocb_percpu_data(rdp); rcu_boot_init_nocb_percpu_data(rdp);
} }
/*
 * Kthread worker on which expedited grace-period work is queued.  Set by
 * rcu_start_exp_gp_kworker(); stays NULL if that worker cannot be created.
 */
struct kthread_worker *rcu_exp_gp_kworker;
static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
{
struct kthread_worker *kworker;
const char *name = "rcu_exp_par_gp_kthread_worker/%d";
struct sched_param param = { .sched_priority = kthread_prio };
int rnp_index = rnp - rcu_get_root();
if (rnp->exp_kworker)
return;
kworker = kthread_create_worker(0, name, rnp_index);
if (IS_ERR_OR_NULL(kworker)) {
pr_err("Failed to create par gp kworker on %d/%d\n",
rnp->grplo, rnp->grphi);
return;
}
WRITE_ONCE(rnp->exp_kworker, kworker);
if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
}
/*
 * Return the task backing @rnp's per-node expedited kthread worker, or
 * NULL when rnp->exp_kworker has not been set.
 */
static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp)
{
	struct kthread_worker *worker = READ_ONCE(rnp->exp_kworker);

	return worker ? worker->task : NULL;
}
/*
 * Create the kthread worker stored in rcu_exp_gp_kworker.  When
 * CONFIG_RCU_EXP_KTHREAD is enabled the worker's task is switched to
 * SCHED_FIFO at priority kthread_prio.  If creation fails, an error is
 * logged and rcu_exp_gp_kworker is reset to NULL.
 */
static void __init rcu_start_exp_gp_kworker(void)
{
	struct sched_param fifo_param = { .sched_priority = kthread_prio };
	const char *worker_name = "rcu_exp_gp_kthread_worker";

	rcu_exp_gp_kworker = kthread_create_worker(0, worker_name);
	if (!IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
		if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
			sched_setscheduler_nocheck(rcu_exp_gp_kworker->task,
						   SCHED_FIFO, &fifo_param);
		return;
	}

	/* Creation failed: report it and leave no error pointer behind. */
	pr_err("Failed to create %s!\n", worker_name);
	rcu_exp_gp_kworker = NULL;
}
/*
 * Spawn the per-node kthreads for @rnp: the boost kthread and the
 * expedited grace-period worker.  Does nothing until
 * rcu_scheduler_fully_active is set.  Both spawns run under
 * rnp->kthread_mutex.
 */
static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp)
{
	if (!rcu_scheduler_fully_active)
		return;

	mutex_lock(&rnp->kthread_mutex);
	rcu_spawn_one_boost_kthread(rnp);
	rcu_spawn_exp_par_gp_kworker(rnp);
	mutex_unlock(&rnp->kthread_mutex);
}
/* /*
* Invoked early in the CPU-online process, when pretty much all services * Invoked early in the CPU-online process, when pretty much all services
* are available. The incoming CPU is not present. * are available. The incoming CPU is not present.
...@@ -4442,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu) ...@@ -4442,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
raw_spin_unlock_irqrestore_rcu_node(rnp, flags); raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_spawn_one_boost_kthread(rnp); rcu_spawn_rnp_kthreads(rnp);
rcu_spawn_cpu_nocb_kthread(cpu); rcu_spawn_cpu_nocb_kthread(cpu);
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1); WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
...@@ -4450,13 +4519,64 @@ int rcutree_prepare_cpu(unsigned int cpu) ...@@ -4450,13 +4519,64 @@ int rcutree_prepare_cpu(unsigned int cpu)
} }
/* /*
* Update RCU priority boot kthread affinity for CPU-hotplug changes. * Update kthreads affinity during CPU-hotplug changes.
*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
* held, so the value of rnp->qsmaskinit will be stable.
*
* We don't include outgoingcpu in the affinity set, use -1 if there is
* no outgoing CPU. If there are no CPUs left in the affinity set,
* this function allows the kthread to execute on any CPU.
*
* Any future concurrent calls are serialized via ->kthread_mutex.
*/ */
static void rcutree_affinity_setting(unsigned int cpu, int outgoing) static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu)
{ {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); cpumask_var_t cm;
unsigned long mask;
struct rcu_data *rdp;
struct rcu_node *rnp;
struct task_struct *task_boost, *task_exp;
rdp = per_cpu_ptr(&rcu_data, cpu);
rnp = rdp->mynode;
task_boost = rcu_boost_task(rnp);
task_exp = rcu_exp_par_gp_task(rnp);
/*
* If CPU is the boot one, those tasks are created later from early
* initcall since kthreadd must be created first.
*/
if (!task_boost && !task_exp)
return;
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
mutex_lock(&rnp->kthread_mutex);
mask = rcu_rnp_online_cpus(rnp);
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
if (cpumask_empty(cm)) {
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
if (outgoingcpu >= 0)
cpumask_clear_cpu(outgoingcpu, cm);
}
if (task_exp)
set_cpus_allowed_ptr(task_exp, cm);
if (task_boost)
set_cpus_allowed_ptr(task_boost, cm);
mutex_unlock(&rnp->kthread_mutex);
rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); free_cpumask_var(cm);
} }
/* /*
...@@ -4640,8 +4760,9 @@ void rcutree_migrate_callbacks(int cpu) ...@@ -4640,8 +4760,9 @@ void rcutree_migrate_callbacks(int cpu)
__call_rcu_nocb_wake(my_rdp, true, flags); __call_rcu_nocb_wake(my_rdp, true, flags);
} else { } else {
rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */ rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */
raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags); raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
} }
local_irq_restore(flags);
if (needwake) if (needwake)
rcu_gp_kthread_wake(); rcu_gp_kthread_wake();
lockdep_assert_irqs_enabled(); lockdep_assert_irqs_enabled();
...@@ -4730,51 +4851,6 @@ static int rcu_pm_notify(struct notifier_block *self, ...@@ -4730,51 +4851,6 @@ static int rcu_pm_notify(struct notifier_block *self,
return NOTIFY_OK; return NOTIFY_OK;
} }
#ifdef CONFIG_RCU_EXP_KTHREAD
struct kthread_worker *rcu_exp_gp_kworker;
struct kthread_worker *rcu_exp_par_gp_kworker;
static void __init rcu_start_exp_gp_kworkers(void)
{
const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
struct sched_param param = { .sched_priority = kthread_prio };
rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
pr_err("Failed to create %s!\n", gp_kworker_name);
return;
}
rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
pr_err("Failed to create %s!\n", par_gp_kworker_name);
kthread_destroy_worker(rcu_exp_gp_kworker);
return;
}
sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
&param);
}
static inline void rcu_alloc_par_gp_wq(void)
{
}
#else /* !CONFIG_RCU_EXP_KTHREAD */
struct workqueue_struct *rcu_par_gp_wq;
static void __init rcu_start_exp_gp_kworkers(void)
{
}
static inline void rcu_alloc_par_gp_wq(void)
{
rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_par_gp_wq);
}
#endif /* CONFIG_RCU_EXP_KTHREAD */
/* /*
* Spawn the kthreads that handle RCU's grace periods. * Spawn the kthreads that handle RCU's grace periods.
*/ */
...@@ -4809,10 +4885,10 @@ static int __init rcu_spawn_gp_kthread(void) ...@@ -4809,10 +4885,10 @@ static int __init rcu_spawn_gp_kthread(void)
* due to rcu_scheduler_fully_active. * due to rcu_scheduler_fully_active.
*/ */
rcu_spawn_cpu_nocb_kthread(smp_processor_id()); rcu_spawn_cpu_nocb_kthread(smp_processor_id());
rcu_spawn_one_boost_kthread(rdp->mynode); rcu_spawn_rnp_kthreads(rdp->mynode);
rcu_spawn_core_kthreads(); rcu_spawn_core_kthreads();
/* Create kthread worker for expedited GPs */ /* Create kthread worker for expedited GPs */
rcu_start_exp_gp_kworkers(); rcu_start_exp_gp_kworker();
return 0; return 0;
} }
early_initcall(rcu_spawn_gp_kthread); early_initcall(rcu_spawn_gp_kthread);
...@@ -4915,7 +4991,7 @@ static void __init rcu_init_one(void) ...@@ -4915,7 +4991,7 @@ static void __init rcu_init_one(void)
init_waitqueue_head(&rnp->exp_wq[2]); init_waitqueue_head(&rnp->exp_wq[2]);
init_waitqueue_head(&rnp->exp_wq[3]); init_waitqueue_head(&rnp->exp_wq[3]);
spin_lock_init(&rnp->exp_lock); spin_lock_init(&rnp->exp_lock);
mutex_init(&rnp->boost_kthread_mutex); mutex_init(&rnp->kthread_mutex);
raw_spin_lock_init(&rnp->exp_poll_lock); raw_spin_lock_init(&rnp->exp_poll_lock);
rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED; rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp); INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp);
...@@ -5152,7 +5228,6 @@ void __init rcu_init(void) ...@@ -5152,7 +5228,6 @@ void __init rcu_init(void)
/* Create workqueue for Tree SRCU and for expedited GPs. */ /* Create workqueue for Tree SRCU and for expedited GPs. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq); WARN_ON(!rcu_gp_wq);
rcu_alloc_par_gp_wq();
/* Fill in default value for rcutree.qovld boot parameter. */ /* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */ /* -After- the rcu_node ->lock fields are initialized! */
...@@ -5165,6 +5240,8 @@ void __init rcu_init(void) ...@@ -5165,6 +5240,8 @@ void __init rcu_init(void)
(void)start_poll_synchronize_rcu_expedited(); (void)start_poll_synchronize_rcu_expedited();
rcu_test_sync_prims(); rcu_test_sync_prims();
tasks_cblist_init_generic();
} }
#include "tree_stall.h" #include "tree_stall.h"
......
...@@ -21,14 +21,10 @@ ...@@ -21,14 +21,10 @@
#include "rcu_segcblist.h" #include "rcu_segcblist.h"
/* Communicate arguments to a workqueue handler. */ /* Communicate arguments to a kthread worker handler. */
struct rcu_exp_work { struct rcu_exp_work {
unsigned long rew_s; unsigned long rew_s;
#ifdef CONFIG_RCU_EXP_KTHREAD
struct kthread_work rew_work; struct kthread_work rew_work;
#else
struct work_struct rew_work;
#endif /* CONFIG_RCU_EXP_KTHREAD */
}; };
/* RCU's kthread states for tracing. */ /* RCU's kthread states for tracing. */
...@@ -72,6 +68,9 @@ struct rcu_node { ...@@ -72,6 +68,9 @@ struct rcu_node {
/* Online CPUs for next expedited GP. */ /* Online CPUs for next expedited GP. */
/* Any CPU that has ever been online will */ /* Any CPU that has ever been online will */
/* have its bit set. */ /* have its bit set. */
struct kthread_worker *exp_kworker;
/* Workers performing per node expedited GP */
/* initialization. */
unsigned long cbovldmask; unsigned long cbovldmask;
/* CPUs experiencing callback overload. */ /* CPUs experiencing callback overload. */
unsigned long ffmask; /* Fully functional CPUs. */ unsigned long ffmask; /* Fully functional CPUs. */
...@@ -113,7 +112,7 @@ struct rcu_node { ...@@ -113,7 +112,7 @@ struct rcu_node {
/* side effect, not as a lock. */ /* side effect, not as a lock. */
unsigned long boost_time; unsigned long boost_time;
/* When to start boosting (jiffies). */ /* When to start boosting (jiffies). */
struct mutex boost_kthread_mutex; struct mutex kthread_mutex;
/* Exclusion for thread spawning and affinity */ /* Exclusion for thread spawning and affinity */
/* manipulation. */ /* manipulation. */
struct task_struct *boost_kthread_task; struct task_struct *boost_kthread_task;
...@@ -467,10 +466,9 @@ static void rcu_init_one_nocb(struct rcu_node *rnp); ...@@ -467,10 +466,9 @@ static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j, bool lazy); unsigned long j, bool lazy);
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
bool *was_alldone, unsigned long flags, rcu_callback_t func, unsigned long flags, bool lazy);
bool lazy); static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags); unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
......
...@@ -198,10 +198,9 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, ...@@ -198,10 +198,9 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
} }
if (rnp->parent == NULL) { if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags); raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake) { if (wake)
smp_mb(); /* EGP done before wake_up(). */
swake_up_one_online(&rcu_state.expedited_wq); swake_up_one_online(&rcu_state.expedited_wq);
}
break; break;
} }
mask = rnp->grpmask; mask = rnp->grpmask;
...@@ -419,7 +418,6 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp) ...@@ -419,7 +418,6 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
static void rcu_exp_sel_wait_wake(unsigned long s); static void rcu_exp_sel_wait_wake(unsigned long s);
#ifdef CONFIG_RCU_EXP_KTHREAD
static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{ {
struct rcu_exp_work *rewp = struct rcu_exp_work *rewp =
...@@ -428,9 +426,14 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) ...@@ -428,9 +426,14 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
__sync_rcu_exp_select_node_cpus(rewp); __sync_rcu_exp_select_node_cpus(rewp);
} }
static inline bool rcu_gp_par_worker_started(void) static inline bool rcu_exp_worker_started(void)
{
return !!READ_ONCE(rcu_exp_gp_kworker);
}
static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{ {
return !!READ_ONCE(rcu_exp_par_gp_kworker); return !!READ_ONCE(rnp->exp_kworker);
} }
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
...@@ -441,7 +444,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) ...@@ -441,7 +444,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
* another work item on the same kthread worker can result in * another work item on the same kthread worker can result in
* deadlock. * deadlock.
*/ */
kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work); kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
} }
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
...@@ -466,64 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew ...@@ -466,64 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew
kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work); kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
} }
static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
}
#else /* !CONFIG_RCU_EXP_KTHREAD */
static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
{
struct rcu_exp_work *rewp =
container_of(wp, struct rcu_exp_work, rew_work);
__sync_rcu_exp_select_node_cpus(rewp);
}
static inline bool rcu_gp_par_worker_started(void)
{
return !!READ_ONCE(rcu_par_gp_wq);
}
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{
int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
/* If all offline, queue the work on an unbound CPU. */
if (unlikely(cpu > rnp->grphi - rnp->grplo))
cpu = WORK_CPU_UNBOUND;
else
cpu += rnp->grplo;
queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
}
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{
flush_work(&rnp->rew.rew_work);
}
/*
* Work-queue handler to drive an expedited grace period forward.
*/
static void wait_rcu_exp_gp(struct work_struct *wp)
{
struct rcu_exp_work *rewp;
rewp = container_of(wp, struct rcu_exp_work, rew_work);
rcu_exp_sel_wait_wake(rewp->rew_s);
}
static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{
INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
queue_work(rcu_gp_wq, &rew->rew_work);
}
static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
destroy_work_on_stack(&rew->rew_work);
}
#endif /* CONFIG_RCU_EXP_KTHREAD */
/* /*
* Select the nodes that the upcoming expedited grace period needs * Select the nodes that the upcoming expedited grace period needs
* to wait for. * to wait for.
...@@ -541,7 +486,7 @@ static void sync_rcu_exp_select_cpus(void) ...@@ -541,7 +486,7 @@ static void sync_rcu_exp_select_cpus(void)
rnp->exp_need_flush = false; rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask)) if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */ continue; /* Avoid early boot non-existent wq. */
if (!rcu_gp_par_worker_started() || if (!rcu_exp_par_worker_started(rnp) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
rcu_is_last_leaf_node(rnp)) { rcu_is_last_leaf_node(rnp)) {
/* No worker started yet or last leaf, do direct call. */ /* No worker started yet or last leaf, do direct call. */
...@@ -956,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) ...@@ -956,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
*/ */
void synchronize_rcu_expedited(void) void synchronize_rcu_expedited(void)
{ {
bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT);
unsigned long flags; unsigned long flags;
struct rcu_exp_work rew; struct rcu_exp_work rew;
struct rcu_node *rnp; struct rcu_node *rnp;
...@@ -996,7 +940,7 @@ void synchronize_rcu_expedited(void) ...@@ -996,7 +940,7 @@ void synchronize_rcu_expedited(void)
return; /* Someone else did our work for us. */ return; /* Someone else did our work for us. */
/* Ensure that load happens before action based on it. */ /* Ensure that load happens before action based on it. */
if (unlikely(boottime)) { if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) {
/* Direct call during scheduler init and early_initcalls(). */ /* Direct call during scheduler init and early_initcalls(). */
rcu_exp_sel_wait_wake(s); rcu_exp_sel_wait_wake(s);
} else { } else {
...@@ -1013,9 +957,6 @@ void synchronize_rcu_expedited(void) ...@@ -1013,9 +957,6 @@ void synchronize_rcu_expedited(void)
/* Let the next expedited grace period start. */ /* Let the next expedited grace period start. */
mutex_unlock(&rcu_state.exp_mutex); mutex_unlock(&rcu_state.exp_mutex);
if (likely(!boottime))
synchronize_rcu_expedited_destroy_work(&rew);
} }
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
......
...@@ -256,6 +256,7 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) ...@@ -256,6 +256,7 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
return __wake_nocb_gp(rdp_gp, rdp, force, flags); return __wake_nocb_gp(rdp_gp, rdp, force, flags);
} }
#ifdef CONFIG_RCU_LAZY
/* /*
* LAZY_FLUSH_JIFFIES decides the maximum amount of time that * LAZY_FLUSH_JIFFIES decides the maximum amount of time that
* can elapse before lazy callbacks are flushed. Lazy callbacks * can elapse before lazy callbacks are flushed. Lazy callbacks
...@@ -264,21 +265,20 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) ...@@ -264,21 +265,20 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
* left unsubmitted to RCU after those many jiffies. * left unsubmitted to RCU after those many jiffies.
*/ */
#define LAZY_FLUSH_JIFFIES (10 * HZ) #define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES;
#ifdef CONFIG_RCU_LAZY
// To be called only from test code. // To be called only from test code.
void rcu_lazy_set_jiffies_till_flush(unsigned long jif) void rcu_set_jiffies_lazy_flush(unsigned long jif)
{ {
jiffies_till_flush = jif; jiffies_lazy_flush = jif;
} }
EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush);
unsigned long rcu_lazy_get_jiffies_till_flush(void) unsigned long rcu_get_jiffies_lazy_flush(void)
{ {
return jiffies_till_flush; return jiffies_lazy_flush;
} }
EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush);
#endif #endif
/* /*
...@@ -299,7 +299,7 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, ...@@ -299,7 +299,7 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
*/ */
if (waketype == RCU_NOCB_WAKE_LAZY && if (waketype == RCU_NOCB_WAKE_LAZY &&
rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush());
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else if (waketype == RCU_NOCB_WAKE_BYPASS) { } else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2); mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
...@@ -482,7 +482,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -482,7 +482,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// flush ->nocb_bypass to ->cblist. // flush ->nocb_bypass to ->cblist.
if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
(ncbs && bypass_is_lazy && (ncbs && bypass_is_lazy &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
ncbs >= qhimark) { ncbs >= qhimark) {
rcu_nocb_lock(rdp); rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
...@@ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// 2. Both of these conditions are met: // 2. Both of these conditions are met:
// a. The bypass list previously had only lazy CBs, and: // a. The bypass list previously had only lazy CBs, and:
// b. The new CB is non-lazy. // b. The new CB is non-lazy.
if (ncbs && (!bypass_is_lazy || lazy)) { if (!ncbs || (bypass_is_lazy && !lazy)) {
local_irq_restore(flags);
} else {
// No-CBs GP kthread might be indefinitely asleep, if so, wake. // No-CBs GP kthread might be indefinitely asleep, if so, wake.
rcu_nocb_lock(rdp); // Rare during call_rcu() flood. rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
if (!rcu_segcblist_pend_cbs(&rdp->cblist)) { if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
...@@ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
} else { } else {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstBQnoWake")); TPS("FirstBQnoWake"));
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
} }
} }
return true; // Callback already enqueued. return true; // Callback already enqueued.
...@@ -566,11 +564,12 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, ...@@ -566,11 +564,12 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
long lazy_len; long lazy_len;
long len; long len;
struct task_struct *t; struct task_struct *t;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
// If we are being polled or there is no kthread, just leave. // If we are being polled or there is no kthread, just leave.
t = READ_ONCE(rdp->nocb_gp_kthread); t = READ_ONCE(rdp->nocb_gp_kthread);
if (rcu_nocb_poll || !t) { if (rcu_nocb_poll || !t) {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeNotPoll")); TPS("WakeNotPoll"));
return; return;
...@@ -583,17 +582,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, ...@@ -583,17 +582,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rdp->qlen_last_fqs_check = len; rdp->qlen_last_fqs_check = len;
// Only lazy CBs in bypass list // Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) { if (lazy_len && bypass_len == lazy_len) {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy")); TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) { } else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */ /* ... if queue was empty ... */
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false); wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty")); TPS("WakeEmpty"));
} else { } else {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred")); TPS("WakeEmptyIsDeferred"));
} }
...@@ -610,20 +609,32 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, ...@@ -610,20 +609,32 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
smp_mb(); /* Enqueue before timer_pending(). */ smp_mb(); /* Enqueue before timer_pending(). */
if ((rdp->nocb_cb_sleep || if ((rdp->nocb_cb_sleep ||
!rcu_segcblist_ready_cbs(&rdp->cblist)) && !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
!timer_pending(&rdp->nocb_timer)) { !timer_pending(&rdp_gp->nocb_timer)) {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred")); TPS("WakeOvfIsDeferred"));
} else { } else {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
} }
} else { } else {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
} }
} }
/*
 * Queue @head for @rdp on the offloaded (no-CBs) path.  First try the
 * bypass list; if rcu_nocb_try_bypass() reports that it queued the
 * callback there, nothing more is needed.  Otherwise do a regular
 * enqueue on ->cblist and let __call_rcu_nocb_wake() decide about a
 * wakeup; that call also drops the nocb lock.
 */
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
			  rcu_callback_t func, unsigned long flags, bool lazy)
{
	bool cblist_was_done;

	if (rcu_nocb_try_bypass(rdp, head, &cblist_was_done, flags, lazy))
		return;

	/* Not enqueued on bypass but locked, do regular enqueue */
	rcutree_enqueue(rdp, head, func);
	__call_rcu_nocb_wake(rdp, cblist_was_done, flags); /* unlocks */
}
static int nocb_gp_toggle_rdp(struct rcu_data *rdp, static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
bool *wake_state) bool *wake_state)
{ {
...@@ -723,7 +734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) ...@@ -723,7 +734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
lazy_ncbs = READ_ONCE(rdp->lazy_len); lazy_ncbs = READ_ONCE(rdp->lazy_len);
if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
bypass_ncbs > 2 * qhimark)) { bypass_ncbs > 2 * qhimark)) {
flush_bypass = true; flush_bypass = true;
} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
...@@ -779,7 +790,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) ...@@ -779,7 +790,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rcu_segcblist_ready_cbs(&rdp->cblist)) { if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
needwake = rdp->nocb_cb_sleep; needwake = rdp->nocb_cb_sleep;
WRITE_ONCE(rdp->nocb_cb_sleep, false); WRITE_ONCE(rdp->nocb_cb_sleep, false);
smp_mb(); /* CB invocation -after- GP end. */
} else { } else {
needwake = false; needwake = false;
} }
...@@ -933,8 +943,7 @@ static void nocb_cb_wait(struct rcu_data *rdp) ...@@ -933,8 +943,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
swait_event_interruptible_exclusive(rdp->nocb_cb_wq, swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp)); nocb_cb_wait_cond(rdp));
// VVV Ensure CB invocation follows _sleep test. if (READ_ONCE(rdp->nocb_cb_sleep)) {
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
WARN_ON(signal_pending(current)); WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
} }
...@@ -1383,7 +1392,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) ...@@ -1383,7 +1392,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock_irqrestore(rdp, flags);
continue; continue;
} }
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); rcu_nocb_try_flush_bypass(rdp, jiffies);
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false); wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count; sc->nr_to_scan -= _count;
...@@ -1768,10 +1777,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -1768,10 +1777,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
return true; return true;
} }
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
bool *was_alldone, unsigned long flags, bool lazy) rcu_callback_t func, unsigned long flags, bool lazy)
{ {
return false; WARN_ON_ONCE(1); /* Should be dead code! */
} }
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
......
...@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) ...@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
struct sched_param sp; struct sched_param sp;
struct task_struct *t; struct task_struct *t;
mutex_lock(&rnp->boost_kthread_mutex); if (rnp->boost_kthread_task)
if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) return;
goto out;
t = kthread_create(rcu_boost_kthread, (void *)rnp, t = kthread_create(rcu_boost_kthread, (void *)rnp,
"rcub/%d", rnp_index); "rcub/%d", rnp_index);
if (WARN_ON_ONCE(IS_ERR(t))) if (WARN_ON_ONCE(IS_ERR(t)))
goto out; return;
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t; rnp->boost_kthread_task = t;
...@@ -1210,48 +1209,11 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) ...@@ -1210,48 +1209,11 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
sp.sched_priority = kthread_prio; sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
out:
mutex_unlock(&rnp->boost_kthread_mutex);
} }
/* static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
* held, so the value of rnp->qsmaskinit will be stable.
*
* We don't include outgoingcpu in the affinity set, use -1 if there is
* no outgoing CPU. If there are no CPUs left in the affinity set,
* this function allows the kthread to execute on any CPU.
*
* Any future concurrent calls are serialized via ->boost_kthread_mutex.
*/
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{ {
struct task_struct *t = rnp->boost_kthread_task; return READ_ONCE(rnp->boost_kthread_task);
unsigned long mask;
cpumask_var_t cm;
int cpu;
if (!t)
return;
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
mutex_lock(&rnp->boost_kthread_mutex);
mask = rcu_rnp_online_cpus(rnp);
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
if (cpumask_empty(cm)) {
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
if (outgoingcpu >= 0)
cpumask_clear_cpu(outgoingcpu, cm);
}
set_cpus_allowed_ptr(t, cm);
mutex_unlock(&rnp->boost_kthread_mutex);
free_cpumask_var(cm);
} }
#else /* #ifdef CONFIG_RCU_BOOST */ #else /* #ifdef CONFIG_RCU_BOOST */
...@@ -1270,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) ...@@ -1270,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{ {
} }
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
{ {
return NULL;
} }
#endif /* #else #ifdef CONFIG_RCU_BOOST */ #endif /* #else #ifdef CONFIG_RCU_BOOST */
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment