Commit d38c8fe4 authored by Paul E. McKenney's avatar Paul E. McKenney

Merge branches 'doc.2022.06.21a', 'fixes.2022.07.19a', 'nocb.2022.07.19a',...

Merge branches 'doc.2022.06.21a', 'fixes.2022.07.19a', 'nocb.2022.07.19a', 'poll.2022.07.21a', 'rcu-tasks.2022.06.21a' and 'torture.2022.06.21a' into HEAD

doc.2022.06.21a: Documentation updates.
fixes.2022.07.19a: Miscellaneous fixes.
nocb.2022.07.19a: Callback-offload updates.
poll.2022.07.21a: Polled grace-period updates.
rcu-tasks.2022.06.21a: Tasks RCU updates.
torture.2022.06.21a: Torture-test updates.
......@@ -3659,6 +3659,9 @@
just as if they had also been called out in the
rcu_nocbs= boot parameter.
Note that this argument takes precedence over
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
noiotrap [SH] Disables trapped I/O port accesses.
noirqdebug [X86-32] Disables the code which attempts to detect and
......@@ -4557,6 +4560,9 @@
no-callback mode from boot but the mode may be
toggled at runtime via cpusets.
Note that this argument takes precedence over
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
rcu_nocb_poll [KNL]
Rather than requiring that offloaded CPUs
(specified by rcu_nocbs= above) explicitly
......@@ -5799,6 +5805,24 @@
expediting. Set to zero to disable automatic
expediting.
srcutree.srcu_max_nodelay [KNL]
Specifies the number of no-delay instances
per jiffy for which the SRCU grace period
worker thread will be rescheduled with zero
delay. Beyond this limit, worker thread will
be rescheduled with a sleep delay of one jiffy.
srcutree.srcu_max_nodelay_phase [KNL]
Specifies the per-grace-period phase, number of
non-sleeping polls of readers. Beyond this limit,
grace period worker thread will be rescheduled
with a sleep delay of one jiffy, between each
rescan of the readers, for a grace period phase.
srcutree.srcu_retry_check_delay [KNL]
Specifies number of microseconds of non-sleeping
delay between each non-sleeping poll of readers.
srcutree.small_contention_lim [KNL]
Specifies the number of update-side contention
events per jiffy will be tolerated before
......
......@@ -41,6 +41,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier_tasks(void);
void rcu_barrier_tasks_rude(void);
void synchronize_rcu(void);
unsigned long get_completed_synchronize_rcu(void);
#ifdef CONFIG_PREEMPT_RCU
......@@ -169,13 +170,24 @@ void synchronize_rcu_tasks(void);
# endif
# ifdef CONFIG_TASKS_TRACE_RCU
# define rcu_tasks_trace_qs(t) \
do { \
if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
!unlikely(READ_ONCE((t)->trc_reader_nesting))) { \
smp_store_release(&(t)->trc_reader_checked, true); \
smp_mb(); /* Readers partitioned by store. */ \
} \
// Bits for ->trc_reader_special.b.need_qs field.
#define TRC_NEED_QS 0x1 // Task needs a quiescent state.
#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state.
u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new);
void rcu_tasks_trace_qs_blkd(struct task_struct *t);
# define rcu_tasks_trace_qs(t) \
do { \
int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
\
if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \
likely(!___rttq_nesting)) { \
rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \
} else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
!READ_ONCE((t)->trc_reader_special.b.blocked)) { \
rcu_tasks_trace_qs_blkd(t); \
} \
} while (0)
# else
# define rcu_tasks_trace_qs(t) do { } while (0)
......@@ -184,7 +196,7 @@ void synchronize_rcu_tasks(void);
#define rcu_tasks_qs(t, preempt) \
do { \
rcu_tasks_classic_qs((t), (preempt)); \
rcu_tasks_trace_qs((t)); \
rcu_tasks_trace_qs(t); \
} while (0)
# ifdef CONFIG_TASKS_RUDE_RCU
......
......@@ -75,7 +75,7 @@ static inline void rcu_read_unlock_trace(void)
nesting = READ_ONCE(t->trc_reader_nesting) - 1;
barrier(); // Critical section before disabling.
// Disable IPI-based setting of .need_qs.
WRITE_ONCE(t->trc_reader_nesting, INT_MIN);
WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting);
if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
WRITE_ONCE(t->trc_reader_nesting, nesting);
return; // We assume shallow reader nesting.
......
......@@ -23,6 +23,16 @@ static inline void cond_synchronize_rcu(unsigned long oldstate)
might_sleep();
}
static inline unsigned long start_poll_synchronize_rcu_expedited(void)
{
return start_poll_synchronize_rcu();
}
static inline void cond_synchronize_rcu_expedited(unsigned long oldstate)
{
cond_synchronize_rcu(oldstate);
}
extern void rcu_barrier(void);
static inline void synchronize_rcu_expedited(void)
......@@ -38,7 +48,7 @@ static inline void synchronize_rcu_expedited(void)
*/
extern void kvfree(const void *addr);
static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
if (head) {
call_rcu(head, func);
......@@ -51,6 +61,15 @@ static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
kvfree((void *) func);
}
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
#else
static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__kvfree_call_rcu(head, func);
}
#endif
void rcu_qs(void);
static inline void rcu_softirq_qs(void)
......
......@@ -40,6 +40,8 @@ bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
bool rcu_gp_might_be_stalled(void);
unsigned long start_poll_synchronize_rcu_expedited(void);
void cond_synchronize_rcu_expedited(unsigned long oldstate);
unsigned long get_state_synchronize_rcu(void);
unsigned long start_poll_synchronize_rcu(void);
bool poll_state_synchronize_rcu(unsigned long oldstate);
......
......@@ -843,8 +843,9 @@ struct task_struct {
int trc_reader_nesting;
int trc_ipi_to_cpu;
union rcu_special trc_reader_special;
bool trc_reader_checked;
struct list_head trc_holdout_list;
struct list_head trc_blkd_node;
int trc_blkd_cpu;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
struct sched_info sched_info;
......@@ -2223,6 +2224,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);
/*
* In order to reduce various lock holder preemption latencies provide an
......
......@@ -157,6 +157,7 @@ struct task_struct init_task
.trc_reader_nesting = 0,
.trc_reader_special.s = 0,
.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
.trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node),
#endif
#ifdef CONFIG_CPUSETS
.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
......
......@@ -1814,6 +1814,7 @@ static inline void rcu_copy_process(struct task_struct *p)
p->trc_reader_nesting = 0;
p->trc_reader_special.s = 0;
INIT_LIST_HEAD(&p->trc_holdout_list);
INIT_LIST_HEAD(&p->trc_blkd_node);
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
......
......@@ -262,6 +262,35 @@ config RCU_NOCB_CPU
Say Y here if you need reduced OS jitter, despite added overhead.
Say N here if you are unsure.
config RCU_NOCB_CPU_DEFAULT_ALL
bool "Offload RCU callback processing from all CPUs by default"
depends on RCU_NOCB_CPU
default n
help
Use this option to offload callback processing from all CPUs
by default, in the absence of the rcu_nocbs or nohz_full boot
parameter. This also avoids the need to use any boot parameters
to achieve the effect of offloading all CPUs on boot.
Say Y here if you want offload all CPUs by default on boot.
Say N here if you are unsure.
config RCU_NOCB_CPU_CB_BOOST
bool "Offload RCU callback from real-time kthread"
depends on RCU_NOCB_CPU && RCU_BOOST
default y if PREEMPT_RT
help
Use this option to invoke offloaded callbacks as SCHED_FIFO
to avoid starvation by heavy SCHED_OTHER background load.
Of course, running as SCHED_FIFO during callback floods will
cause the rcuo[ps] kthreads to monopolize the CPU for hundreds
of milliseconds or more. Therefore, when enabling this option,
it is your responsibility to ensure that latency-sensitive
tasks either run with higher priority or run on some other CPU.
Say Y here if you want to set RT priority for offloading kthreads.
Say N here if you are building a !PREEMPT_RT kernel and are unsure.
config TASKS_TRACE_RCU_READ_MB
bool "Tasks Trace RCU readers use memory barriers in user and idle"
depends on RCU_EXPERT && TASKS_TRACE_RCU
......
......@@ -121,7 +121,7 @@ config RCU_EQS_DEBUG
config RCU_STRICT_GRACE_PERIOD
bool "Provide debug RCU implementation with short grace periods"
depends on DEBUG_KERNEL && RCU_EXPERT && NR_CPUS <= 4
depends on DEBUG_KERNEL && RCU_EXPERT && NR_CPUS <= 4 && !TINY_RCU
default n
select PREEMPT_COUNT if PREEMPT=n
help
......
......@@ -23,6 +23,9 @@
#define RCU_SEQ_CTR_SHIFT 2
#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1)
/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED 0x1
extern int sysctl_sched_rt_runtime;
/*
......@@ -119,6 +122,18 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
return ULONG_CMP_GE(READ_ONCE(*sp), s);
}
/*
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred, but do not allow the
* (ULONG_MAX / 2) safety-factor/guard-band.
*/
static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
{
unsigned long cur_s = READ_ONCE(*sp);
return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1));
}
/*
* Has a grace period completed since the time the old gp_seq was collected?
*/
......
......@@ -419,6 +419,7 @@ rcu_scale_writer(void *arg)
VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
WARN_ON(!wdpp);
set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
current->flags |= PF_NO_SETAFFINITY;
sched_set_fifo_low(current);
if (holdoff)
......
This diff is collapsed.
......@@ -385,7 +385,7 @@ static struct ref_scale_ops rwsem_ops = {
};
// Definitions for global spinlock
static DEFINE_SPINLOCK(test_lock);
static DEFINE_RAW_SPINLOCK(test_lock);
static void ref_lock_section(const int nloops)
{
......@@ -393,8 +393,8 @@ static void ref_lock_section(const int nloops)
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock(&test_lock);
spin_unlock(&test_lock);
raw_spin_lock(&test_lock);
raw_spin_unlock(&test_lock);
}
preempt_enable();
}
......@@ -405,9 +405,9 @@ static void ref_lock_delay_section(const int nloops, const int udl, const int nd
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock(&test_lock);
raw_spin_lock(&test_lock);
un_delay(udl, ndl);
spin_unlock(&test_lock);
raw_spin_unlock(&test_lock);
}
preempt_enable();
}
......@@ -427,8 +427,8 @@ static void ref_lock_irq_section(const int nloops)
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock_irqsave(&test_lock, flags);
spin_unlock_irqrestore(&test_lock, flags);
raw_spin_lock_irqsave(&test_lock, flags);
raw_spin_unlock_irqrestore(&test_lock, flags);
}
preempt_enable();
}
......@@ -440,9 +440,9 @@ static void ref_lock_irq_delay_section(const int nloops, const int udl, const in
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock_irqsave(&test_lock, flags);
raw_spin_lock_irqsave(&test_lock, flags);
un_delay(udl, ndl);
spin_unlock_irqrestore(&test_lock, flags);
raw_spin_unlock_irqrestore(&test_lock, flags);
}
preempt_enable();
}
......
......@@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
return sum;
}
#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
/*
* We use an adaptive strategy for synchronize_srcu() and especially for
* synchronize_srcu_expedited(). We spin for a fixed time period
* (defined below, boot time configurable) to allow SRCU readers to exit
* their read-side critical sections. If there are still some readers
* after one jiffy, we repeatedly block for one jiffy time periods.
* The blocking time is increased as the grace-period age increases,
* with max blocking time capped at 10 jiffies.
*/
#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5
static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
module_param(srcu_retry_check_delay, ulong, 0444);
#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
// no-delay instances.
#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
// no-delay instances.
#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low))
#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high))
#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
// per-GP-phase no-delay instances adjusted to allow non-sleeping poll upto
// one jiffies time duration. Mult by 2 is done to factor in the srcu_get_delay()
// called from process_srcu().
#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \
(2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
// Maximum per-GP-phase consecutive no-delay instances.
#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \
SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \
SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
module_param(srcu_max_nodelay_phase, ulong, 0444);
// Maximum consecutive no-delay instances.
#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
module_param(srcu_max_nodelay, ulong, 0444);
/*
* Return grace-period delay, zero if there are expedited grace
......@@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
unsigned long gpstart;
unsigned long j;
unsigned long jbase = SRCU_INTERVAL;
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
jbase = 0;
if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
if (!jbase) {
WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
jbase = 1;
if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
j = jiffies - 1;
gpstart = READ_ONCE(ssp->srcu_gp_start);
if (time_after(j, gpstart))
jbase += j - gpstart;
if (!jbase) {
WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
jbase = 1;
}
}
return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
......@@ -606,15 +654,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
* We use an adaptive strategy for synchronize_srcu() and especially for
* synchronize_srcu_expedited(). We spin for a fixed time period
* (defined below) to allow SRCU readers to exit their read-side critical
* sections. If there are still some readers after a few microseconds,
* we repeatedly block for 1-millisecond time periods.
*/
#define SRCU_RETRY_CHECK_DELAY 5
/*
* Start an SRCU grace period.
*/
......@@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
*/
static void srcu_gp_end(struct srcu_struct *ssp)
{
unsigned long cbdelay;
unsigned long cbdelay = 1;
bool cbs;
bool last_lvl;
int cpu;
......@@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
cbdelay = !!srcu_get_delay(ssp);
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
cbdelay = 0;
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
......@@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
unsigned long curdelay;
curdelay = !srcu_get_delay(ssp);
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
return true;
if (--trycount + !srcu_get_delay(ssp) <= 0)
if ((--trycount + curdelay) <= 0)
return false;
udelay(SRCU_RETRY_CHECK_DELAY);
udelay(srcu_retry_check_delay);
}
}
......@@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work)
j = jiffies;
if (READ_ONCE(ssp->reschedule_jiffies) == j) {
WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
WRITE_ONCE(ssp->reschedule_count, 1);
......@@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void)
pr_info("Hierarchical SRCU implementation.\n");
if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
return 0;
}
early_initcall(srcu_bootup_announce);
......
This diff is collapsed.
......@@ -58,7 +58,7 @@ void rcu_qs(void)
rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
raise_softirq_irqoff(RCU_SOFTIRQ);
}
WRITE_ONCE(rcu_ctrlblk.gp_seq, rcu_ctrlblk.gp_seq + 1);
WRITE_ONCE(rcu_ctrlblk.gp_seq, rcu_ctrlblk.gp_seq + 2);
local_irq_restore(flags);
}
......@@ -139,8 +139,10 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_rcu() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_rcu() is a quiescent
* state, and so on a UP system, synchronize_rcu() need do nothing.
* Therefore, any legal call to synchronize_rcu() is a quiescent state,
* and so on a UP system, synchronize_rcu() need do nothing, other than
* let the polled APIs know that another grace period elapsed.
*
* (But Lai Jiangshan points out the benefits of doing might_sleep()
* to reduce latency.)
*
......@@ -152,6 +154,7 @@ void synchronize_rcu(void)
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
WRITE_ONCE(rcu_ctrlblk.gp_seq, rcu_ctrlblk.gp_seq + 2);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
......@@ -213,10 +216,24 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
*/
bool poll_state_synchronize_rcu(unsigned long oldstate)
{
return READ_ONCE(rcu_ctrlblk.gp_seq) != oldstate;
return oldstate == RCU_GET_STATE_COMPLETED || READ_ONCE(rcu_ctrlblk.gp_seq) != oldstate;
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
if (head) {
void *ptr = (void *) head - (unsigned long) func;
kasan_record_aux_stack_noalloc(ptr);
}
__kvfree_call_rcu(head, func);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif
void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
......
This diff is collapsed.
......@@ -133,6 +133,10 @@ struct rcu_node {
wait_queue_head_t exp_wq[4];
struct rcu_exp_work rew;
bool exp_need_flush; /* Need to flush workitem? */
raw_spinlock_t exp_poll_lock;
/* Lock and data for polled expedited grace periods. */
unsigned long exp_seq_poll_rq;
struct work_struct exp_poll_wq;
} ____cacheline_internodealigned_in_smp;
/*
......@@ -235,6 +239,7 @@ struct rcu_data {
* if rdp_gp.
*/
struct list_head nocb_entry_rdp; /* rcu_data node in wakeup chain. */
struct rcu_data *nocb_toggling_rdp; /* rdp queued for (de-)offloading */
/* The following fields are used by CB kthread, hence new cacheline. */
struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp;
......@@ -323,6 +328,9 @@ struct rcu_state {
short gp_state; /* GP kthread sleep state. */
unsigned long gp_wake_time; /* Last GP kthread wake. */
unsigned long gp_wake_seq; /* ->gp_seq at ^^^. */
unsigned long gp_seq_polled; /* GP seq for polled API. */
unsigned long gp_seq_polled_snap; /* ->gp_seq_polled at normal GP start. */
unsigned long gp_seq_polled_exp_snap; /* ->gp_seq_polled at expedited GP start. */
/* End of fields guarded by root rcu_node's lock. */
......@@ -425,7 +433,7 @@ static void rcu_flavor_sched_clock_irq(int user);
static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static bool rcu_is_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(struct rcu_data *rdp);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
......@@ -481,3 +489,6 @@ static void rcu_iw_handler(struct irq_work *iwp);
static void check_cpu_stall(struct rcu_data *rdp);
static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
const unsigned long gpssdelay);
/* Forward declarations for tree_exp.h. */
static void sync_rcu_do_polled_gp(struct work_struct *wp);
......@@ -18,6 +18,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_gp_seq_start(void)
{
rcu_seq_start(&rcu_state.expedited_sequence);
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
}
/*
......@@ -34,6 +35,7 @@ static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
*/
static void rcu_exp_gp_seq_end(void)
{
rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
rcu_seq_end(&rcu_state.expedited_sequence);
smp_mb(); /* Ensure that consecutive grace periods serialize. */
}
......@@ -621,7 +623,6 @@ static void synchronize_rcu_expedited_wait(void)
return;
if (rcu_stall_is_suppressed())
continue;
panic_on_rcu_stall();
trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rcu_state.name);
......@@ -636,10 +637,11 @@ static void synchronize_rcu_expedited_wait(void)
continue;
ndetected++;
rdp = per_cpu_ptr(&rcu_data, cpu);
pr_cont(" %d-%c%c%c", cpu,
pr_cont(" %d-%c%c%c%c", cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
"D."[!!(rdp->cpu_no_qs.b.exp)]);
}
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
......@@ -669,6 +671,7 @@ static void synchronize_rcu_expedited_wait(void)
}
}
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
panic_on_rcu_stall();
}
}
......@@ -913,8 +916,18 @@ void synchronize_rcu_expedited(void)
"Illegal synchronize_rcu_expedited() in RCU read-side critical section");
/* Is the state is such that the call is a grace period? */
if (rcu_blocking_is_gp())
return;
if (rcu_blocking_is_gp()) {
// Note well that this code runs with !PREEMPT && !SMP.
// In addition, all code that advances grace periods runs
// at process level. Therefore, this expedited GP overlaps
// with other expedited GPs only by being fully nested within
// them, which allows reuse of ->gp_seq_polled_exp_snap.
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
if (rcu_init_invoked())
cond_resched();
return; // Context allows vacuous grace periods.
}
/* If expedited grace periods are prohibited, fall back to normal. */
if (rcu_gp_is_normal()) {
......@@ -950,3 +963,93 @@ void synchronize_rcu_expedited(void)
synchronize_rcu_expedited_destroy_work(&rew);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
/*
* Ensure that start_poll_synchronize_rcu_expedited() has the expedited
* RCU grace periods that it needs.
*/
static void sync_rcu_do_polled_gp(struct work_struct *wp)
{
unsigned long flags;
int i = 0;
struct rcu_node *rnp = container_of(wp, struct rcu_node, exp_poll_wq);
unsigned long s;
raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
s = rnp->exp_seq_poll_rq;
rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
if (s == RCU_GET_STATE_COMPLETED)
return;
while (!poll_state_synchronize_rcu(s)) {
synchronize_rcu_expedited();
if (i == 10 || i == 20)
pr_info("%s: i = %d s = %lx gp_seq_polled = %lx\n", __func__, i, s, READ_ONCE(rcu_state.gp_seq_polled));
i++;
}
raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
s = rnp->exp_seq_poll_rq;
if (poll_state_synchronize_rcu(s))
rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
}
/**
* start_poll_synchronize_rcu_expedited - Snapshot current RCU state and start expedited grace period
*
* Returns a cookie to pass to a call to cond_synchronize_rcu(),
* cond_synchronize_rcu_expedited(), or poll_state_synchronize_rcu(),
* allowing them to determine whether or not any sort of grace period has
* elapsed in the meantime. If the needed expedited grace period is not
* already slated to start, initiates that grace period.
*/
unsigned long start_poll_synchronize_rcu_expedited(void)
{
unsigned long flags;
struct rcu_data *rdp;
struct rcu_node *rnp;
unsigned long s;
s = get_state_synchronize_rcu();
rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
rnp = rdp->mynode;
if (rcu_init_invoked())
raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
if (!poll_state_synchronize_rcu(s)) {
rnp->exp_seq_poll_rq = s;
if (rcu_init_invoked())
queue_work(rcu_gp_wq, &rnp->exp_poll_wq);
}
if (rcu_init_invoked())
raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
return s;
}
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited);
/**
* cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period
*
* @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
*
* If any type of full RCU grace period has elapsed since the earlier
* call to get_state_synchronize_rcu(), start_poll_synchronize_rcu(),
* or start_poll_synchronize_rcu_expedited(), just return. Otherwise,
* invoke synchronize_rcu_expedited() to wait for a full grace period.
*
* Yes, this function does not take counter wrap into account.
* But counter wrap is harmless. If the counter wraps, we have waited for
* more than 2 billion grace periods (and way more on a 64-bit system!),
* so waiting for a couple of additional grace periods should be just fine.
*
* This function provides the same memory-ordering guarantees that
* would be provided by a synchronize_rcu() that was invoked at the call
* to the function that provided @oldstate and that returned at the end
* of this function.
*/
void cond_synchronize_rcu_expedited(unsigned long oldstate)
{
if (!poll_state_synchronize_rcu(oldstate))
synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited);
This diff is collapsed.
......@@ -460,7 +460,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
* be quite short, for example, in the case of the call from
* rcu_read_unlock_special().
*/
static void
static notrace void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
bool empty_exp;
......@@ -581,7 +581,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* is disabled. This function cannot be expected to understand these
* nuances, so the caller must handle them.
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
......@@ -595,7 +595,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
* evaluate safety in terms of interrupt, softirq, and preemption
* disabling.
*/
static void rcu_preempt_deferred_qs(struct task_struct *t)
static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
unsigned long flags;
......@@ -899,8 +899,8 @@ void rcu_note_context_switch(bool preempt)
this_cpu_write(rcu_data.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
rcu_tasks_qs(current, preempt);
out:
rcu_tasks_qs(current, preempt);
trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
......@@ -926,7 +926,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
* Because there is no preemptible RCU, there can be no deferred quiescent
* states.
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
......@@ -935,7 +935,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
// period for a quiescent state from this CPU. Note that requests from
// tasks are handled when removing the task from the blocked-tasks list
// below.
static void rcu_preempt_deferred_qs(struct task_struct *t)
static notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
......@@ -1012,6 +1012,25 @@ static void rcu_cpu_kthread_setup(unsigned int cpu)
WRITE_ONCE(rdp->rcuc_activity, jiffies);
}
static bool rcu_is_callbacks_nocb_kthread(struct rcu_data *rdp)
{
#ifdef CONFIG_RCU_NOCB_CPU
return rdp->nocb_cb_kthread == current;
#else
return false;
#endif
}
/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
static bool rcu_is_callbacks_kthread(struct rcu_data *rdp)
{
return rdp->rcu_cpu_kthread_task == current ||
rcu_is_callbacks_nocb_kthread(rdp);
}
#ifdef CONFIG_RCU_BOOST
/*
......@@ -1140,7 +1159,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
(!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
(!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld ||
IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)))) {
if (rnp->exp_tasks == NULL)
WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
......@@ -1151,15 +1171,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
}
}
/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
static bool rcu_is_callbacks_kthread(void)
{
return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
}
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
/*
......@@ -1242,11 +1253,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
static bool rcu_is_callbacks_kthread(void)
{
return false;
}
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
......
......@@ -516,6 +516,19 @@ int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot);
module_param(rcu_cpu_stall_suppress_at_boot, int, 0444);
/**
* get_completed_synchronize_rcu - Return a pre-completed polled state cookie
*
* Returns a value that will always be treated by functions like
* poll_state_synchronize_rcu() as a cookie whose grace period has already
* completed.
*/
unsigned long get_completed_synchronize_rcu(void)
{
return RCU_GET_STATE_COMPLETED;
}
EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu);
#ifdef CONFIG_PROVE_RCU
/*
......
......@@ -4263,6 +4263,38 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
return ret;
}
/**
* cpu_curr_snapshot - Return a snapshot of the currently running task
* @cpu: The CPU on which to snapshot the task.
*
* Returns the task_struct pointer of the task "currently" running on
* the specified CPU. If the same task is running on that CPU throughout,
* the return value will be a pointer to that task's task_struct structure.
* If the CPU did any context switches even vaguely concurrently with the
* execution of this function, the return value will be a pointer to the
* task_struct structure of a randomly chosen task that was running on
* that CPU somewhere around the time that this function was executing.
*
* If the specified CPU was offline, the return value is whatever it
* is, perhaps a pointer to the task_struct structure of that CPU's idle
* task, but there is no guarantee. Callers wishing a useful return
* value must take some action to ensure that the specified CPU remains
* online throughout.
*
* This function executes full memory barriers before and after fetching
* the pointer, which permits the caller to confine this function's fetch
* with respect to the caller's accesses to other shared variables.
*/
struct task_struct *cpu_curr_snapshot(int cpu)
{
struct task_struct *t;
smp_mb(); /* Pairing determined by caller's synchronization design. */
t = rcu_dereference(cpu_curr(cpu));
smp_mb(); /* Pairing determined by caller's synchronization design. */
return t;
}
/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
......
......@@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str)
if (val)
static_branch_enable(&csdlock_debug_enabled);
return 0;
return 1;
}
early_param("csdlock_debug", csdlock_debug);
__setup("csdlock_debug=", csdlock_debug);
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
......
......@@ -35,7 +35,7 @@ then
exit 1
fi
# Remember where we started so that we can get back and the end.
# Remember where we started so that we can get back at the end.
curcommit="`git status | head -1 | awk '{ print $NF }'`"
nfail=0
......@@ -73,15 +73,10 @@ do
# Test the specified commit.
git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
echo git checkout return code: $? "(Commit $ntry: $i)"
kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1
ret=$?
echo kvm.sh return code $ret for commit $i from branch $gitbr
# Move the build products to their resting place.
runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`"
mv $runresdir $resdir/$ds/$idir
rrd="`echo $runresdir | sed -e 's,^.*/,,'`"
echo Run results: $resdir/$ds/$idir/$rrd
echo Run results: $resdir/$ds/$idir
if test "$ret" -ne 0
then
# Failure, so leave all evidence intact.
......
......@@ -262,6 +262,7 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log"
# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
while checkremotefile "$i" "$resdir/$ds/remote.run"
do
sleep 30
......
......@@ -164,7 +164,7 @@ do
shift
;;
--gdb)
TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG
TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG
TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
;;
......@@ -180,7 +180,7 @@ do
shift
;;
--kasan)
TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
if test -n "$torture_qemu_mem_default"
then
TORTURE_QEMU_MEM=2G
......@@ -192,7 +192,7 @@ do
shift
;;
--kcsan)
TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
--kmake-arg|--kmake-args)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment