Commit b770efc4 authored by Paul E. McKenney

Merge branches 'doc.2021.07.20c', 'fixes.2021.08.06a', 'nocb.2021.07.20c', 'nolibc.2021.07.20c', 'tasks.2021.07.20c', 'torture.2021.07.27a' and 'torturescript.2021.07.27a' into HEAD

doc.2021.07.20c: Documentation updates.
fixes.2021.08.06a: Miscellaneous fixes.
nocb.2021.07.20c: Callback-offloading (NOCB CPU) updates.
nolibc.2021.07.20c: Tiny userspace library updates.
tasks.2021.07.20c: Tasks RCU updates.
torture.2021.07.27a: In-kernel torture-test updates.
torturescript.2021.07.27a: Torture-test scripting updates.
@@ -10,15 +10,6 @@
 #include <linux/list.h>
 #include <linux/rcupdate.h>
 
-/*
- * Why is there no list_empty_rcu()?  Because list_empty() serves this
- * purpose.  The list_empty() function fetches the RCU-protected pointer
- * and compares it to the address of the list head, but neither dereferences
- * this pointer itself nor provides this pointer to the caller.  Therefore,
- * it is not necessary to use rcu_dereference(), so that list_empty() can
- * be used anywhere you would want to use a list_empty_rcu().
- */
-
 /*
  * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
  * @list: list to be initialized
@@ -318,21 +309,29 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
 /*
  * Where are list_empty_rcu() and list_first_entry_rcu()?
  *
- * Implementing those functions following their counterparts list_empty() and
- * list_first_entry() is not advisable because they lead to subtle race
- * conditions as the following snippet shows:
+ * They do not exist because they would lead to subtle race conditions:
  *
  * if (!list_empty_rcu(mylist)) {
  *	struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
  *	do_something(bar);
  * }
  *
- * The list may not be empty when list_empty_rcu checks it, but it may be when
- * list_first_entry_rcu rereads the ->next pointer.
- *
- * Rereading the ->next pointer is not a problem for list_empty() and
- * list_first_entry() because they would be protected by a lock that blocks
- * writers.
+ * The list might be non-empty when list_empty_rcu() checks it, but it
+ * might have become empty by the time that list_first_entry_rcu() rereads
+ * the ->next pointer, which would result in a SEGV.
+ *
+ * When not using RCU, it is OK for list_first_entry() to re-read that
+ * pointer because both functions should be protected by some lock that
+ * blocks writers.
+ *
+ * When using RCU, list_empty() uses READ_ONCE() to fetch the
+ * RCU-protected ->next pointer and then compares it to the address of the
+ * list head.  However, it neither dereferences this pointer nor provides
+ * this pointer to its caller.  Thus, READ_ONCE() suffices (that is,
+ * rcu_dereference() is not needed), which means that list_empty() can be
+ * used anywhere you would want to use list_empty_rcu().  Just don't
+ * expect anything useful to happen if you do a subsequent lockless
+ * call to list_first_entry_rcu()!!!
  *
  * See list_first_or_null_rcu for an alternative.
  */
...
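The alternative mentioned above folds the emptiness check and the pointer fetch into a single marked access, leaving no window for the list to change in between. A minimal reader-side sketch (not part of this commit), reusing the hypothetical mylist/struct foo names from the comment:

	rcu_read_lock();
	bar = list_first_or_null_rcu(mylist, struct foo, list_member);
	if (bar)
		do_something(bar);
	rcu_read_unlock();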
@@ -53,7 +53,7 @@ void __rcu_read_unlock(void);
  * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
  * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  */
-#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
@@ -167,7 +167,7 @@ void synchronize_rcu_tasks(void);
 #  define synchronize_rcu_tasks synchronize_rcu
 # endif
 
-# ifdef CONFIG_TASKS_RCU_TRACE
+# ifdef CONFIG_TASKS_TRACE_RCU
 # define rcu_tasks_trace_qs(t)						\
	do {								\
		if (!likely(READ_ONCE((t)->trc_reader_checked)) &&	\
...
@@ -14,9 +14,6 @@
 #include <asm/param.h> /* for HZ */
 
-/* Never flag non-existent other CPUs! */
-static inline bool rcu_eqs_special_set(int cpu) { return false; }
-
 unsigned long get_state_synchronize_rcu(void);
 unsigned long start_poll_synchronize_rcu(void);
 bool poll_state_synchronize_rcu(unsigned long oldstate);
...
@@ -61,7 +61,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
	int idx;
 
	idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
-	WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1);
+	WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
	return idx;
 }
@@ -81,11 +81,11 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 {
	int idx;
 
-	idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
+	idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1;
	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
		 tt, tf, idx,
-		 READ_ONCE(ssp->srcu_lock_nesting[!idx]),
-		 READ_ONCE(ssp->srcu_lock_nesting[idx]));
+		 data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])),
+		 data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
 }
 #endif
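The data_race(READ_ONCE(...)) pairing used in srcu_torture_stats_print() is the usual idiom for diagnostics-only reads: data_race() tells KCSAN that the race is intentional, while the retained READ_ONCE() still prevents the compiler from tearing or refetching the load. A sketch of the idiom on a hypothetical shared counter:

	/* Diagnostic read of a concurrently-updated value: KCSAN is told
	 * to ignore the race, but load tearing remains forbidden. */
	pr_info("count: %d\n", data_race(READ_ONCE(shared_count)));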
@@ -59,7 +59,7 @@ static struct task_struct **writer_tasks;
 static struct task_struct **reader_tasks;
 
 static bool lock_is_write_held;
-static bool lock_is_read_held;
+static atomic_t lock_is_read_held;
 static unsigned long last_lock_release;
 
 struct lock_stress_stats {
@@ -682,7 +682,7 @@ static int lock_torture_writer(void *arg)
		if (WARN_ON_ONCE(lock_is_write_held))
			lwsp->n_lock_fail++;
		lock_is_write_held = true;
-		if (WARN_ON_ONCE(lock_is_read_held))
+		if (WARN_ON_ONCE(atomic_read(&lock_is_read_held)))
			lwsp->n_lock_fail++; /* rare, but... */
 
		lwsp->n_lock_acquired++;
@@ -717,13 +717,13 @@ static int lock_torture_reader(void *arg)
			schedule_timeout_uninterruptible(1);
 
		cxt.cur_ops->readlock(tid);
-		lock_is_read_held = true;
+		atomic_inc(&lock_is_read_held);
		if (WARN_ON_ONCE(lock_is_write_held))
			lrsp->n_lock_fail++; /* rare, but... */
 
		lrsp->n_lock_acquired++;
		cxt.cur_ops->read_delay(&rand);
-		lock_is_read_held = false;
+		atomic_dec(&lock_is_read_held);
		cxt.cur_ops->readunlock(tid);
 
		stutter_wait("lock_torture_reader");
@@ -738,20 +738,22 @@ static int lock_torture_reader(void *arg)
 static void __torture_print_stats(char *page,
				  struct lock_stress_stats *statp, bool write)
 {
+	long cur;
	bool fail = false;
	int i, n_stress;
-	long max = 0, min = statp ? statp[0].n_lock_acquired : 0;
+	long max = 0, min = statp ? data_race(statp[0].n_lock_acquired) : 0;
	long long sum = 0;
 
	n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
	for (i = 0; i < n_stress; i++) {
-		if (statp[i].n_lock_fail)
+		if (data_race(statp[i].n_lock_fail))
			fail = true;
-		sum += statp[i].n_lock_acquired;
-		if (max < statp[i].n_lock_acquired)
-			max = statp[i].n_lock_acquired;
-		if (min > statp[i].n_lock_acquired)
-			min = statp[i].n_lock_acquired;
+		cur = data_race(statp[i].n_lock_acquired);
+		sum += cur;
+		if (max < cur)
+			max = cur;
+		if (min > cur)
+			min = cur;
	}
	page += sprintf(page,
			"%s:  Total: %lld  Max/Min: %ld/%ld %s  Fail: %d %s\n",
@@ -996,7 +998,6 @@ static int __init lock_torture_init(void)
	}
 
	if (nreaders_stress) {
-		lock_is_read_held = false;
		cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress,
					 sizeof(*cxt.lrsa),
					 GFP_KERNEL);
...
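The bool-to-atomic_t conversion matters because several reader kthreads can hold a read lock at the same time: with a plain flag, the first reader to unlock clears it while others still hold the lock, so the writer's WARN_ON_ONCE() can miss genuine read/write conflicts (and the unmarked flag writes race with each other). A timeline sketch of the failure the counter fixes (illustration only, not kernel code):

	/* With a bool flag:
	 *   reader A:  lock_is_read_held = true;
	 *   reader B:  lock_is_read_held = true;
	 *   reader A:  lock_is_read_held = false;   // B still holds the lock!
	 *   writer:    WARN_ON_ONCE(lock_is_read_held) sees false: missed conflict.
	 *
	 * With a counter, the value returns to zero only when the
	 * last reader leaves: */
	atomic_inc(&lock_is_read_held);	/* reader-side lock   */
	atomic_dec(&lock_is_read_held);	/* reader-side unlock */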
@@ -487,7 +487,7 @@ rcu_scale_writer(void *arg)
	if (gp_async) {
		cur_ops->gp_barrier();
	}
-	writer_n_durations[me] = i_max;
+	writer_n_durations[me] = i_max + 1;
	torture_kthread_stopping("rcu_scale_writer");
	return 0;
 }
@@ -561,7 +561,7 @@ rcu_scale_cleanup(void)
			wdpp = writer_durations[i];
			if (!wdpp)
				continue;
-			for (j = 0; j <= writer_n_durations[i]; j++) {
+			for (j = 0; j < writer_n_durations[i]; j++) {
				wdp = &wdpp[j];
				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
					 scale_type, SCALE_FLAG,
...
@@ -2022,8 +2022,13 @@ static int rcu_torture_stall(void *args)
			 __func__, raw_smp_processor_id());
	while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
			    stop_at))
-		if (stall_cpu_block)
+		if (stall_cpu_block) {
+#ifdef CONFIG_PREEMPTION
+			preempt_schedule();
+#else
			schedule_timeout_uninterruptible(HZ);
+#endif
+		}
	if (stall_cpu_irqsoff)
		local_irq_enable();
	else if (!stall_cpu_block)
...
@@ -467,6 +467,40 @@ static struct ref_scale_ops acqrel_ops = {
	.name		= "acqrel"
 };
 
+static volatile u64 stopopts;
+
+static void ref_clock_section(const int nloops)
+{
+	u64 x = 0;
+	int i;
+
+	preempt_disable();
+	for (i = nloops; i >= 0; i--)
+		x += ktime_get_real_fast_ns();
+	preempt_enable();
+	stopopts = x;
+}
+
+static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
+{
+	u64 x = 0;
+	int i;
+
+	preempt_disable();
+	for (i = nloops; i >= 0; i--) {
+		x += ktime_get_real_fast_ns();
+		un_delay(udl, ndl);
+	}
+	preempt_enable();
+	stopopts = x;
+}
+
+static struct ref_scale_ops clock_ops = {
+	.readsection	= ref_clock_section,
+	.delaysection	= ref_clock_delay_section,
+	.name		= "clock"
+};
+
 static void rcu_scale_one_reader(void)
 {
	if (readdelay <= 0)
@@ -759,7 +793,7 @@ ref_scale_init(void)
	int firsterr = 0;
	static struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
-		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
+		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
	};
 
	if (!torture_init_begin(scale_type, verbose))
...
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
  */
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 {
-	int newval = ssp->srcu_lock_nesting[idx] - 1;
+	int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
 
	WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
	if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
...
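These nesting counters are driven by the ordinary SRCU reader API, and the grace-period machinery polls them concurrently, which is why even the local read in __srcu_read_unlock() is now marked. For context, a minimal Tiny SRCU reader sketch (ss, gp, and do_something_with() are hypothetical names):

	int idx;

	idx = srcu_read_lock(&ss);		/* bumps ->srcu_lock_nesting[idx] */
	p = srcu_dereference(gp, &ss);		/* RCU-protected pointer fetch */
	if (p)
		do_something_with(p);
	srcu_read_unlock(&ss, idx);		/* decrement races with grace-period polling */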
@@ -643,8 +643,8 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
 //
 // "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
 // passing an empty function to schedule_on_each_cpu().  This approach
-// provides an asynchronous call_rcu_tasks_rude() API and batching
-// of concurrent calls to the synchronous synchronize_rcu_rude() API.
+// provides an asynchronous call_rcu_tasks_rude() API and batching of
+// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
 // This invokes schedule_on_each_cpu() in order to send IPIs far and wide
 // and induces otherwise unnecessary context switches on all online CPUs,
 // whether idle or not.
@@ -785,7 +785,10 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
 //	set that task's .need_qs flag so that task's next outermost
 //	rcu_read_unlock_trace() will report the quiescent state (in which
 //	case the count of readers is incremented).  If both attempts fail,
-//	the task is added to a "holdout" list.
+//	the task is added to a "holdout" list.  Note that IPIs are used
+//	to invoke trc_read_check_handler() in the context of running tasks
+//	in order to avoid ordering overhead on common-case shared-variable
+//	accesses.
 // rcu_tasks_trace_postscan():
 //	Initialize state and attempt to identify an immediate quiescent
 //	state as above (but only for idle tasks), unblock CPU-hotplug
@@ -847,7 +850,7 @@ static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
 /* If we are the last reader, wake up the grace-period kthread. */
 void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
 {
-	int nq = t->trc_reader_special.b.need_qs;
+	int nq = READ_ONCE(t->trc_reader_special.b.need_qs);
 
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
	    t->trc_reader_special.b.need_mb)
@@ -894,7 +897,7 @@ static void trc_read_check_handler(void *t_in)
 
	// If the task is not in a read-side critical section, and
	// if this is the last reader, awaken the grace-period kthread.
-	if (likely(!t->trc_reader_nesting)) {
+	if (likely(!READ_ONCE(t->trc_reader_nesting))) {
		if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
			wake_up(&trc_wait);
		// Mark as checked after decrement to avoid false
@@ -903,7 +906,7 @@ static void trc_read_check_handler(void *t_in)
		goto reset_ipi;
	}
	// If we are racing with an rcu_read_unlock_trace(), try again later.
-	if (unlikely(t->trc_reader_nesting < 0)) {
+	if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) {
		if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
			wake_up(&trc_wait);
		goto reset_ipi;
@@ -913,14 +916,14 @@ static void trc_read_check_handler(void *t_in)
	// Get here if the task is in a read-side critical section.  Set
	// its state so that it will awaken the grace-period kthread upon
	// exit from that critical section.
-	WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+	WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
 
 reset_ipi:
	// Allow future IPIs to be sent on CPU and for task.
	// Also order this IPI handler against any later manipulations of
	// the intended task.
-	smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
+	smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
	smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
 }
@@ -950,6 +953,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
		n_heavy_reader_ofl_updates++;
		in_qs = true;
	} else {
+		// The task is not running, so C-language access is safe.
		in_qs = likely(!t->trc_reader_nesting);
	}
@@ -964,7 +968,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
	// state so that it will awaken the grace-period kthread upon exit
	// from that critical section.
	atomic_inc(&trc_n_readers_need_end); // One more to wait on.
-	WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+	WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
	return true;
 }
@@ -982,7 +986,7 @@ static void trc_wait_for_one_reader(struct task_struct *t,
	// The current task had better be in a quiescent state.
	if (t == current) {
		t->trc_reader_checked = true;
-		WARN_ON_ONCE(t->trc_reader_nesting);
+		WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
		return;
	}
@@ -994,6 +998,12 @@ static void trc_wait_for_one_reader(struct task_struct *t,
	}
	put_task_struct(t);
 
+	// If this task is not yet on the holdout list, then we are in
+	// an RCU read-side critical section.  Otherwise, the invocation of
+	// rcu_add_holdout() that added it to the list did the necessary
+	// get_task_struct().  Either way, the task cannot be freed out
+	// from under this code.
+
	// If currently running, send an IPI, either way, add to list.
	trc_add_holdout(t, bhp);
	if (task_curr(t) &&
@@ -1092,8 +1102,8 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
		 ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
		 ".i"[is_idle_task(t)],
		 ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
-		 t->trc_reader_nesting,
-		 " N"[!!t->trc_reader_special.b.need_qs],
+		 READ_ONCE(t->trc_reader_nesting),
+		 " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
		 cpu);
	sched_show_task(t);
 }
@@ -1187,7 +1197,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
 static void exit_tasks_rcu_finish_trace(struct task_struct *t)
 {
	WRITE_ONCE(t->trc_reader_checked, true);
-	WARN_ON_ONCE(t->trc_reader_nesting);
+	WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
	WRITE_ONCE(t->trc_reader_nesting, 0);
	if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
		rcu_read_unlock_trace_special(t, 0);
...
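The readers that the IPI-based scan above interrogates use the Tasks Trace RCU API, whose lock and unlock paths maintain the ->trc_reader_nesting and ->trc_reader_special fields referenced throughout this diff. A minimal reader sketch (the traced callee is hypothetical):

	#include <linux/rcupdate_trace.h>

	rcu_read_lock_trace();		/* increments current->trc_reader_nesting */
	do_traced_thing();		/* e.g., code patched by tracing */
	rcu_read_unlock_trace();	/* invokes rcu_read_unlock_trace_special() if ->need_qs is set */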
@@ -74,17 +74,10 @@
 
 /* Data structures. */
 
-/*
- * Steal a bit from the bottom of ->dynticks for idle entry/exit
- * control.  Initially this is for TLB flushing.
- */
-#define RCU_DYNTICK_CTRL_MASK 0x1
-#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
-
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
	.dynticks_nesting = 1,
	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
-	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
+	.dynticks = ATOMIC_INIT(1),
 #ifdef CONFIG_RCU_NOCB_CPU
	.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
 #endif
@@ -258,6 +251,15 @@ void rcu_softirq_qs(void)
	rcu_tasks_qs(current, false);
 }
 
+/*
+ * Increment the current CPU's rcu_data structure's ->dynticks field
+ * with ordering.  Return the new value.
+ */
+static noinline noinstr unsigned long rcu_dynticks_inc(int incby)
+{
+	return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks));
+}
+
 /*
  * Record entry into an extended quiescent state.  This is only to be
  * called when not already in an extended quiescent state, that is,
@@ -266,7 +268,6 @@ void rcu_softirq_qs(void)
  */
 static noinstr void rcu_dynticks_eqs_enter(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	int seq;
 
	/*
@@ -275,13 +276,9 @@ static noinstr void rcu_dynticks_eqs_enter(void)
	 * next idle sojourn.
	 */
	rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
-	seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+	seq = rcu_dynticks_inc(1);
	// RCU is no longer watching.  Better be in extended quiescent state!
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     (seq & RCU_DYNTICK_CTRL_CTR));
-	/* Better not have special action (TLB flush) pending! */
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     (seq & RCU_DYNTICK_CTRL_MASK));
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
 }
 
 /*
@@ -291,7 +288,6 @@ static noinstr void rcu_dynticks_eqs_enter(void)
  */
 static noinstr void rcu_dynticks_eqs_exit(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	int seq;
 
	/*
@@ -299,15 +295,10 @@ static noinstr void rcu_dynticks_eqs_exit(void)
	 * and we also must force ordering with the next RCU read-side
	 * critical section.
	 */
-	seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+	seq = rcu_dynticks_inc(1);
	// RCU is now watching.  Better not be in an extended quiescent state!
	rcu_dynticks_task_trace_exit();  // After ->dynticks update!
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     !(seq & RCU_DYNTICK_CTRL_CTR));
-	if (seq & RCU_DYNTICK_CTRL_MASK) {
-		arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
-		smp_mb__after_atomic(); /* _exit after clearing mask. */
-	}
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
 }
 
 /*
@@ -324,9 +315,9 @@ static void rcu_dynticks_eqs_online(void)
 {
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
-	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
+	if (atomic_read(&rdp->dynticks) & 0x1)
		return;
-	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+	rcu_dynticks_inc(1);
 }
 
 /*
@@ -336,9 +327,7 @@ static void rcu_dynticks_eqs_online(void)
  */
 static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
-	return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
+	return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
 }
 
 /*
@@ -347,9 +336,8 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
  */
 static int rcu_dynticks_snap(struct rcu_data *rdp)
 {
-	int snap = atomic_add_return(0, &rdp->dynticks);
-
-	return snap & ~RCU_DYNTICK_CTRL_MASK;
+	smp_mb();  // Fundamental RCU ordering guarantee.
+	return atomic_read_acquire(&rdp->dynticks);
 }
 
 /*
@@ -358,7 +346,7 @@ static int rcu_dynticks_snap(struct rcu_data *rdp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & RCU_DYNTICK_CTRL_CTR);
+	return !(snap & 0x1);
 }
 
 /* Return true if the specified CPU is currently idle from an RCU viewpoint. */
@@ -389,8 +377,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
	int snap;
 
	// If not quiescent, force back to earlier extended quiescent state.
-	snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
-					       RCU_DYNTICK_CTRL_CTR);
+	snap = atomic_read(&rdp->dynticks) & ~0x1;
	smp_rmb(); // Order ->dynticks and *vp reads.
	if (READ_ONCE(*vp))
@@ -398,32 +385,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
	smp_rmb(); // Order *vp read and ->dynticks re-read.
 
	// If still in the same extended quiescent state, we are good!
-	return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
-}
-
-/*
- * Set the special (bottom) bit of the specified CPU so that it
- * will take special action (such as flushing its TLB) on the
- * next exit from an extended quiescent state.  Returns true if
- * the bit was successfully set, or false if the CPU was not in
- * an extended quiescent state.
- */
-bool rcu_eqs_special_set(int cpu)
-{
-	int old;
-	int new;
-	int new_old;
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-
-	new_old = atomic_read(&rdp->dynticks);
-	do {
-		old = new_old;
-		if (old & RCU_DYNTICK_CTRL_CTR)
-			return false;
-		new = old | RCU_DYNTICK_CTRL_MASK;
-		new_old = atomic_cmpxchg(&rdp->dynticks, old, new);
-	} while (new_old != old);
-	return true;
+	return snap == atomic_read(&rdp->dynticks);
 }
 
 /*
@@ -439,13 +401,12 @@ bool rcu_eqs_special_set(int cpu)
  */
 notrace void rcu_momentary_dyntick_idle(void)
 {
-	int special;
+	int seq;
 
	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
-	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
-				    &this_cpu_ptr(&rcu_data)->dynticks);
+	seq = rcu_dynticks_inc(2);
	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+	WARN_ON_ONCE(!(seq & 0x1));
	rcu_preempt_deferred_qs(current);
 }
 EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
@@ -1325,7 +1286,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
	 */
	jtsq = READ_ONCE(jiffies_to_sched_qs);
	ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);
-	rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
+	rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu);
	if (!READ_ONCE(*rnhqp) &&
	    (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
	     time_after(jiffies, rcu_state.jiffies_resched) ||
@@ -1772,7 +1733,7 @@ static void rcu_strict_gp_boundary(void *unused)
 /*
  * Initialize a new grace period.  Return false if no grace period required.
  */
-static bool rcu_gp_init(void)
+static noinline_for_stack bool rcu_gp_init(void)
 {
	unsigned long firstseq;
	unsigned long flags;
@@ -1966,7 +1927,7 @@ static void rcu_gp_fqs(bool first_time)
 /*
  * Loop doing repeated quiescent-state forcing until the grace period ends.
  */
-static void rcu_gp_fqs_loop(void)
+static noinline_for_stack void rcu_gp_fqs_loop(void)
 {
	bool first_gp_fqs;
	int gf = 0;
@@ -1993,8 +1954,8 @@ static void rcu_gp_fqs_loop(void)
		trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
				       TPS("fqswait"));
		WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
-		ret = swait_event_idle_timeout_exclusive(
-				rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+		(void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq,
+				 rcu_gp_fqs_check_wake(&gf), j);
		rcu_gp_torture_wait();
		WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
		/* Locking provides needed memory barriers. */
@@ -2471,9 +2432,6 @@ int rcutree_dead_cpu(unsigned int cpu)
	WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
	/* Adjust any no-longer-needed kthreads. */
	rcu_boost_kthread_setaffinity(rnp, -1);
-	/* Do any needed no-CB deferred wakeups from this CPU. */
-	do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));
-
	// Stop-machine done, so allow nohz_full to disable tick.
	tick_dep_clear(TICK_DEP_BIT_RCU);
	return 0;
@@ -4050,7 +4008,7 @@ void rcu_barrier(void)
	 */
	init_completion(&rcu_state.barrier_completion);
	atomic_set(&rcu_state.barrier_cpu_count, 2);
-	get_online_cpus();
+	cpus_read_lock();
 
	/*
	 * Force each CPU with callbacks to register a new callback.
@@ -4081,7 +4039,7 @@ void rcu_barrier(void)
					  rcu_state.barrier_sequence);
		}
	}
-	put_online_cpus();
+	cpus_read_unlock();
 
	/*
	 * Now that we have an rcu_barrier_callback() callback on each
@@ -4784,4 +4742,5 @@ void __init rcu_init(void)
 
 #include "tree_stall.h"
 #include "tree_exp.h"
+#include "tree_nocb.h"
 #include "tree_plugin.h"
...
...
@@ -7,6 +7,8 @@
  * Author: Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
+#include <linux/kvm_para.h>
+
 //////////////////////////////////////////////////////////////////////////////
 //
 // Controlling CPU stall warnings, including delay calculation.
@@ -117,17 +119,14 @@ static void panic_on_rcu_stall(void)
 }
 
 /**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
+ * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
  *
  * The caller must disable hard irqs.
  */
 void rcu_cpu_stall_reset(void)
 {
-	WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
+	WRITE_ONCE(rcu_state.jiffies_stall,
+		   jiffies + rcu_jiffies_till_stall_check());
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -267,8 +266,10 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
	struct task_struct *ts[8];
 
	lockdep_assert_irqs_disabled();
-	if (!rcu_preempt_blocked_readers_cgp(rnp))
+	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
+	}
	pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
	       rnp->level, rnp->grplo, rnp->grphi);
	t = list_entry(rnp->gp_tasks->prev,
@@ -280,8 +281,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
			break;
	}
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	for (i--; i; i--) {
-		t = ts[i];
+	while (i) {
+		t = ts[--i];
		if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
			pr_cont(" P%d", t->pid);
		else
@@ -350,7 +351,7 @@ static void rcu_dump_cpu_stacks(void)
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 
	sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
		rdp->last_accelerate & 0xffff, jiffies & 0xffff,
@@ -464,9 +465,10 @@ static void rcu_check_gp_kthread_starvation(void)
		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n",
		       rcu_state.name, j,
		       (long)rcu_seq_current(&rcu_state.gp_seq),
-		       data_race(rcu_state.gp_flags),
-		       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
-		       gpk ? gpk->__state : ~0, cpu);
+		       data_race(READ_ONCE(rcu_state.gp_flags)),
+		       gp_state_getname(rcu_state.gp_state),
+		       data_race(READ_ONCE(rcu_state.gp_state)),
+		       gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu);
		if (gpk) {
			pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
			pr_err("RCU grace-period kthread stack dump:\n");
@@ -509,7 +511,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
			(long)rcu_seq_current(&rcu_state.gp_seq),
			data_race(rcu_state.gp_flags),
			gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
-			gpk->__state);
+			data_race(READ_ONCE(gpk->__state)));
		pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
		       cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
	}
@@ -568,11 +570,11 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
		pr_err("INFO: Stall ended before state dump start\n");
	} else {
		j = jiffies;
-		gpa = data_race(rcu_state.gp_activity);
+		gpa = data_race(READ_ONCE(rcu_state.gp_activity));
		pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
		       rcu_state.name, j - gpa, j, gpa,
-		       data_race(jiffies_till_next_fqs),
-		       rcu_get_root()->qsmask);
+		       data_race(READ_ONCE(jiffies_till_next_fqs)),
+		       data_race(READ_ONCE(rcu_get_root()->qsmask)));
	}
 }
 
 /* Rewrite if needed in case of slow consoles. */
@@ -646,6 +648,7 @@ static void print_cpu_stall(unsigned long gps)
 
 static void check_cpu_stall(struct rcu_data *rdp)
 {
+	bool didstall = false;
	unsigned long gs1;
	unsigned long gs2;
	unsigned long gps;
@@ -691,24 +694,46 @@ static void check_cpu_stall(struct rcu_data *rdp)
	    ULONG_CMP_GE(gps, js))
		return; /* No stall or GP completed since entering function. */
	rnp = rdp->mynode;
-	jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+	jn = jiffies + ULONG_MAX / 2;
	if (rcu_gp_in_progress() &&
	    (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
	    cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like an RCU stall. Check to see if the host
+		 * stopped the vm.
+		 */
+		if (kvm_check_and_clear_guest_paused())
+			return;
+
		/* We haven't checked in, so go dump stack. */
		print_cpu_stall(gps);
		if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
			rcu_ftrace_dump(DUMP_ALL);
+		didstall = true;
 
	} else if (rcu_gp_in_progress() &&
		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
		   cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like an RCU stall. Check to see if the host
+		 * stopped the vm.
+		 */
+		if (kvm_check_and_clear_guest_paused())
+			return;
+
		/* They had a few time units to dump stack, so complain. */
		print_other_cpu_stall(gs2, gps);
		if (READ_ONCE(rcu_cpu_stall_ftrace_dump))
			rcu_ftrace_dump(DUMP_ALL);
+		didstall = true;
+	}
+	if (didstall && READ_ONCE(rcu_state.jiffies_stall) == jn) {
+		jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+		WRITE_ONCE(rcu_state.jiffies_stall, jn);
	}
 }
@@ -742,7 +767,7 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
 
	rcu_for_each_leaf_node(rnp) {
		if (!cpup) {
-			if (READ_ONCE(rnp->qsmask)) {
+			if (data_race(READ_ONCE(rnp->qsmask))) {
				return false;
			} else {
				if (READ_ONCE(rnp->gp_tasks))
@@ -791,32 +816,34 @@ void show_rcu_gp_kthreads(void)
	struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
 
	j = jiffies;
-	ja = j - data_race(rcu_state.gp_activity);
-	jr = j - data_race(rcu_state.gp_req_activity);
-	js = j - data_race(rcu_state.gp_start);
-	jw = j - data_race(rcu_state.gp_wake_time);
+	ja = j - data_race(READ_ONCE(rcu_state.gp_activity));
+	jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity));
+	js = j - data_race(READ_ONCE(rcu_state.gp_start));
+	jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time));
	pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
		rcu_state.name, gp_state_getname(rcu_state.gp_state),
-		rcu_state.gp_state, t ? t->__state : 0x1ffff, t ? t->rt_priority : 0xffU,
-		js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
-		(long)data_race(rcu_state.gp_seq),
-		(long)data_race(rcu_get_root()->gp_seq_needed),
-		data_race(rcu_state.gp_max),
-		data_race(rcu_state.gp_flags));
+		data_race(READ_ONCE(rcu_state.gp_state)),
+		t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? t->rt_priority : 0xffU,
+		js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)),
+		(long)data_race(READ_ONCE(rcu_state.gp_seq)),
+		(long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)),
+		data_race(READ_ONCE(rcu_state.gp_max)),
+		data_race(READ_ONCE(rcu_state.gp_flags)));
	rcu_for_each_node_breadth_first(rnp) {
		if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
-		    !data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) &&
-		    !data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks))
+		    !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) &&
+		    !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks)))
			continue;
		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
			rnp->grplo, rnp->grphi,
-			(long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed),
-			data_race(rnp->qsmask),
-			".b"[!!data_race(rnp->boost_kthread_task)],
-			".B"[!!data_race(rnp->boost_tasks)],
-			".E"[!!data_race(rnp->exp_tasks)],
-			".G"[!!data_race(rnp->gp_tasks)],
-			data_race(rnp->n_boosts));
+			(long)data_race(READ_ONCE(rnp->gp_seq)),
+			(long)data_race(READ_ONCE(rnp->gp_seq_needed)),
+			data_race(READ_ONCE(rnp->qsmask)),
+			".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))],
+			".B"[!!data_race(READ_ONCE(rnp->boost_tasks))],
+			".E"[!!data_race(READ_ONCE(rnp->exp_tasks))],
+			".G"[!!data_race(READ_ONCE(rnp->gp_tasks))],
+			data_race(READ_ONCE(rnp->n_boosts)));
		if (!rcu_is_leaf_node(rnp))
			continue;
		for_each_leaf_node_possible_cpu(rnp, cpu) {
@@ -826,12 +853,12 @@ void show_rcu_gp_kthreads(void)
					 READ_ONCE(rdp->gp_seq_needed)))
				continue;
			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
-				cpu, (long)data_race(rdp->gp_seq_needed));
+				cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed)));
		}
	}
	for_each_possible_cpu(cpu) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
-		cbs += data_race(rdp->n_cbs_invoked);
+		cbs += data_race(READ_ONCE(rdp->n_cbs_invoked));
		if (rcu_segcblist_is_offloaded(&rdp->cblist))
			show_rcu_nocb_state(rdp);
	}
@@ -913,11 +940,11 @@ void rcu_fwd_progress_check(unsigned long j)
 
	if (rcu_gp_in_progress()) {
		pr_info("%s: GP age %lu jiffies\n",
-			__func__, jiffies - rcu_state.gp_start);
+			__func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start)));
		show_rcu_gp_kthreads();
	} else {
		pr_info("%s: Last GP end %lu jiffies ago\n",
-			__func__, jiffies - rcu_state.gp_end);
+			__func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end)));
		preempt_disable();
		rdp = this_cpu_ptr(&rcu_data);
		rcu_check_gp_start_stall(rdp->mynode, rdp, j);
...
@@ -64,6 +64,7 @@ torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU
 torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
 torture_param(int, weight_resched, -1, "Testing weight for resched_cpu() operations.");
 torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
+torture_param(int, weight_single_rpc, -1, "Testing weight for single-CPU RPC operations.");
 torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
 torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations.");
 torture_param(int, weight_many_wait, -1, "Testing weight for multi-CPU operations.");
@@ -86,6 +87,8 @@ struct scf_statistics {
	long long n_resched;
	long long n_single;
	long long n_single_ofl;
+	long long n_single_rpc;
+	long long n_single_rpc_ofl;
	long long n_single_wait;
	long long n_single_wait_ofl;
	long long n_many;
@@ -101,14 +104,17 @@ static DEFINE_PER_CPU(long long, scf_invoked_count);
 // Data for random primitive selection
 #define SCF_PRIM_RESCHED	0
 #define SCF_PRIM_SINGLE		1
-#define SCF_PRIM_MANY		2
-#define SCF_PRIM_ALL		3
-#define SCF_NPRIMS		7 // Need wait and no-wait versions of each,
-				  //  except for SCF_PRIM_RESCHED.
+#define SCF_PRIM_SINGLE_RPC	2
+#define SCF_PRIM_MANY		3
+#define SCF_PRIM_ALL		4
+#define SCF_NPRIMS		8 // Need wait and no-wait versions of each,
+				  //  except for SCF_PRIM_RESCHED and
+				  //  SCF_PRIM_SINGLE_RPC.
 
 static char *scf_prim_name[] = {
	"resched_cpu",
	"smp_call_function_single",
+	"smp_call_function_single_rpc",
	"smp_call_function_many",
	"smp_call_function",
 };
@@ -128,6 +134,8 @@ struct scf_check {
	bool scfc_out;
	int scfc_cpu; // -1 for not _single().
	bool scfc_wait;
+	bool scfc_rpc;
+	struct completion scfc_completion;
 };
 
 // Use to wait for all threads to start.
@@ -158,6 +166,7 @@ static void scf_torture_stats_print(void)
		scfs.n_resched += scf_stats_p[i].n_resched;
		scfs.n_single += scf_stats_p[i].n_single;
		scfs.n_single_ofl += scf_stats_p[i].n_single_ofl;
+		scfs.n_single_rpc += scf_stats_p[i].n_single_rpc;
		scfs.n_single_wait += scf_stats_p[i].n_single_wait;
		scfs.n_single_wait_ofl += scf_stats_p[i].n_single_wait_ofl;
		scfs.n_many += scf_stats_p[i].n_many;
@@ -168,9 +177,10 @@ static void scf_torture_stats_print(void)
	if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
	    atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
		bangstr = "!!! ";
-	pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ",
+	pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld single_rpc: %lld single_rpc_ofl: %lld many: %lld/%lld all: %lld/%lld ",
		 SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count, scfs.n_resched,
		 scfs.n_single, scfs.n_single_wait, scfs.n_single_ofl, scfs.n_single_wait_ofl,
+		 scfs.n_single_rpc, scfs.n_single_rpc_ofl,
		 scfs.n_many, scfs.n_many_wait, scfs.n_all, scfs.n_all_wait);
	torture_onoff_stats();
	pr_cont("ste: %d stnmie: %d stnmoe: %d staf: %d\n", atomic_read(&n_errs),
@@ -282,10 +292,13 @@ static void scf_handler(void *scfc_in)
 out:
	if (unlikely(!scfcp))
		return;
-	if (scfcp->scfc_wait)
+	if (scfcp->scfc_wait) {
		WRITE_ONCE(scfcp->scfc_out, true);
-	else
+		if (scfcp->scfc_rpc)
+			complete(&scfcp->scfc_completion);
+	} else {
		kfree(scfcp);
+	}
 }
 
 // As above, but check for correct CPU.
@@ -319,6 +332,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
			scfcp->scfc_cpu = -1;
			scfcp->scfc_wait = scfsp->scfs_wait;
			scfcp->scfc_out = false;
+			scfcp->scfc_rpc = false;
		}
	}
	switch (scfsp->scfs_prim) {
...@@ -350,6 +364,34 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra ...@@ -350,6 +364,34 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
scfcp = NULL; scfcp = NULL;
} }
break; break;
case SCF_PRIM_SINGLE_RPC:
if (!scfcp)
break;
cpu = torture_random(trsp) % nr_cpu_ids;
scfp->n_single_rpc++;
scfcp->scfc_cpu = cpu;
scfcp->scfc_wait = true;
init_completion(&scfcp->scfc_completion);
scfcp->scfc_rpc = true;
barrier(); // Prevent race-reduction compiler optimizations.
scfcp->scfc_in = true;
ret = smp_call_function_single(cpu, scf_handler_1, (void *)scfcp, 0);
if (!ret) {
if (use_cpus_read_lock)
cpus_read_unlock();
else
preempt_enable();
wait_for_completion(&scfcp->scfc_completion);
if (use_cpus_read_lock)
cpus_read_lock();
else
preempt_disable();
} else {
scfp->n_single_rpc_ofl++;
kfree(scfcp);
scfcp = NULL;
}
break;
case SCF_PRIM_MANY:
if (scfsp->scfs_wait)
scfp->n_many_wait++;
@@ -379,10 +421,12 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
}
if (scfcp && scfsp->scfs_wait) {
if (WARN_ON_ONCE((num_online_cpus() > 1 || scfsp->scfs_prim == SCF_PRIM_SINGLE) &&
!scfcp->scfc_out)) {
pr_warn("%s: Memory-ordering failure, scfs_prim: %d.\n", __func__, scfsp->scfs_prim);
atomic_inc(&n_mb_out_errs); // Leak rather than trash!
} else {
kfree(scfcp);
}
barrier(); // Prevent race-reduction compiler optimizations.
}
if (use_cpus_read_lock)
@@ -453,8 +497,8 @@ static void
scftorture_print_module_parms(const char *tag)
{
pr_alert(SCFTORT_FLAG
"--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_rpc=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_rpc, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait);
}
static void scf_cleanup_handler(void *unused)
@@ -469,7 +513,7 @@ static void scf_torture_cleanup(void)
return;
WRITE_ONCE(scfdone, true);
if (nthreads && scf_stats_p)
for (i = 0; i < nthreads; i++)
torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
else
@@ -497,6 +541,7 @@ static int __init scf_torture_init(void)
int firsterr = 0;
unsigned long weight_resched1 = weight_resched;
unsigned long weight_single1 = weight_single;
unsigned long weight_single_rpc1 = weight_single_rpc;
unsigned long weight_single_wait1 = weight_single_wait;
unsigned long weight_many1 = weight_many;
unsigned long weight_many_wait1 = weight_many_wait;
@@ -508,11 +553,13 @@ static int __init scf_torture_init(void)
scftorture_print_module_parms("Start of test");
if (weight_resched == -1 &&
weight_single == -1 && weight_single_rpc == -1 && weight_single_wait == -1 &&
weight_many == -1 && weight_many_wait == -1 &&
weight_all == -1 && weight_all_wait == -1) {
weight_resched1 = 2 * nr_cpu_ids;
weight_single1 = 2 * nr_cpu_ids;
weight_single_rpc1 = 2 * nr_cpu_ids;
weight_single_wait1 = 2 * nr_cpu_ids;
weight_many1 = 2;
weight_many_wait1 = 2;
@@ -523,6 +570,8 @@ static int __init scf_torture_init(void)
weight_resched1 = 0;
if (weight_single == -1)
weight_single1 = 0;
if (weight_single_rpc == -1)
weight_single_rpc1 = 0;
if (weight_single_wait == -1)
weight_single_wait1 = 0;
if (weight_many == -1)
@@ -534,7 +583,7 @@ static int __init scf_torture_init(void)
if (weight_all_wait == -1)
weight_all_wait1 = 0;
}
if (weight_single1 == 0 && weight_single_rpc1 == 0 && weight_single_wait1 == 0 &&
weight_many1 == 0 && weight_many_wait1 == 0 &&
weight_all1 == 0 && weight_all_wait1 == 0) {
VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
@@ -546,6 +595,7 @@ static int __init scf_torture_init(void)
else if (weight_resched1)
VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true);
scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
scf_sel_add(weight_many1, SCF_PRIM_MANY, false);
scf_sel_add(weight_many_wait1, SCF_PRIM_MANY, true);
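A side note on the single_rpc machinery added above: it layers a simple remote procedure call on the kernel's completion API. The requesting thread initializes the completion, fires the IPI with smp_call_function_single(), and sleeps in wait_for_completion() until scf_handler() calls complete(). Because that wait can sleep, the SCF_PRIM_SINGLE_RPC case first drops cpus_read_lock() or re-enables preemption, reacquiring them afterward. Below is a minimal sketch of the general pattern, not the scftorture code itself; my_rpc, my_handler, and my_rpc_call are hypothetical names.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/smp.h>

struct my_rpc {
	struct completion done;
	int result;
};

static void my_handler(void *arg)	/* Runs in IPI context on the target CPU. */
{
	struct my_rpc *rpc = arg;

	rpc->result = 42;		/* Do the remote work. */
	complete(&rpc->done);		/* complete() is safe from IRQ context. */
}

static int my_rpc_call(int cpu)
{
	struct my_rpc rpc;

	init_completion(&rpc.done);
	if (smp_call_function_single(cpu, my_handler, &rpc, 0))
		return -ENXIO;		/* Target CPU was offline. */
	wait_for_completion(&rpc.done);	/* Sleeps, hence no locks held here. */
	return rpc.result;
}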
...
@@ -7781,6 +7781,17 @@ int __sched __cond_resched(void)
preempt_schedule_common();
return 1;
}
/*
* In preemptible kernels, ->rcu_read_lock_nesting tells the tick
* whether the current CPU is in an RCU read-side critical section,
* so the tick can report quiescent states even for CPUs looping
* in kernel context. In contrast, in non-preemptible kernels,
* RCU readers leave no in-memory hints, which means that CPU-bound
* processes executing in kernel context might never report an
* RCU quiescent state. Therefore, the following code causes
* cond_resched() to report a quiescent state, but only when RCU
* is in urgent need of one.
*/
#ifndef CONFIG_PREEMPT_RCU
rcu_all_qs();
#endif
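As a concrete illustration of the comment above, consider a hedged sketch of a CPU-bound kthread loop in a kernel built without CONFIG_PREEMPT_RCU; process_one_item() is an assumed placeholder, not a real API:

while (!kthread_should_stop()) {
	process_one_item();	/* Hypothetical unit of work. */
	cond_resched();		/* May schedule, and (in !PREEMPT_RCU kernels)
				 * reports an RCU quiescent state via
				 * rcu_all_qs() when RCU urgently needs one. */
}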
...
@@ -521,11 +521,11 @@ static void torture_shuffle_tasks(void)
struct shuffle_task *stp;
cpumask_setall(shuffle_tmp_mask);
cpus_read_lock();
/* No point in shuffling if there is only one online CPU (ex: UP) */
if (num_online_cpus() == 1) {
cpus_read_unlock();
return;
}
@@ -541,7 +541,7 @@ static void torture_shuffle_tasks(void)
set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
mutex_unlock(&shuffle_task_mutex);
cpus_read_unlock();
}
/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
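The hunk above switches torture_shuffle_tasks() from get_online_cpus()/put_online_cpus() to cpus_read_lock()/cpus_read_unlock(), the current names for the same CPU-hotplug read lock. A minimal usage sketch of the renamed API:

int cpu;

cpus_read_lock();	/* Hold off CPU-hotplug operations. */
for_each_online_cpu(cpu)
	pr_info("CPU %d is online\n", cpu);	/* Online set is stable here. */
cpus_read_unlock();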
...
@@ -1031,7 +1031,7 @@ struct sys_stat_struct {
* scall32-o32.S in the kernel sources.
* - the system call is performed by calling "syscall"
* - syscall return comes in v0, and register a3 needs to be checked to know
* if an error occurred, in which case errno is in v0.
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
@@ -2243,6 +2243,19 @@ unsigned int sleep(unsigned int seconds)
return 0;
}
static __attribute__((unused))
int msleep(unsigned int msecs)
{
struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
return (my_timeval.tv_sec * 1000) +
(my_timeval.tv_usec / 1000) +
!!(my_timeval.tv_usec % 1000);
else
return 0;
}
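A usage note on the new nolibc msleep() above: it returns 0 after an uninterrupted sleep; if sys_select() fails (typically because a signal interrupted it), it returns the remaining time in milliseconds, rounded up, as recomputed from the timeval that the kernel writes back. A hedged sketch that sleeps the full interval across interruptions:

/* Sleep a full 250 milliseconds, restarting after any signal. */
unsigned int left = 250;

while ((left = msleep(left)) != 0)
	;	/* msleep() returned the rounded-up remainder. */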
static __attribute__((unused))
int stat(const char *path, struct stat *buf)
{
...
@@ -68,16 +68,12 @@ do
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
print ca[int(rand() * ncpus + 1)];
}' < /dev/null`
n=$(($n+1))
if ! taskset -c -p $cpumask $$ > /dev/null 2>&1
then
echo taskset failure: '"taskset -c -p ' $cpumask $$ '"'
exit 1
fi
...
@@ -14,7 +14,7 @@ if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
then
exit 0
fi
find $1 -name console.log -exec cat {} \; |
grep "BUG: KCSAN: " |
sed -e 's/^\[[^]]*] //' |
sort |
...
@@ -142,7 +142,7 @@ then
echo "Cannot copy from $oldrun to $rundir."
usage
fi
rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
touch "$rundir/log"
echo $scriptname $args | tee -a "$rundir/log"
echo $oldrun > "$rundir/re-run"
@@ -179,6 +179,6 @@ if test -n "$dryrun"
then
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
else
( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log"
kvm-end-run-stats.sh "$rundir" "$starttime"
fi
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
# Produce awk statements roughly depicting the system's CPU and cache
# layout. If the required information is not available, produce
# error messages as awk comments. Successful exit regardless.
#
# Usage: kvm-assign-cpus.sh /path/to/sysfs
T=/tmp/kvm-assign-cpus.sh.$$
trap 'rm -rf $T' 0 2
mkdir $T
sysfsdir=${1-/sys/devices/system/node}
if ! cd "$sysfsdir" > $T/msg 2>&1
then
sed -e 's/^/# /' < $T/msg
exit 0
fi
nodelist="`ls -d node*`"
for i in node*
do
if ! test -d $i/
then
echo "# Not a directory: $sysfsdir/node*"
exit 0
fi
for j in $i/cpu*/cache/index*
do
if ! test -d $j/
then
echo "# Not a directory: $sysfsdir/$j"
exit 0
else
break
fi
done
indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`"
break
done
for i in node*/cpu*/cache/index*/shared_cpu_list
do
if ! test -f $i
then
echo "# Not a file: $sysfsdir/$i"
exit 0
else
break
fi
done
firstshared=
for i in $indexlist
do
rm -f $T/cpulist
for n in node*
do
f="$n/cpu*/cache/$i/shared_cpu_list"
if ! cat $f > $T/msg 2>&1
then
sed -e 's/^/# /' < $T/msg
exit 0
fi
cat $f >> $T/cpulist
done
if grep -q '[-,]' $T/cpulist
then
if test -z "$firstshared"
then
firstshared="$i"
fi
fi
done
if test -z "$firstshared"
then
splitindex="`echo $indexlist | sed -e 's/ .*$//'`"
else
splitindex="$firstshared"
fi
nodenum=0
for n in node*
do
cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n |
awk -v nodenum="$nodenum" '
BEGIN {
idx = 0;
}
{
nlists = split($0, cpulists, ",");
for (i = 1; i <= nlists; i++) {
listsize = split(cpulists[i], cpus, "-");
if (listsize == 1)
cpus[2] = cpus[1];
for (j = cpus[1]; j <= cpus[2]; j++) {
print "cpu[" nodenum "][" idx "] = " j ";";
idx++;
}
}
}
END {
print "nodecpus[" nodenum "] = " idx ";";
}'
nodenum=`expr $nodenum + 1`
done
echo "numnodes = $nodenum;"
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
# Create an awk script that takes as input numbers of CPUs and outputs
# lists of CPUs, one per line in both cases.
#
# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ]
#
# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk
# statements initializing the variables describing the system's topology.
#
# The optional state is input by this script (if the file exists and is
# non-empty), and can also be output by this script.
cpuarrays="${1-/sys/devices/system/node}"
scriptfile="${2}"
statefile="${3}"
if ! test -f "$cpuarrays"
then
echo "File not found: $cpuarrays" 1>&2
exit 1
fi
scriptdir="`dirname "$scriptfile"`"
if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! test -w "$scriptdir"
then
echo "Directory not usable for script output: $scriptdir"
exit 1
fi
cat << '___EOF___' > "$scriptfile"
BEGIN {
___EOF___
cat "$cpuarrays" >> "$scriptfile"
if test -r "$statefile"
then
cat "$statefile" >> "$scriptfile"
fi
cat << '___EOF___' >> "$scriptfile"
}
# Do we have the system architecture to guide CPU affinity?
function gotcpus()
{
return numnodes != "";
}
# Return a comma-separated list of the next n CPUs.
function nextcpus(n, i, s)
{
for (i = 0; i < n; i++) {
if (nodecpus[curnode] == "")
curnode = 0;
if (cpu[curnode][curcpu[curnode]] == "")
curcpu[curnode] = 0;
if (s != "")
s = s ",";
s = s cpu[curnode][curcpu[curnode]];
curcpu[curnode]++;
curnode++
}
return s;
}
# Dump out the current node/CPU state so that a later invocation of this
# script can continue where this one left off. Of course, this only works
# when a state file was specified and where there was valid sysfs state.
# Returns 1 if the state was dumped, 0 otherwise.
#
# Dumping the state for one system configuration and loading it into
# another isn't likely to do what you want, whatever that might be.
function dumpcpustate( i, fn)
{
___EOF___
echo ' fn = "'"$statefile"'";' >> $scriptfile
cat << '___EOF___' >> "$scriptfile"
if (fn != "" && gotcpus()) {
print "curnode = " curnode ";" > fn;
for (i = 0; i < numnodes; i++)
if (curcpu[i] != "")
print "curcpu[" i "] = " curcpu[i] ";" >> fn;
return 1;
}
if (fn != "")
print "# No CPU state to dump." > fn;
return 0;
}
___EOF___
@@ -25,7 +25,7 @@ then
echo "$configfile -------"
else
title="$configfile ------- $ncs acquisitions/releases"
dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`
if test -z "$dur"
then
:
...
@@ -25,7 +25,7 @@ if test -z "$nscfs"
then
echo "$configfile ------- "
else
dur="`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`"
if test -z "$dur"
then
rate=""
...
@@ -74,7 +74,10 @@ do
done
if test -f "$rd/kcsan.sum"
then
if ! test -f $T
then
:
elif grep -q CONFIG_KCSAN=y $T
then
echo "Compiler or architecture does not support KCSAN!"
echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
...
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Periodically scan a directory tree to prevent files from being reaped
# by systemd and friends on long runs.
#
# Usage: kvm-remote-noreap.sh pathname
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
pathname="$1"
if test "$pathname" = ""
then
echo Usage: kvm-remote-noreap.sh pathname
exit 1
fi
if ! test -d "$pathname"
then
echo Usage: kvm-remote-noreap.sh pathname
echo " pathname must be a directory."
exit 2
fi
while test -d "$pathname"
do
find "$pathname" -type f -exec touch -c {} \; > /dev/null 2>&1
sleep 30
done
@@ -124,10 +124,12 @@ awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
n = $1;
sub(/\./, "", n);
fn = dest "/kvm-remote-" n ".sh"
print "kvm-remote-noreap.sh " rundir " &" > fn;
scenarios = "";
for (i = 2; i <= NF; i++)
scenarios = scenarios " " $i;
print "kvm-test-1-run-batch.sh" scenarios >> fn;
print "sync" >> fn;
print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
@@ -172,11 +174,20 @@ checkremotefile () {
do
ssh $1 "test -f \"$2\""
ret=$?
if test "$ret" -eq 255
then
echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
elif test "$ret" -eq 0
then
return 0
elif test "$ret" -eq 1
then
echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
return 1
else
echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
return $ret
fi
sleep $sleeptime
done
}
@@ -242,7 +253,8 @@ do
do
sleep 30
done
echo " ---" Collecting results from $i `date`
( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
...
@@ -50,10 +50,34 @@ grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
echo ---- System running test: `uname -a`
echo ---- Starting kernels. `date` | tee -a log
$TORTURE_JITTER_START
kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
for i in "$@"
do
echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
echo > $i/kvm-test-1-run-qemu.sh.out
export TORTURE_AFFINITY=
kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
cat << ' ___EOF___' >> $T/cpubatches.awk
END {
affinitylist = "";
if (!gotcpus()) {
print "echo No CPU-affinity information, so no taskset command.";
} else if (cpu_count !~ /^[0-9][0-9]*$/) {
print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
} else {
affinitylist = nextcpus(cpu_count);
if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
else if (!dumpcpustate())
print "echo " scenario ": Could not dump state, so no taskset command.";
else
print "export TORTURE_AFFINITY=" affinitylist;
}
}
___EOF___
cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
$affinity_export
kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
done
for i in $runfiles
...
@@ -39,27 +39,34 @@ echo ' ---' `date`: Starting kernel, PID $$
grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture
taskset_command=
if test -n "$TORTURE_AFFINITY"
then
taskset_command="taskset -c $TORTURE_AFFINITY "
fi
sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
echo 'qemu_pid=$!' >> $T/qemu-cmd
echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd
echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd
# In case qemu refuses to run...
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
# Attempt to run qemu
kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu-pid"
then
qemu_pid=`cat "$resdir/qemu-pid"`
echo Monitoring qemu job at pid $qemu_pid `date`
else
qemu_pid=""
echo Monitoring qemu job at as-yet-unknown pid `date`
fi
fi
if test -n "$TORTURE_KCONFIG_GDB_ARG"
@@ -82,9 +89,9 @@ then
fi
while :
do
if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
qemu_pid=`cat "$resdir/qemu-pid"`
fi
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
@@ -115,22 +122,22 @@ do
break
fi
done
if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then then
qemu_pid=`cat "$resdir/qemu_pid"` qemu_pid=`cat "$resdir/qemu-pid"`
fi fi
if test $commandcompleted -eq 0 -a -n "$qemu_pid" if test $commandcompleted -eq 0 && test -n "$qemu_pid"
then then
if ! test -f "$resdir/../STOP.1" if ! test -f "$resdir/../STOP.1"
then then
echo Grace period for qemu job at pid $qemu_pid echo Grace period for qemu job at pid $qemu_pid `date`
fi fi
oldline="`tail $resdir/console.log`" oldline="`tail $resdir/console.log`"
while : while :
do do
if test -f "$resdir/../STOP.1" if test -f "$resdir/../STOP.1"
then then
echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid kill -KILL $qemu_pid
break break
fi fi
...@@ -152,13 +159,17 @@ then ...@@ -152,13 +159,17 @@ then
then then
last_ts=0 last_ts=0
fi fi
if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE"
then then
must_continue=yes must_continue=yes
if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date`
fi fi
if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) fi
if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then then
echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid kill -KILL $qemu_pid
break break
fi fi
@@ -172,5 +183,3 @@ fi
# Tell the script that this run is done.
rm -f $resdir/build.run
@@ -205,6 +205,7 @@ echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cm
echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -214,3 +215,4 @@ then
fi
kvm-test-1-run-qemu.sh $resdir
parse-console.sh $resdir/console.log $title
@@ -430,17 +430,10 @@ then
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
cat << '___EOF___' >> $T/dumpbatches.awk
BEGIN {
i = 0;
}
@@ -451,7 +444,7 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
function dump(first, pastlast, batchnum, affinitylist)
{
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
@@ -483,6 +476,14 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "mkdir " rd cfr[jn] " || :";
print "touch " builddir ".wait";
affinitylist = "";
if (gotcpus()) {
affinitylist = nextcpus(cpusr[jn]);
}
if (affinitylist ~ /^[0-9,-][0-9,-]*$/)
print "export TORTURE_AFFINITY=" affinitylist;
else
print "export TORTURE_AFFINITY=";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
@@ -560,7 +561,19 @@ END {
# Dump the last batch.
if (ncpus != 0)
dump(first, i, batchnum);
}
___EOF___
awk < $T/cfgcpu.pack \
-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
-v CONFIGDIR="$CONFIGFRAG/" \
-v KVM="$KVM" \
-v ncpus=$cpus \
-v jitter="$jitter" \
-v rd=$resdir/$ds/ \
-v dur=$dur \
-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
-v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
-f $T/dumpbatches.awk >> $T/script
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
...
@@ -53,6 +53,7 @@ do_refscale=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=no
do_clocksourcewd=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -72,6 +73,7 @@ usage () {
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
echo " --doall"
echo " --doallmodconfig / --do-no-allmodconfig"
echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
echo " --do-kvfree / --do-no-kvfree"
@@ -109,7 +111,7 @@ do
configs_scftorture="$configs_scftorture $2"
shift
;;
--do-all|--doall)
do_allmodconfig=yes
do_rcutorture=yes
do_locktorture=yes
@@ -119,10 +121,14 @@ do
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
do_clocksourcewd=yes
;;
--do-allmodconfig|--do-no-allmodconfig)
do_allmodconfig=`doyesno "$1" --do-allmodconfig`
;;
--do-clocksourcewd|--do-no-clocksourcewd)
do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
;;
--do-kasan|--do-no-kasan)
do_kasan=`doyesno "$1" --do-kasan`
;;
@@ -135,7 +141,7 @@ do
--do-locktorture|--do-no-locktorture)
do_locktorture=`doyesno "$1" --do-locktorture`
;;
--do-none|--donone)
do_allmodconfig=no
do_rcutorture=no
do_locktorture=no
@@ -145,6 +151,7 @@ do
do_kvfree=no
do_kasan=no
do_kcsan=no
do_clocksourcewd=no
;;
--do-rcuscale|--do-no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
@@ -279,9 +286,9 @@ function torture_one {
# torture_bootargs="[ kernel boot arguments ]"
# torture_set flavor [ kvm.sh arguments ]
#
# Note that "flavor" is an arbitrary string that does not affect kvm.sh
# in any way. So also supply --torture if you need something other than
# the default.
function torture_set {
local cur_kcsan_kmake_args=
local kcsan_kmake_tag=
@@ -377,6 +384,22 @@ then
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
then
torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1"
torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
# In case our work is already done...
if test "$do_rcutorture" != "yes"
then
torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
fi
fi
echo " --- " $scriptname $args echo " --- " $scriptname $args
echo " --- " Done `date` | tee -a $T/log echo " --- " Done `date` | tee -a $T/log
ret=0 ret=0
...@@ -395,6 +418,10 @@ then ...@@ -395,6 +418,10 @@ then
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
ret=2 ret=2
fi fi
if test "$do_kcsan" = "yes"
then
TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
......
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
...
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
...
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
...