Commit 7053ea1a authored by Rik van Riel, committed by Ingo Molnar

stop_machine: Fix race between stop_two_cpus() and stop_cpus()

There is a race between stop_two_cpus() and the global stop_cpus().

It is possible for two CPUs to get their stopper functions queued
"backwards" from one another, resulting in the stopper threads
getting stuck, and the system hanging. This can happen because
queuing up stoppers is not synchronized.

This patch adds synchronization between stop_cpus (a rare operation),
and stop_two_cpus.

Reported-and-Tested-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/20131101104146.03d1e043@annuminas.surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 37dc6b50
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/smpboot.h> #include <linux/smpboot.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/lglock.h>
/* /*
* Structure to determine completion condition and record errors. May * Structure to determine completion condition and record errors. May
...@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); ...@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task); static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
static bool stop_machine_initialized = false; static bool stop_machine_initialized = false;
/*
* Avoids a race between stop_two_cpus and global stop_cpus, where
* the stoppers could get queued up in reverse order, leading to
* system deadlock. Using an lglock means stop_two_cpus remains
* relatively cheap.
*/
DEFINE_STATIC_LGLOCK(stop_cpus_lock);
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{ {
memset(done, 0, sizeof(*done)); memset(done, 0, sizeof(*done));
...@@ -276,6 +285,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * ...@@ -276,6 +285,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
return -ENOENT; return -ENOENT;
} }
lg_local_lock(&stop_cpus_lock);
/* /*
* Queuing needs to be done by the lowest numbered CPU, to ensure * Queuing needs to be done by the lowest numbered CPU, to ensure
* that works are always queued in the same order on every CPU. * that works are always queued in the same order on every CPU.
...@@ -284,6 +294,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * ...@@ -284,6 +294,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
smp_call_function_single(min(cpu1, cpu2), smp_call_function_single(min(cpu1, cpu2),
&irq_cpu_stop_queue_work, &irq_cpu_stop_queue_work,
&call_args, 0); &call_args, 0);
lg_local_unlock(&stop_cpus_lock);
preempt_enable(); preempt_enable();
wait_for_completion(&done.completion); wait_for_completion(&done.completion);
...@@ -335,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask, ...@@ -335,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
* preempted by a stopper which might wait for other stoppers * preempted by a stopper which might wait for other stoppers
* to enter @fn which can lead to deadlock. * to enter @fn which can lead to deadlock.
*/ */
preempt_disable(); lg_global_lock(&stop_cpus_lock);
for_each_cpu(cpu, cpumask) for_each_cpu(cpu, cpumask)
cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu)); cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
preempt_enable(); lg_global_unlock(&stop_cpus_lock);
} }
static int __stop_cpus(const struct cpumask *cpumask, static int __stop_cpus(const struct cpumask *cpumask,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment