Commit ce96b5ac authored by Dmitry Adamushko, committed by Ingo Molnar

sched: fix __set_task_cpu() SMP race

Grant Wilson has reported rare SCHED_FAIR_USER crashes on his quad-core
system, crashes that can only be explained by runqueue corruption.

There is a narrow SMP race in __set_task_cpu(): after ->cpu is set to a
new value, task_rq_lock(p, ...) can be successfully executed on another
CPU. We must ensure that updates of per-task data have been completed by
that moment.

This bug has been hiding in the Linux scheduler for an eternity (we never
had any explicit barrier for task->cpu in set_task_cpu() - so the bug was
introduced in 2.5.1), but it only became visible via set_task_cfs_rq() being
accidentally placed after the task->cpu update. It also probably needs a
sufficiently out-of-order CPU to trigger.
Reported-by: Grant Wilson <grant.wilson@zen.co.uk>
Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent dae51f56
@@ -216,15 +216,15 @@ static inline struct task_group *task_group(struct task_struct *p)
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_cfs_rq(struct task_struct *p)
+static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
 {
-	p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)];
-	p->se.parent = task_group(p)->se[task_cpu(p)];
+	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+	p->se.parent = task_group(p)->se[cpu];
 }
 
 #else
 
-static inline void set_task_cfs_rq(struct task_struct *p) { }
+static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
@@ -1022,10 +1022,16 @@ unsigned long weighted_cpuload(const int cpu)
 
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
+	set_task_cfs_rq(p, cpu);
 #ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfully executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
 	task_thread_info(p)->cpu = cpu;
 #endif
-	set_task_cfs_rq(p);
 }
 
 #ifdef CONFIG_SMP
@@ -7088,7 +7094,7 @@ void sched_move_task(struct task_struct *tsk)
 	rq = task_rq_lock(tsk, &flags);
 
 	if (tsk->sched_class != &fair_sched_class) {
-		set_task_cfs_rq(tsk);
+		set_task_cfs_rq(tsk, task_cpu(tsk));
 		goto done;
 	}
@@ -7103,7 +7109,7 @@
 		tsk->sched_class->put_prev_task(rq, tsk);
 	}
 
-	set_task_cfs_rq(tsk);
+	set_task_cfs_rq(tsk, task_cpu(tsk));
 
 	if (on_rq) {
 		if (unlikely(running))
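For reference, reconstructed from the hunks above, __set_task_cpu() reads as follows after the patch; the per-task cfs_rq pointers are now updated before ->cpu is published, with smp_wmb() ordering the two:

static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
	/* Update per-task data first ... */
	set_task_cfs_rq(p, cpu);
#ifdef CONFIG_SMP
	/*
	 * ... then make those updates visible before publishing the new
	 * ->cpu, which task_rq_lock(p, ...) on another CPU keys off.
	 */
	smp_wmb();
	task_thread_info(p)->cpu = cpu;
#endif
}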