Commit e210bffd authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

sched/fair: Fix and optimize the fork() path

The task_fork_fair() callback already calls __set_task_cpu() and takes
rq->lock.

If we move the sched_class::task_fork callback in sched_fork() under
the existing p->pi_lock, right after its set_task_cpu() call, we can
avoid doing two such calls and omit the IRQ disabling on the rq->lock.

Change to __set_task_cpu() to skip the migration bits, this is a new
task, not a migration. Similarly, make wake_up_new_task() use
__set_task_cpu() for the same reason, the task hasn't actually
migrated as it hasn't ever ran.

This cures the problem of calling migrate_task_rq_fair(), which does
remove_entity_from_load_avg() on tasks that have never been added to
the load avg to begin with.

This bug would result in transiently messed up load_avg values, averaged
out after a few dozen milliseconds. This is probably the reason why
this bug was not found for such a long time.
Reported-by: default avatarVincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 630741fb
...@@ -2383,9 +2383,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -2383,9 +2383,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_class = &fair_sched_class; p->sched_class = &fair_sched_class;
} }
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
/* /*
* The child is not yet in the pid-hash so no cgroup attach races, * The child is not yet in the pid-hash so no cgroup attach races,
* and the cgroup is pinned to this child due to cgroup_fork() * and the cgroup is pinned to this child due to cgroup_fork()
...@@ -2394,7 +2391,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -2394,7 +2391,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
* Silence PROVE_RCU. * Silence PROVE_RCU.
*/ */
raw_spin_lock_irqsave(&p->pi_lock, flags); raw_spin_lock_irqsave(&p->pi_lock, flags);
set_task_cpu(p, cpu); /*
* We're setting the cpu for the first time, we don't migrate,
* so use __set_task_cpu().
*/
__set_task_cpu(p, cpu);
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags); raw_spin_unlock_irqrestore(&p->pi_lock, flags);
#ifdef CONFIG_SCHED_INFO #ifdef CONFIG_SCHED_INFO
...@@ -2534,8 +2537,11 @@ void wake_up_new_task(struct task_struct *p) ...@@ -2534,8 +2537,11 @@ void wake_up_new_task(struct task_struct *p)
* Fork balancing, do it here and not earlier because: * Fork balancing, do it here and not earlier because:
* - cpus_allowed can change in the fork path * - cpus_allowed can change in the fork path
* - any previously selected cpu might disappear through hotplug * - any previously selected cpu might disappear through hotplug
*
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
* as we're not fully set-up yet.
*/ */
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif #endif
rq = __task_rq_lock(p, &rf); rq = __task_rq_lock(p, &rf);
post_init_entity_util_avg(&p->se); post_init_entity_util_avg(&p->se);
......
...@@ -8289,31 +8289,17 @@ static void task_fork_fair(struct task_struct *p) ...@@ -8289,31 +8289,17 @@ static void task_fork_fair(struct task_struct *p)
{ {
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se, *curr; struct sched_entity *se = &p->se, *curr;
int this_cpu = smp_processor_id();
struct rq *rq = this_rq(); struct rq *rq = this_rq();
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
raw_spin_lock(&rq->lock);
update_rq_clock(rq); update_rq_clock(rq);
cfs_rq = task_cfs_rq(current); cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr; curr = cfs_rq->curr;
if (curr) {
/*
* Not only the cpu but also the task_group of the parent might have
* been changed after parent->se.parent,cfs_rq were copied to
* child->se.parent,cfs_rq. So call __set_task_cpu() to make those
* of child point to valid ones.
*/
rcu_read_lock();
__set_task_cpu(p, this_cpu);
rcu_read_unlock();
update_curr(cfs_rq); update_curr(cfs_rq);
if (curr)
se->vruntime = curr->vruntime; se->vruntime = curr->vruntime;
}
place_entity(cfs_rq, se, 1); place_entity(cfs_rq, se, 1);
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
...@@ -8326,8 +8312,7 @@ static void task_fork_fair(struct task_struct *p) ...@@ -8326,8 +8312,7 @@ static void task_fork_fair(struct task_struct *p)
} }
se->vruntime -= cfs_rq->min_vruntime; se->vruntime -= cfs_rq->min_vruntime;
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&rq->lock, flags);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment