Commit 3ea94de1 authored by Joonwoo Park, committed by Ingo Molnar

sched/core: Fix incorrect wait time and wait count statistics

At present, the scheduler resets a task's wait start timestamp when the task
migrates to another rq.  This misleads the scheduler into reporting less wait
time than actual, by omitting the time spent waiting prior to the migration,
and a higher wait count than actual, by counting the migration as a wait-end
event.  Both can be seen via trace or /proc/<pid>/sched with
CONFIG_SCHEDSTATS=y.

To fix these statistics errors, carry forward the wait time a migrating task
accrued prior to the migration, and do not count the migration as a wait-end
event.

To determine whether a task is migrating, mark task->on_rq with
TASK_ON_RQ_MIGRATING while it is being dequeued and enqueued due to migration.
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: ohaugan@codeaurora.org
Link: http://lkml.kernel.org/r/20151113033854.GA4247@codeaurora.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 51170840
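
As a rough, self-contained illustration of the accounting change described above (plain user-space C, not kernel code; the clock values and the wait_begin()/wait_end() helpers are invented for this sketch): at a migration, the wait-end path stores the wait accrued so far into wait_start instead of a timestamp, and the wait-start path on the destination rq rebases the timestamp by that amount, so the wait that finally ends when the task runs covers the whole runnable period.

#include <stdio.h>

typedef unsigned long long u64;

static u64 clock_ns;              /* stand-in for rq_clock()           */
static u64 wait_start;            /* se->statistics.wait_start         */
static u64 wait_sum, wait_count;  /* se->statistics.wait_{sum,count}   */

static void wait_begin(int migrated)
{
	u64 start = clock_ns;

	/* rebase: pretend the wait began 'wait_start' ns ago */
	if (migrated && start > wait_start)
		start -= wait_start;
	wait_start = start;
}

static void wait_end(int migrating)
{
	u64 delta = clock_ns - wait_start;

	if (migrating) {
		/* keep the accrued wait itself, not a timestamp */
		wait_start = delta;
		return;
	}
	wait_sum += delta;
	wait_count++;
	wait_start = 0;
}

int main(void)
{
	clock_ns = 1000; wait_begin(0);  /* task starts waiting on CPU0 */
	clock_ns = 6000; wait_end(1);    /* dequeued for migration      */
	                 wait_begin(1);  /* enqueued on the new rq      */
	clock_ns = 9000; wait_end(0);    /* finally selected to run     */

	/* prints wait_sum=8000 wait_count=1: one 8000ns wait */
	printf("wait_sum=%llu wait_count=%llu\n", wait_sum, wait_count);
	return 0;
}

Without the carry-forward, the same sequence would be reported as two separate waits of 5000ns and 3000ns (wait_count=2), so each reported wait, and wait_max, understates the 8000ns the task actually spent runnable.
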
@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
 {
 	lockdep_assert_held(&rq->lock);
 
-	dequeue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	dequeue_task(rq, p, 0);
 	set_task_cpu(p, new_cpu);
 	raw_spin_unlock(&rq->lock);
@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
 	raw_spin_lock(&rq->lock);
 	BUG_ON(task_cpu(p) != new_cpu);
-	p->on_rq = TASK_ON_RQ_QUEUED;
 	enqueue_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
 
 	return rq;
@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
 			!p->on_rq);
 
+	/*
+	 * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
+	 * because schedstat_wait_{start,end} rebase migrating task's wait_start
+	 * time relying on p->on_rq.
+	 */
+	WARN_ON_ONCE(p->state == TASK_RUNNING &&
+		     p->sched_class == &fair_sched_class &&
+		     (p->on_rq && !task_on_rq_migrating(p)));
+
 #ifdef CONFIG_LOCKDEP
 	/*
 	 * The caller should hold either p->pi_lock or rq->lock, when changing
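
The new WARN_ON_ONCE() above relies on p->on_rq distinguishing a queued task from one that is mid-migration. A minimal stand-alone sketch of just that condition (struct task, would_warn() and the harness below are illustrative stand-ins, not kernel definitions; the constants mirror the kernel's TASK_ON_RQ_* values):

#include <stdio.h>

#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2

struct task { int on_rq; };

static int task_on_rq_migrating(const struct task *p)
{
	return p->on_rq == TASK_ON_RQ_MIGRATING;
}

/* the on_rq part of the new warning, minus the state/class checks */
static int would_warn(const struct task *p)
{
	return p->on_rq && !task_on_rq_migrating(p);
}

int main(void)
{
	struct task p = { .on_rq = TASK_ON_RQ_QUEUED };

	printf("%d\n", would_warn(&p));  /* 1: still marked queued */
	p.on_rq = TASK_ON_RQ_MIGRATING;
	printf("%d\n", would_warn(&p));  /* 0: properly marked     */
	return 0;
}

In other words, a runnable fair-class task that reaches set_task_cpu() while still marked TASK_ON_RQ_QUEUED went through a path that did not flip it to TASK_ON_RQ_MIGRATING first, which would defeat the wait_start rebasing below.
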
@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
 		src_rq = task_rq(p);
 		dst_rq = cpu_rq(cpu);
 
+		p->on_rq = TASK_ON_RQ_MIGRATING;
 		deactivate_task(src_rq, p, 0);
 		set_task_cpu(p, cpu);
 		activate_task(dst_rq, p, 0);
+		p->on_rq = TASK_ON_RQ_QUEUED;
 		check_preempt_curr(dst_rq, p, 0);
 	} else {
 		/*
......
@@ -738,12 +738,56 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
+#ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
+	u64 wait_start = rq_clock(rq_of(cfs_rq));
+
+	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+	    likely(wait_start > se->statistics.wait_start))
+		wait_start -= se->statistics.wait_start;
+
+	se->statistics.wait_start = wait_start;
 }
 
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct task_struct *p;
+	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+
+	if (entity_is_task(se)) {
+		p = task_of(se);
+		if (task_on_rq_migrating(p)) {
+			/*
+			 * Preserve migrating task's wait time so wait_start
+			 * time stamp can be adjusted to accumulate wait time
+			 * prior to migration.
+			 */
+			se->statistics.wait_start = delta;
+			return;
+		}
+		trace_sched_stat_wait(p, delta);
+	}
+
+	se->statistics.wait_max = max(se->statistics.wait_max, delta);
+	se->statistics.wait_count++;
+	se->statistics.wait_sum += delta;
+	se->statistics.wait_start = 0;
+}
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
 /*
  * Task is being enqueued - update stats:
  */
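
One subtle detail in the new update_stats_wait_start() above is the likely(wait_start > se->statistics.wait_start) check: while the entity is migrating, se->statistics.wait_start holds the carried-over wait (a delta, not a timestamp), and subtracting it from a destination-rq clock that happens to be smaller would wrap around in u64 arithmetic. A tiny stand-alone sketch of that corner case (the numbers are invented):

#include <stdio.h>

typedef unsigned long long u64;

int main(void)
{
	u64 now = 1000;      /* destination rq clock (illustrative) */
	u64 carried = 5000;  /* wait accrued before the migration   */
	u64 wait_start;

	/* an unguarded rebase would wrap to a huge bogus timestamp */
	printf("unguarded: %llu\n", now - carried);

	/* with the guard, the rebase is skipped and the wait simply
	 * restarts from 'now', dropping the carried amount */
	wait_start = (now > carried) ? now - carried : now;
	printf("guarded:   %llu\n", wait_start);
	return 0;
}

The common case is the subtraction succeeding, which is what the likely() annotation expresses.
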
@@ -757,23 +801,6 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		update_stats_wait_start(cfs_rq, se);
 }
 
-static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
-	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
-	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
-	if (entity_is_task(se)) {
-		trace_sched_stat_wait(task_of(se),
-			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-	}
-#endif
-	schedstat_set(se->statistics.wait_start, 0);
-}
-
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -5745,8 +5772,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_held(&env->src_rq->lock);
 
-	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(env->src_rq, p, 0);
 	set_task_cpu(p, env->dst_cpu);
 }
@@ -5879,8 +5906,8 @@ static void attach_task(struct rq *rq, struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	BUG_ON(task_rq(p) != rq);
-	p->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
 }
......