Commit fe44d621, authored by Peter Zijlstra, committed by Ingo Molnar

sched: Fix the irqtime code to deal with u64 wraps

Some ARM systems have a short sched_clock() [ which needs to be fixed
too ], and this exposed a bug in the irq_time code as well: it doesn't
deal with wraps at all.

Fix the irq_time code to deal with u64 wraps by rewriting the code to
only use delta increments, which avoids the whole issue.
Reviewed-by: Venkatesh Pallipadi <venki@google.com>
Reported-by: Mikael Pettersson <mikpe@it.uu.se>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1292242433.6803.199.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent dbd87b5a
...@@ -636,23 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) ...@@ -636,23 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)
#endif /* CONFIG_CGROUP_SCHED */ #endif /* CONFIG_CGROUP_SCHED */
static u64 irq_time_cpu(int cpu); static void update_rq_clock_task(struct rq *rq, s64 delta);
static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
inline void update_rq_clock(struct rq *rq) static void update_rq_clock(struct rq *rq)
{ {
int cpu = cpu_of(rq); s64 delta;
u64 irq_time;
if (rq->skip_clock_update) if (rq->skip_clock_update)
return; return;
rq->clock = sched_clock_cpu(cpu); delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
irq_time = irq_time_cpu(cpu); rq->clock += delta;
if (rq->clock - irq_time > rq->clock_task) update_rq_clock_task(rq, delta);
rq->clock_task = rq->clock - irq_time;
sched_irq_time_avg_update(rq, irq_time);
} }
/* /*
...@@ -1946,19 +1941,20 @@ void disable_sched_clock_irqtime(void) ...@@ -1946,19 +1941,20 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0; sched_clock_irqtime = 0;
} }
static u64 irq_time_cpu(int cpu) static inline u64 irq_time_cpu(int cpu)
{ {
if (!sched_clock_irqtime)
return 0;
return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
} }
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
*/
void account_system_vtime(struct task_struct *curr) void account_system_vtime(struct task_struct *curr)
{ {
unsigned long flags; unsigned long flags;
s64 delta;
int cpu; int cpu;
u64 now, delta;
if (!sched_clock_irqtime) if (!sched_clock_irqtime)
return; return;
...@@ -1966,9 +1962,9 @@ void account_system_vtime(struct task_struct *curr) ...@@ -1966,9 +1962,9 @@ void account_system_vtime(struct task_struct *curr)
local_irq_save(flags); local_irq_save(flags);
cpu = smp_processor_id(); cpu = smp_processor_id();
now = sched_clock_cpu(cpu); delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
delta = now - per_cpu(irq_start_time, cpu); __this_cpu_add(irq_start_time, delta);
per_cpu(irq_start_time, cpu) = now;
/* /*
* We do not account for softirq time from ksoftirqd here. * We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread * We want to continue accounting softirq time to ksoftirqd thread
...@@ -1976,33 +1972,54 @@ void account_system_vtime(struct task_struct *curr) ...@@ -1976,33 +1972,54 @@ void account_system_vtime(struct task_struct *curr)
* that do not consume any time, but still wants to run. * that do not consume any time, but still wants to run.
*/ */
if (hardirq_count()) if (hardirq_count())
per_cpu(cpu_hardirq_time, cpu) += delta; __this_cpu_add(cpu_hardirq_time, delta);
else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
per_cpu(cpu_softirq_time, cpu) += delta; __this_cpu_add(cpu_softirq_time, delta);
local_irq_restore(flags); local_irq_restore(flags);
} }
EXPORT_SYMBOL_GPL(account_system_vtime); EXPORT_SYMBOL_GPL(account_system_vtime);
static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) static void update_rq_clock_task(struct rq *rq, s64 delta)
{ {
if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { s64 irq_delta;
u64 delta_irq = curr_irq_time - rq->prev_irq_time;
rq->prev_irq_time = curr_irq_time; irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time;
sched_rt_avg_update(rq, delta_irq);
} /*
* Since irq_time is only updated on {soft,}irq_exit, we might run into
* this case when a previous update_rq_clock() happened inside a
* {soft,}irq region.
*
* When this happens, we stop ->clock_task and only update the
* prev_irq_time stamp to account for the part that fit, so that a next
* update will consume the rest. This ensures ->clock_task is
* monotonic.
*
* It does however cause some slight misattribution of {soft,}irq
* time, a more accurate solution would be to update the irq_time using
* the current rq->clock timestamp, except that would require using
* atomic ops.
*/
if (irq_delta > delta)
irq_delta = delta;
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
rq->clock_task += delta;
if (irq_delta && sched_feat(NONIRQ_POWER))
sched_rt_avg_update(rq, irq_delta);
} }
#else #else /* CONFIG_IRQ_TIME_ACCOUNTING */
static u64 irq_time_cpu(int cpu) static void update_rq_clock_task(struct rq *rq, s64 delta)
{ {
return 0; rq->clock_task += delta;
} }
static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#endif
#include "sched_idletask.c" #include "sched_idletask.c"
#include "sched_fair.c" #include "sched_fair.c"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment