Commit 766fd5f6 authored by Linus Torvalds

Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull NOHZ updates from Ingo Molnar:

 - fix system/idle cputime being leaked during cputime accounting (all
   nohz configs) (Rik van Riel); the capping scheme behind this fix is
   sketched after the parent list below

 - remove the messy, ad-hoc irqtime accounting on nohz-full and make it
   compatible with CONFIG_IRQ_TIME_ACCOUNTING=y instead (Rik van Riel)

 - cleanups (Frederic Weisbecker)

 - remove unnecessary irq disablement in the irqtime code (Rik van Riel)

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cputime: Drop local_irq_save/restore from irqtime_account_irq()
  sched/cputime: Reorganize vtime native irqtime accounting headers
  sched/cputime: Clean up the old vtime gen irqtime accounting completely
  sched/cputime: Replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code
  sched/cputime: Count actually elapsed irq & softirq time
parents cca08cd6 553bf6bb
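
The common thread in these changes is that steal, hardirq, and softirq time
are now all charged against a budget of actually elapsed time, so the same
tick can no longer be accounted twice. The following is a minimal userspace
sketch of that capping order; it mirrors the shape of account_other_time()
in the kernel/sched/cputime.c hunks below, but the helper names and the
nanosecond values are illustrative assumptions, not kernel code.

/* Userspace illustration only -- not the kernel implementation. */
#include <stdint.h>
#include <stdio.h>

/* Clamp a pending amount of time to what is left of the elapsed budget. */
static uint64_t cap(uint64_t wanted, uint64_t budget)
{
	return wanted < budget ? wanted : budget;
}

/*
 * Same structure as the kernel's account_other_time(): steal first,
 * then hardirq, then softirq, each limited to the remaining budget,
 * so the total can never exceed the time that actually elapsed.
 */
static uint64_t account_other_time(uint64_t max, uint64_t steal,
				   uint64_t hardirq, uint64_t softirq)
{
	uint64_t accounted = cap(steal, max);

	if (accounted < max)
		accounted += cap(hardirq, max - accounted);
	if (accounted < max)
		accounted += cap(softirq, max - accounted);
	return accounted;
}

int main(void)
{
	/* Pretend 4 ms elapsed while 6 ms of "other" time is pending. */
	uint64_t max = 4000000ULL;
	uint64_t other = account_other_time(max, 3000000ULL, 2000000ULL,
					    1000000ULL);

	printf("elapsed %llu ns, other %llu ns, left for user/sys/idle %llu ns\n",
	       (unsigned long long)max, (unsigned long long)other,
	       (unsigned long long)(max - other));	/* other == max here */
	return 0;
}

Because steal is consumed from the budget first, then hardirq, then softirq,
whatever remains is all that the tick code may still attribute to user,
system, or idle time. When pending "other" time covers the whole budget, the
tick is dropped entirely, which is exactly the early return added to
irqtime_account_process_tick() and account_process_tick() in the diff below.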
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
 	(__force u64)(__ct)
 #define nsecs_to_cputime(__nsecs)	\
 	(__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs)	\
+	(__force cputime64_t)(__nsecs)
 
 /*
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -12,11 +12,9 @@ struct task_struct;
 /*
  * vtime_accounting_cpu_enabled() definitions/declarations
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
 static inline bool vtime_accounting_cpu_enabled(void) { return true; }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
 /*
  * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
  * in that case and compute the tickless cputime.
@@ -37,11 +35,9 @@ static inline bool vtime_accounting_cpu_enabled(void)
 	return false;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#endif
 
 /*
@@ -64,35 +60,15 @@ extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 
-#ifdef __ARCH_HAS_VTIME_ACCOUNT
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-#else
-extern void vtime_common_account_irq_enter(struct task_struct *tsk);
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
-{
-	if (vtime_accounting_cpu_enabled())
-		vtime_common_account_irq_enter(tsk);
-}
-#endif /* __ARCH_HAS_VTIME_ACCOUNT */
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
-static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
-
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-	if (vtime_accounting_cpu_enabled())
-		vtime_gen_account_irq_exit(tsk);
-}
-
 extern void vtime_user_enter(struct task_struct *tsk);
 static inline void vtime_user_exit(struct task_struct *tsk)
@@ -103,11 +79,6 @@ extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
-}
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
@@ -115,6 +86,19 @@ static inline void vtime_guest_exit(struct task_struct *tsk) { }
 static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline void vtime_account_irq_exit(struct task_struct *tsk) { }
+#endif
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 extern void irqtime_account_irq(struct task_struct *tsk);
 #else
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -375,9 +375,11 @@ config VIRT_CPU_ACCOUNTING_GEN
 
 	  If unsure, say N.
 
+endchoice
+
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
-	depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
+	depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
 	help
 	  Select this option to enable fine granularity task irq time
 	  accounting. This is done by reading a timestamp on each
@@ -386,8 +388,6 @@ config IRQ_TIME_ACCOUNTING
 
 	  If in doubt, say N here.
 
-endchoice
-
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	depends on MULTIUSER
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  */
 void irqtime_account_irq(struct task_struct *curr)
 {
-	unsigned long flags;
 	s64 delta;
 	int cpu;
 
 	if (!sched_clock_irqtime)
 		return;
 
-	local_irq_save(flags);
-
 	cpu = smp_processor_id();
 	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
 	__this_cpu_add(irq_start_time, delta);
@@ -75,44 +72,53 @@ void irqtime_account_irq(struct task_struct *curr)
 		__this_cpu_add(cpu_softirq_time, delta);
 
 	irq_time_write_end();
-	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t irq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-		ret = 1;
+	irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
+		      cpustat[CPUTIME_IRQ];
+	irq_cputime = min(irq_cputime, maxtime);
+	cpustat[CPUTIME_IRQ] += irq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return irq_cputime;
 }
 
-static int irqtime_account_si_update(void)
+static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t softirq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-		ret = 1;
+	softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
+			  cpustat[CPUTIME_SOFTIRQ];
+	softirq_cputime = min(softirq_cputime, maxtime);
+	cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return softirq_cputime;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime	(0)
 
+static cputime_t irqtime_account_hi_update(cputime_t dummy)
+{
+	return 0;
+}
+
+static cputime_t irqtime_account_si_update(cputime_t dummy)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -257,31 +263,44 @@ void account_idle_time(cputime_t cputime)
 		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
-static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies)
+static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
+		cputime_t steal_cputime;
 		u64 steal;
-		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
-		/*
-		 * steal is in nsecs but our caller is expecting steal
-		 * time in jiffies. Lets cast the result to jiffies
-		 * granularity and account the rest on the next rounds.
-		 */
-		steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
-		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
+		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
+		account_steal_time(steal_cputime);
+		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 
-		account_steal_time(jiffies_to_cputime(steal_jiffies));
-		return steal_jiffies;
+		return steal_cputime;
 	}
 #endif
 	return 0;
 }
 
+/*
+ * Account how much elapsed time was spent in steal, irq, or softirq time.
+ */
+static inline cputime_t account_other_time(cputime_t max)
+{
+	cputime_t accounted;
+
+	accounted = steal_account_process_time(max);
+
+	if (accounted < max)
+		accounted += irqtime_account_hi_update(max - accounted);
+
+	if (accounted < max)
+		accounted += irqtime_account_si_update(max - accounted);
+
+	return accounted;
+}
+
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
@@ -342,21 +361,23 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 					 struct rq *rq, int ticks)
 {
-	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
-	u64 cputime = (__force u64) cputime_one_jiffy;
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
+	cputime_t scaled, other;
 
-	if (steal_account_process_tick(ULONG_MAX))
+	/*
+	 * When returning from idle, many ticks can get accounted at
+	 * once, including some ticks of steal, irq, and softirq time.
+	 * Subtract those ticks from the amount of time accounted to
+	 * idle, or potentially user or system time. Due to rounding,
+	 * other time can exceed ticks occasionally.
+	 */
+	other = account_other_time(cputime);
+	if (other >= cputime)
 		return;
+	cputime -= other;
+	scaled = cputime_to_scaled(cputime);
 
-	cputime *= ticks;
-	scaled *= ticks;
-
-	if (irqtime_account_hi_update()) {
-		cpustat[CPUTIME_IRQ] += cputime;
-	} else if (irqtime_account_si_update()) {
-		cpustat[CPUTIME_SOFTIRQ] += cputime;
-	} else if (this_cpu_ksoftirqd() == p) {
+	if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
@@ -406,6 +427,10 @@ void vtime_common_task_switch(struct task_struct *prev)
 }
 #endif
 
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
@@ -415,33 +440,16 @@ void vtime_common_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_common_account_irq_enter(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (!in_interrupt()) {
-		/*
-		 * If we interrupted user, context_tracking_in_user()
-		 * is 1 because the context tracking don't hook
-		 * on irq entry/exit. This way we know if
-		 * we need to flush user time on kernel entry.
-		 */
-		if (context_tracking_in_user()) {
-			vtime_account_user(tsk);
-			return;
-		}
-
-		if (is_idle_task(tsk)) {
-			vtime_account_idle(tsk);
-			return;
-		}
-	}
-	vtime_account_system(tsk);
+	if (!in_interrupt() && is_idle_task(tsk))
+		vtime_account_idle(tsk);
+	else
+		vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	*ut = p->utime;
@@ -466,7 +474,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+	cputime_t cputime, scaled, steal;
 	struct rq *rq = this_rq();
 
 	if (vtime_accounting_cpu_enabled())
@@ -477,16 +485,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
 		return;
 	}
 
-	if (steal_account_process_tick(ULONG_MAX))
+	cputime = cputime_one_jiffy;
+	steal = steal_account_process_time(cputime);
+
+	if (steal >= cputime)
 		return;
 
+	cputime -= steal;
+	scaled = cputime_to_scaled(cputime);
+
 	if (user_tick)
-		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime, scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
-				    one_jiffy_scaled);
+		account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
 	else
-		account_idle_time(cputime_one_jiffy);
+		account_idle_time(cputime);
 }
 
 /*
@@ -681,14 +694,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
 	unsigned long now = READ_ONCE(jiffies);
-	unsigned long delta_jiffies, steal_jiffies;
+	cputime_t delta, other;
 
-	delta_jiffies = now - tsk->vtime_snap;
-	steal_jiffies = steal_account_process_tick(delta_jiffies);
+	delta = jiffies_to_cputime(now - tsk->vtime_snap);
+	other = account_other_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 	tsk->vtime_snap = now;
 
-	return jiffies_to_cputime(delta_jiffies - steal_jiffies);
+	return delta - other;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -708,16 +721,6 @@ void vtime_account_system(struct task_struct *tsk)
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
-void vtime_gen_account_irq_exit(struct task_struct *tsk)
-{
-	write_seqcount_begin(&tsk->vtime_seqcount);
-	if (vtime_delta(tsk))
-		__vtime_account_system(tsk);
-	if (context_tracking_in_user())
-		tsk->vtime_snap_whence = VTIME_USER;
-	write_seqcount_end(&tsk->vtime_seqcount);
-}
-
 void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t delta_cpu;