Commit 3992c032 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer core changes from Ingo Molnar:
 "Continued cleanups of the core time and NTP code, plus more nohz work
  preparing for tick-less userspace execution."

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  time: Rework timekeeping functions to take timekeeper ptr as argument
  time: Move xtime_nsec adjustment underflow handling timekeeping_adjust
  time: Move arch_gettimeoffset() usage into timekeeping_get_ns()
  time: Refactor accumulation of nsecs to secs
  time: Condense timekeeper.xtime into xtime_sec
  time: Explicitly use u32 instead of int for shift values
  time: Whitespace cleanups per Ingo%27s requests
  nohz: Move next idle expiry time record into idle logic area
  nohz: Move ts->idle_calls incrementation into strict idle logic
  nohz: Rename ts->idle_tick to ts->last_tick
  nohz: Make nohz API agnostic against idle ticks cputime accounting
  nohz: Separate idle sleeping time accounting from nohz logic
  timers: Improve get_next_timer_interrupt()
  timers: Add accounting of non deferrable timers
  timers: Consolidate base->next_timer update
  timers: Create detach_if_pending() and use it
parents 55acdddb eec19d1a
......@@ -31,10 +31,10 @@ enum tick_nohz_mode {
* struct tick_sched - sched tick emulation and no idle tick control/stats
* @sched_timer: hrtimer to schedule the periodic tick in high
* resolution mode
* @idle_tick: Store the last idle tick expiry time when the tick
* timer is modified for idle sleeps. This is necessary
* @last_tick: Store the last tick expiry time when the tick
* timer is modified for nohz sleeps. This is necessary
* to resume the tick timer operation in the timeline
* when the CPU returns from idle
* when the CPU returns from nohz sleep.
* @tick_stopped: Indicator that the idle tick has been stopped
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_calls: Total number of idle calls
......@@ -51,7 +51,7 @@ struct tick_sched {
struct hrtimer sched_timer;
unsigned long check_clocks;
enum tick_nohz_mode nohz_mode;
ktime_t idle_tick;
ktime_t last_tick;
int inidle;
int tick_stopped;
unsigned long idle_jiffies;
......
......@@ -271,50 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
ktime_t last_update, expires, ret = { .tv64 = 0 };
unsigned long rcu_delta_jiffies;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
int cpu;
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
now = tick_nohz_start_idle(cpu, ts);
/*
* If this cpu is offline and it is the one which updates
* jiffies, then give up the assignment and let it be taken by
* the cpu which runs the tick timer next. If we don't drop
* this here the jiffies might be stale and do_timer() never
* invoked.
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
return;
if (need_resched())
return;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
return;
}
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&xtime_lock);
......@@ -397,6 +362,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
goto out;
ret = expires;
/*
* nohz_stop_sched_tick can be called several times before
* the nohz_restart_sched_tick is called. This happens when
......@@ -408,16 +375,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
select_nohz_load_balancer(1);
calc_load_enter_idle();
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
ts->idle_jiffies = last_jiffies;
}
ts->idle_sleeps++;
/* Mark expires */
ts->idle_expires = expires;
/*
* If the expiration time == KTIME_MAX, then
* in this case we simply stop the tick timer.
......@@ -448,6 +409,65 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
ts->next_jiffies = next_jiffies;
ts->last_jiffies = last_jiffies;
ts->sleep_length = ktime_sub(dev->next_event, now);
return ret;
}
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
* If this cpu is offline and it is the one which updates
* jiffies, then give up the assignment and let it be taken by
* the cpu which runs the tick timer next. If we don't drop
* this here the jiffies might be stale and do_timer() never
* invoked.
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
return false;
if (need_resched())
return false;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
return false;
}
return true;
}
static void __tick_nohz_idle_enter(struct tick_sched *ts)
{
ktime_t now, expires;
int cpu = smp_processor_id();
now = tick_nohz_start_idle(cpu, ts);
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
ts->idle_calls++;
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
if (expires.tv64 > 0LL) {
ts->idle_sleeps++;
ts->idle_expires = expires;
}
if (!was_stopped && ts->tick_stopped)
ts->idle_jiffies = ts->last_jiffies;
}
}
/**
......@@ -485,7 +505,7 @@ void tick_nohz_idle_enter(void)
* update of the idle time accounting in tick_nohz_start_idle().
*/
ts->inidle = 1;
tick_nohz_stop_sched_tick(ts);
__tick_nohz_idle_enter(ts);
local_irq_enable();
}
......@@ -505,7 +525,7 @@ void tick_nohz_irq_exit(void)
if (!ts->inidle)
return;
tick_nohz_stop_sched_tick(ts);
__tick_nohz_idle_enter(ts);
}
/**
......@@ -523,7 +543,7 @@ ktime_t tick_nohz_get_sleep_length(void)
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
hrtimer_cancel(&ts->sched_timer);
hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
while (1) {
/* Forward the time to expire in the future */
......@@ -546,6 +566,41 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
}
}
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
select_nohz_load_balancer(0);
tick_do_update_jiffies64(now);
update_cpu_load_nohz();
touch_softlockup_watchdog();
/*
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
ts->idle_exittime = now;
tick_nohz_restart(ts, now);
}
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting. Enforce that this is accounted to idle !
*/
ticks = jiffies - ts->idle_jiffies;
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
#endif
}
/**
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
......@@ -557,9 +612,6 @@ void tick_nohz_idle_exit(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
#endif
ktime_t now;
local_irq_disable();
......@@ -574,40 +626,11 @@ void tick_nohz_idle_exit(void)
if (ts->idle_active)
tick_nohz_stop_idle(cpu, now);
if (!ts->tick_stopped) {
local_irq_enable();
return;
if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
/* Update jiffies first */
select_nohz_load_balancer(0);
tick_do_update_jiffies64(now);
update_cpu_load_nohz();
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting. Enforce that this is accounted to idle !
*/
ticks = jiffies - ts->idle_jiffies;
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
#endif
calc_load_exit_idle();
touch_softlockup_watchdog();
/*
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
ts->idle_exittime = now;
tick_nohz_restart(ts, now);
local_irq_enable();
}
......@@ -811,6 +834,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog();
if (idle_cpu(cpu))
ts->idle_jiffies++;
}
update_process_times(user_mode(regs));
......
This diff is collapsed.
......@@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(idle_tick);
P_ns(last_tick);
P(tick_stopped);
P(idle_jiffies);
P(idle_calls);
......@@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
SEQ_printf(m, "Timer List Version: v0.6\n");
SEQ_printf(m, "Timer List Version: v0.7\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
......
......@@ -77,6 +77,7 @@ struct tvec_base {
struct timer_list *running_timer;
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
......@@ -330,7 +331,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
}
EXPORT_SYMBOL_GPL(set_timer_slack);
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
static void
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
......@@ -372,6 +374,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
__internal_add_timer(base, timer);
/*
* Update base->active_timers and base->next_timer
*/
if (!tbase_get_deferrable(timer->base)) {
if (time_before(timer->expires, base->next_timer))
base->next_timer = timer->expires;
base->active_timers++;
}
}
#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
......@@ -654,8 +669,7 @@ void init_timer_deferrable_key(struct timer_list *timer,
}
EXPORT_SYMBOL(init_timer_deferrable_key);
static inline void detach_timer(struct timer_list *timer,
int clear_pending)
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
struct list_head *entry = &timer->entry;
......@@ -667,6 +681,29 @@ static inline void detach_timer(struct timer_list *timer,
entry->prev = LIST_POISON2;
}
static inline void
detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
{
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
timer->base->active_timers--;
}
static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
bool clear_pending)
{
if (!timer_pending(timer))
return 0;
detach_timer(timer, clear_pending);
if (!tbase_get_deferrable(timer->base)) {
timer->base->active_timers--;
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
return 1;
}
/*
* We are using hashed locking: holding per_cpu(tvec_bases).lock
* means that all timers which are tied to this base via timer->base are
......@@ -712,16 +749,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
detach_timer(timer, 0);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
} else {
if (pending_only)
ret = detach_if_pending(timer, base, false);
if (!ret && pending_only)
goto out_unlock;
}
debug_activate(timer, expires);
......@@ -752,9 +782,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
timer->expires = expires;
if (time_before(timer->expires, base->next_timer) &&
!tbase_get_deferrable(timer->base))
base->next_timer = timer->expires;
internal_add_timer(base, timer);
out_unlock:
......@@ -920,9 +947,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_lock_irqsave(&base->lock, flags);
timer_set_base(timer, base);
debug_activate(timer, timer->expires);
if (time_before(timer->expires, base->next_timer) &&
!tbase_get_deferrable(timer->base))
base->next_timer = timer->expires;
internal_add_timer(base, timer);
/*
* Check whether the other CPU is idle and needs to be
......@@ -959,13 +983,7 @@ int del_timer(struct timer_list *timer)
timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
detach_timer(timer, 1);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
}
ret = detach_if_pending(timer, base, true);
spin_unlock_irqrestore(&base->lock, flags);
}
......@@ -990,19 +1008,10 @@ int try_to_del_timer_sync(struct timer_list *timer)
base = lock_timer_base(timer, &flags);
if (base->running_timer == timer)
goto out;
if (base->running_timer != timer) {
timer_stats_timer_clear_start_info(timer);
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
ret = detach_if_pending(timer, base, true);
}
out:
spin_unlock_irqrestore(&base->lock, flags);
return ret;
......@@ -1089,7 +1098,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
*/
list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
BUG_ON(tbase_get_base(timer->base) != base);
internal_add_timer(base, timer);
/* No accounting, while moving them */
__internal_add_timer(base, timer);
}
return index;
......@@ -1178,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base)
timer_stats_account_timer(timer);
base->running_timer = timer;
detach_timer(timer, 1);
detach_expired_timer(timer, base);
spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn, data);
......@@ -1316,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
unsigned long get_next_timer_interrupt(unsigned long now)
{
struct tvec_base *base = __this_cpu_read(tvec_bases);
unsigned long expires;
unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
/*
* Pretend that there is no timer pending if the cpu is offline.
* Possible pending timers will be migrated later to an active cpu.
*/
if (cpu_is_offline(smp_processor_id()))
return now + NEXT_TIMER_MAX_DELTA;
return expires;
spin_lock(&base->lock);
if (base->active_timers) {
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;
}
spin_unlock(&base->lock);
if (time_before_eq(expires, now))
......@@ -1704,6 +1717,7 @@ static int __cpuinit init_timers_cpu(int cpu)
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
base->active_timers = 0;
return 0;
}
......@@ -1714,11 +1728,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
while (!list_empty(head)) {
timer = list_first_entry(head, struct timer_list, entry);
detach_timer(timer, 0);
/* We ignore the accounting on the dying cpu */
detach_timer(timer, false);
timer_set_base(timer, new_base);
if (time_before(timer->expires, new_base->next_timer) &&
!tbase_get_deferrable(timer->base))
new_base->next_timer = timer->expires;
internal_add_timer(new_base, timer);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment