Commit 6ff7041d authored by Thomas Gleixner

hrtimer: Fix migration expiry check

The timer migration expiry check should prevent the migration of a
timer to another CPU when the timer expires before the next event is
scheduled on the other CPU. Migrating the timer might delay it because
we cannot reprogram the clock event device on the other CPU. But the
code implementing that check has two flaws:

- for !HIGHRES the check compares the expiry value with the clock
  events device expiry value which is wrong for CLOCK_REALTIME based
  timers.

- the check is racy. It holds the hrtimer base lock of the target CPU,
  but the clock event device expiry value can be modified
  nevertheless, e.g. by a timer interrupt firing.

The !HIGHRES case is easy to fix as we can enqueue the timer on the
cpu which was selected by the load balancer. It runs the idle
balancing code once per jiffy anyway. So the maximum delay for the
timer is the same as when we keep the tick on the current cpu going.

In the HIGHRES case we can get the next expiry value from the hrtimer
cpu_base of the target CPU and serialize the update with the cpu_base
lock. This moves the lock section in hrtimer_interrupt() so we can set
next_event to KTIME_MAX while we are handling the expired timers and
set it to the next expiry value after we handled the timers under the
base lock. While the expired timers are processed timer migration is
blocked because the expiry time of the timer is always <= KTIME_MAX.

Also remove the now useless clockevents_get_next_event() function.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 7e0c5086
...@@ -143,12 +143,3 @@ extern void clockevents_notify(unsigned long reason, void *arg); ...@@ -143,12 +143,3 @@ extern void clockevents_notify(unsigned long reason, void *arg);
#endif #endif
#endif #endif
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern ktime_t clockevents_get_next_event(int cpu);
#else
static inline ktime_t clockevents_get_next_event(int cpu)
{
return (ktime_t) { .tv64 = KTIME_MAX };
}
#endif
...@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, ...@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
} }
} }
/*
 * Pick the CPU a timer should be queued on.
 *
 * With CONFIG_NO_HZ, the CPU reported by get_nohz_load_balancer() is
 * returned when all of the following hold: the timer is not pinned,
 * get_sysctl_timer_migration() is non-zero, this_cpu is idle and the
 * reported CPU number is not negative. In every other case (and always
 * without CONFIG_NO_HZ) this_cpu is returned unchanged.
 */
static int hrtimer_get_target(int this_cpu, int pinned)
{
#ifdef CONFIG_NO_HZ
	int target;

	/* Same evaluation order as a chained && test, just inverted. */
	if (pinned || !get_sysctl_timer_migration() || !idle_cpu(this_cpu))
		return this_cpu;

	target = get_nohz_load_balancer();
	if (target >= 0)
		return target;
#endif
	return this_cpu;
}
/*
 * Decide whether moving @timer to @new_base must be refused.
 *
 * With HIGHRES=y we do not migrate the timer when it is expiring
 * before the next event on the target cpu because we cannot reprogram
 * the target cpu hardware and we would cause it to fire late.
 *
 * Returns 1 when the target cpu_base has hres_active set and the
 * timer's expiry, with new_base->offset subtracted, is not later than
 * the target's cpu_base->expires_next. Returns 0 otherwise, and always
 * 0 when CONFIG_HIGH_RES_TIMERS is not set.
 *
 * Called with cpu_base->lock of target cpu held.
 */
static int
hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	ktime_t adjusted;

	if (new_base->cpu_base->hres_active) {
		/* Remove the clock base offset before comparing. */
		adjusted = ktime_sub(hrtimer_get_expires(timer),
				     new_base->offset);
		if (adjusted.tv64 <= new_base->cpu_base->expires_next.tv64)
			return 1;
	}
#endif
	return 0;
}
/* /*
* Switch the timer base to the current CPU when possible. * Switch the timer base to the current CPU when possible.
*/ */
...@@ -200,27 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, ...@@ -200,27 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{ {
struct hrtimer_clock_base *new_base; struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base; struct hrtimer_cpu_base *new_cpu_base;
int cpu, preferred_cpu = -1; int this_cpu = smp_processor_id();
int cpu = hrtimer_get_target(this_cpu, pinned);
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0) {
/*
* We must not check the expiry value when
* preferred_cpu is the current cpu. If base
* != new_base we would loop forever when the
* timer expires before the current programmed
* next timer event.
*/
if (preferred_cpu != cpu)
cpu = preferred_cpu;
else
preferred_cpu = -1;
}
}
#endif
again: again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu); new_cpu_base = &per_cpu(hrtimer_bases, cpu);
...@@ -228,7 +249,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, ...@@ -228,7 +249,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
if (base != new_base) { if (base != new_base) {
/* /*
* We are trying to schedule the timer on the local CPU. * We are trying to move timer to new_base.
* However we can't change timer's base while it is running, * However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming * so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq * the event source in the high resolution case. The softirq
...@@ -244,39 +265,13 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, ...@@ -244,39 +265,13 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
spin_unlock(&base->cpu_base->lock); spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock); spin_lock(&new_base->cpu_base->lock);
/* Optimized away for NOHZ=n SMP=n */ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
if (cpu == preferred_cpu) { cpu = this_cpu;
/* Calculate clock monotonic expiry time */
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
new_base->offset);
#else
ktime_t expires = hrtimer_get_expires(timer);
#endif
/*
* Get the next event on target cpu from the
* clock events layer.
* This covers the highres=off nohz=on case as well.
*/
ktime_t next = clockevents_get_next_event(cpu);
ktime_t delta = ktime_sub(expires, next);
/*
* We do not migrate the timer when it is expiring
* before the next event on the target cpu because
* we cannot reprogram the target cpu hardware and
* we would cause it to fire late.
*/
if (delta.tv64 < 0) {
cpu = smp_processor_id();
spin_unlock(&new_base->cpu_base->lock); spin_unlock(&new_base->cpu_base->lock);
spin_lock(&base->cpu_base->lock); spin_lock(&base->cpu_base->lock);
timer->base = base; timer->base = base;
goto again; goto again;
} }
}
timer->base = new_base; timer->base = new_base;
} }
return new_base; return new_base;
...@@ -1287,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) ...@@ -1287,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX; expires_next.tv64 = KTIME_MAX;
spin_lock(&cpu_base->lock);
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
* the migration code. This does not affect enqueueing of
* timers which run their callback and need to be requeued on
* this CPU.
*/
cpu_base->expires_next.tv64 = KTIME_MAX;
base = cpu_base->clock_base; base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow; ktime_t basenow;
struct rb_node *node; struct rb_node *node;
spin_lock(&cpu_base->lock);
basenow = ktime_add(now, base->offset); basenow = ktime_add(now, base->offset);
while ((node = base->first)) { while ((node = base->first)) {
...@@ -1327,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) ...@@ -1327,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer); __run_hrtimer(timer);
} }
spin_unlock(&cpu_base->lock);
base++; base++;
} }
/*
* Store the new expiry value so the migration code can verify
* against it.
*/
cpu_base->expires_next = expires_next; cpu_base->expires_next = expires_next;
spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */ /* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) { if (expires_next.tv64 != KTIME_MAX) {
......
...@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg) ...@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock); spin_unlock(&clockevents_lock);
} }
EXPORT_SYMBOL_GPL(clockevents_notify); EXPORT_SYMBOL_GPL(clockevents_notify);
/*
 * Return the next_event value of the clock event device (evtdev) that
 * is attached to the per-cpu tick device of @cpu.
 */
ktime_t clockevents_get_next_event(int cpu)
{
	struct tick_device *tick = &per_cpu(tick_cpu_device, cpu);

	return tick->evtdev->next_event;
}
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment