Commit b5199515 authored by Thomas Gleixner

clocksource: Make watchdog robust vs. interruption

The clocksource watchdog code is interruptible and it has been
observed that this can trigger false positives which disable the TSC.

The reason is that an interrupt storm or a long running interrupt
handler between the read of the watchdog source and the read of the
TSC brings the two far enough apart that the delta is larger than the
unstable threshold. Move both reads into a short interrupt-disabled
region to avoid that.
Reported-and-tested-by: Vernon Mauery <vernux@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
parent 1123d939
...@@ -188,6 +188,7 @@ struct clocksource { ...@@ -188,6 +188,7 @@ struct clocksource {
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
/* Watchdog related data, used by the framework */ /* Watchdog related data, used by the framework */
struct list_head wd_list; struct list_head wd_list;
cycle_t cs_last;
cycle_t wd_last; cycle_t wd_last;
#endif #endif
} ____cacheline_aligned; } ____cacheline_aligned;
......
...@@ -185,7 +185,6 @@ static struct clocksource *watchdog; ...@@ -185,7 +185,6 @@ static struct clocksource *watchdog;
static struct timer_list watchdog_timer; static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock); static DEFINE_SPINLOCK(watchdog_lock);
static cycle_t watchdog_last;
static int watchdog_running; static int watchdog_running;
static int clocksource_watchdog_kthread(void *data); static int clocksource_watchdog_kthread(void *data);
...@@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) ...@@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data)
if (!watchdog_running) if (!watchdog_running)
goto out; goto out;
wdnow = watchdog->read(watchdog);
wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
watchdog->mult, watchdog->shift);
watchdog_last = wdnow;
list_for_each_entry(cs, &watchdog_list, wd_list) { list_for_each_entry(cs, &watchdog_list, wd_list) {
/* Clocksource already marked unstable? */ /* Clocksource already marked unstable? */
...@@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) ...@@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data)
continue; continue;
} }
local_irq_disable();
csnow = cs->read(cs); csnow = cs->read(cs);
wdnow = watchdog->read(watchdog);
local_irq_enable();
/* Clocksource initialized ? */ /* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = csnow; cs->wd_last = wdnow;
cs->cs_last = csnow;
continue; continue;
} }
/* Check the deviation from the watchdog clocksource. */ wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & watchdog->mult, watchdog->shift);
cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
cs->mask, cs->mult, cs->shift); cs->mask, cs->mult, cs->shift);
cs->wd_last = csnow; cs->cs_last = csnow;
cs->wd_last = wdnow;
/* Check the deviation from the watchdog clocksource. */
if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
clocksource_unstable(cs, cs_nsec - wd_nsec); clocksource_unstable(cs, cs_nsec - wd_nsec);
continue; continue;
...@@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) ...@@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void)
return; return;
init_timer(&watchdog_timer); init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog; watchdog_timer.function = clocksource_watchdog;
watchdog_last = watchdog->read(watchdog);
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
watchdog_running = 1; watchdog_running = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment