Commit 332962f2 authored by Thomas Gleixner

clocksource: Reselect clocksource when watchdog validated high-res capability

Up to commit 5d33b883 (clocksource: Always verify highres capability)
we had no sanity check when selecting a clocksource that would prevent
a non-highres-capable clocksource from being used when the system had
already switched to highres/nohz mode.

The new sanity check works as Alex and Tim found out. It prevents the
TSC from being used. This happens because on x86 the boot process
looks like this:

 tsc_start_frequency_validation(TSC);
 clocksource_register(HPET);
 clocksource_done_booting();
	clocksource_select()
		Selects HPET which is valid for high-res

 switch_to_highres();

 clocksource_register(TSC);
 	TSC is not selected, because it is not yet
	flagged as VALID_HIGH_RES

 clocksource_watchdog()
	Validates TSC for highres, but that does not make TSC
	the current clocksource.

Before the sanity check was added, we installed TSC unvalidated which
worked most of the time. If the TSC was really detected as unstable,
then the unstable logic removed it and installed HPET again.

The sanity check is correct and needed, so the watchdog needs to kick
off a reselection of the clocksource when it qualifies TSC as a valid
high-res clocksource.

To solve this, when the watchdog sets CLOCK_SOURCE_VALID_FOR_HRES on a
clocksource, we also mark that clocksource with a new flag,
CLOCK_SOURCE_RESELECT, and trigger the watchdog thread. The watchdog
thread evaluates the flag and invokes clocksource_select() when it is set.

The clocksource_done_booting() code is about to install the first real
clocksource anyway, so it should not go through clocksource_select()
and tick_oneshot_notify() pointlessly. To avoid that, split out the
clocksource_watchdog_kthread() list walk code and invoke the
select/notify only when called from clocksource_watchdog_kthread().

So clocksource_done_booting() can utilize the same splitout code
without the select/notify invocation and the clocksource_mutex
unlock/relock dance.

Reported-and-tested-by: Alex Shi <alex.shi@intel.com>
Cc: Hans Peter Anvin <hpa@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Tested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307042239150.11637@ionos.tec.linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 2b0f8931
...@@ -210,6 +210,7 @@ struct clocksource { ...@@ -210,6 +210,7 @@ struct clocksource {
#define CLOCK_SOURCE_VALID_FOR_HRES 0x20 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20
#define CLOCK_SOURCE_UNSTABLE 0x40 #define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 #define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
/* simplify initialization of mask field */ /* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
......
...@@ -181,6 +181,7 @@ static int finished_booting; ...@@ -181,6 +181,7 @@ static int finished_booting;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work); static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);
static LIST_HEAD(watchdog_list); static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog; static struct clocksource *watchdog;
...@@ -301,13 +302,30 @@ static void clocksource_watchdog(unsigned long data) ...@@ -301,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
(cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
/* Mark it valid for high-res. */
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* clocksource_done_booting() will sort it if
* finished_booting is not set yet.
*/
if (!finished_booting)
continue;
/* /*
* We just marked the clocksource as highres-capable, * If this is not the current clocksource let
* notify the rest of the system as well so that we * the watchdog thread reselect it. Due to the
* transition into high-res mode: * change to high res this clocksource might
* be preferred now. If it is the current
* clocksource let the tick code know about
* that change.
*/ */
tick_clock_notify(); if (cs != curr_clocksource) {
cs->flags |= CLOCK_SOURCE_RESELECT;
schedule_work(&watchdog_work);
} else {
tick_clock_notify();
}
} }
} }
...@@ -404,19 +422,25 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) ...@@ -404,19 +422,25 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags); spin_unlock_irqrestore(&watchdog_lock, flags);
} }
static int clocksource_watchdog_kthread(void *data) static int __clocksource_watchdog_kthread(void)
{ {
struct clocksource *cs, *tmp; struct clocksource *cs, *tmp;
unsigned long flags; unsigned long flags;
LIST_HEAD(unstable); LIST_HEAD(unstable);
int select = 0;
mutex_lock(&clocksource_mutex);
spin_lock_irqsave(&watchdog_lock, flags); spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
if (cs->flags & CLOCK_SOURCE_UNSTABLE) { if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list); list_del_init(&cs->wd_list);
list_add(&cs->wd_list, &unstable); list_add(&cs->wd_list, &unstable);
select = 1;
}
if (cs->flags & CLOCK_SOURCE_RESELECT) {
cs->flags &= ~CLOCK_SOURCE_RESELECT;
select = 1;
} }
}
/* Check if the watchdog timer needs to be stopped. */ /* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog(); clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags); spin_unlock_irqrestore(&watchdog_lock, flags);
...@@ -426,6 +450,14 @@ static int clocksource_watchdog_kthread(void *data) ...@@ -426,6 +450,14 @@ static int clocksource_watchdog_kthread(void *data)
list_del_init(&cs->wd_list); list_del_init(&cs->wd_list);
__clocksource_change_rating(cs, 0); __clocksource_change_rating(cs, 0);
} }
return select;
}
static int clocksource_watchdog_kthread(void *data)
{
mutex_lock(&clocksource_mutex);
if (__clocksource_watchdog_kthread())
clocksource_select();
mutex_unlock(&clocksource_mutex); mutex_unlock(&clocksource_mutex);
return 0; return 0;
} }
...@@ -445,7 +477,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) ...@@ -445,7 +477,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { } static inline void clocksource_resume_watchdog(void) { }
static inline int clocksource_watchdog_kthread(void *data) { return 0; } static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
...@@ -647,16 +679,11 @@ static int __init clocksource_done_booting(void) ...@@ -647,16 +679,11 @@ static int __init clocksource_done_booting(void)
{ {
mutex_lock(&clocksource_mutex); mutex_lock(&clocksource_mutex);
curr_clocksource = clocksource_default_clock(); curr_clocksource = clocksource_default_clock();
mutex_unlock(&clocksource_mutex);
finished_booting = 1; finished_booting = 1;
/* /*
* Run the watchdog first to eliminate unstable clock sources * Run the watchdog first to eliminate unstable clock sources
*/ */
clocksource_watchdog_kthread(NULL); __clocksource_watchdog_kthread();
mutex_lock(&clocksource_mutex);
clocksource_select(); clocksource_select();
mutex_unlock(&clocksource_mutex); mutex_unlock(&clocksource_mutex);
return 0; return 0;
...@@ -789,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) ...@@ -789,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
list_del(&cs->list); list_del(&cs->list);
cs->rating = rating; cs->rating = rating;
clocksource_enqueue(cs); clocksource_enqueue(cs);
clocksource_select();
} }
/** /**
...@@ -801,6 +827,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) ...@@ -801,6 +827,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
{ {
mutex_lock(&clocksource_mutex); mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating); __clocksource_change_rating(cs, rating);
clocksource_select();
mutex_unlock(&clocksource_mutex); mutex_unlock(&clocksource_mutex);
} }
EXPORT_SYMBOL(clocksource_change_rating); EXPORT_SYMBOL(clocksource_change_rating);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment