Commit f7dd3b17 authored by Linus Torvalds

Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer updates from Thomas Gleixner:
 "This is the last functional update from the tip tree for 4.10. It got
  delayed due to a newly reported and analyzed variant of a BIOS bug and
  the resulting wreckage:

   - Separation of the TSC being marked reliable from the fact that the
     platform provides the TSC frequency via CPUID/MSRs, and making use
     of it for GOLDMONT.

   - TSC adjust MSR validation and sanitizing:

     The TSC adjust MSR contains the offset to the hardware counter. The
     sum of the adjust MSR and the counter is the TSC value which is
     read via RDTSC.

     On at least two machines from different vendors the BIOS sets the
     TSC adjust MSR to negative values. This happens on cold and warm
     boot. While on cold boot the offset is a few milliseconds, on warm
     boot it basically compensates the power on time of the system. The
     BIOSes are not even using the adjust MSR to set all CPUs in the
     package to the same offset. The offsets are different which renders
     the TSC unusable.

     What's worse is that the TSC deadline timer has a HW feature^Wbug.
     It malfunctions when the TSC adjust value is negative or greater than
     or equal to 0x80000000, resulting in silent boot failures, hard lockups or
     non firing timers. This looks like some hardware internal 32/64bit
     issue with a sign extension problem. Intel has been silent so far
     on the issue.

     The update contains sanity checks and keeps the adjust register
     within working limits and in sync on the package.

     As it looks like this disease is spreading via BIOS crapware, we
     need to address this urgently as the boot failures are hard to
     debug for users"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Limit the adjust value further
  x86/tsc: Annotate printouts as firmware bug
  x86/tsc: Force TSC_ADJUST register to value >= zero
  x86/tsc: Validate TSC_ADJUST after resume
  x86/tsc: Validate cpumask pointer before accessing it
  x86/tsc: Fix broken CONFIG_X86_TSC=n build
  x86/tsc: Try to adjust TSC if sync test fails
  x86/tsc: Prepare warp test for TSC adjustment
  x86/tsc: Move sync cleanup to a safe place
  x86/tsc: Sync test only for the first cpu in a package
  x86/tsc: Verify TSC_ADJUST from idle
  x86/tsc: Store and check TSC ADJUST MSR
  x86/tsc: Detect random warps
  x86/tsc: Use X86_FEATURE_TSC_ADJUST in detect_art()
  x86/tsc: Finalize the split of the TSC_RELIABLE flag
  x86/tsc: Set TSC_KNOWN_FREQ and TSC_RELIABLE flags on Intel Atom SoCs
  x86/tsc: Mark Intel ATOM_GOLDMONT TSC reliable
  x86/tsc: Mark TSC frequency determined by CPUID as known
  x86/tsc: Add X86_FEATURE_TSC_KNOWN_FREQ flag
parents 1bbb05f5 8c9b9d87
...@@ -105,6 +105,7 @@ ...@@ -105,6 +105,7 @@
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
......
...@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable; ...@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable;
* Boot-time check whether the TSCs are synchronized across * Boot-time check whether the TSCs are synchronized across
* all CPUs/cores: * all CPUs/cores:
*/ */
#ifdef CONFIG_X86_TSC
extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
extern void tsc_verify_tsc_adjust(bool resume);
extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void); extern void check_tsc_sync_target(void);
#else
static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
static inline void tsc_verify_tsc_adjust(bool resume) { }
static inline void check_tsc_sync_source(int cpu) { }
static inline void check_tsc_sync_target(void) { }
#endif
extern int notsc_setup(char *); extern int notsc_setup(char *);
extern void tsc_save_sched_clock_state(void); extern void tsc_save_sched_clock_state(void);
......
...@@ -75,7 +75,7 @@ apm-y := apm_32.o ...@@ -75,7 +75,7 @@ apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += tsc_sync.o obj-$(CONFIG_X86_TSC) += tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_SMP) += setup_percpu.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/ obj-y += apic/
......
...@@ -235,6 +235,7 @@ static inline void play_dead(void) ...@@ -235,6 +235,7 @@ static inline void play_dead(void)
void arch_cpu_idle_enter(void) void arch_cpu_idle_enter(void)
{ {
tsc_verify_tsc_adjust(false);
local_touch_nmi(); local_touch_nmi();
} }
......
...@@ -702,6 +702,20 @@ unsigned long native_calibrate_tsc(void) ...@@ -702,6 +702,20 @@ unsigned long native_calibrate_tsc(void)
} }
} }
/*
* TSC frequency determined by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
* is considered a known frequency.
*/
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
/*
* For Atom SoCs TSC is the only reliable clocksource.
* Mark TSC reliable so no watchdog on it.
*/
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return crystal_khz * ebx_numerator / eax_denominator; return crystal_khz * ebx_numerator / eax_denominator;
} }
...@@ -1043,18 +1057,20 @@ static void detect_art(void) ...@@ -1043,18 +1057,20 @@ static void detect_art(void)
if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
return; return;
cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
&art_to_tsc_numerator, unused, unused+1);
/* Don't enable ART in a VM, non-stop TSC required */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
art_to_tsc_denominator < ART_MIN_DENOMINATOR) !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
return; return;
if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset)) cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
&art_to_tsc_numerator, unused, unused+1);
if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
return; return;
rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
/* Make this sticky over multiple CPU init calls */ /* Make this sticky over multiple CPU init calls */
setup_force_cpu_cap(X86_FEATURE_ART); setup_force_cpu_cap(X86_FEATURE_ART);
} }
...@@ -1064,6 +1080,11 @@ static void detect_art(void) ...@@ -1064,6 +1080,11 @@ static void detect_art(void)
static struct clocksource clocksource_tsc; static struct clocksource clocksource_tsc;
static void tsc_resume(struct clocksource *cs)
{
tsc_verify_tsc_adjust(true);
}
/* /*
* We used to compare the TSC to the cycle_last value in the clocksource * We used to compare the TSC to the cycle_last value in the clocksource
* structure to avoid a nasty time-warp. This can be observed in a * structure to avoid a nasty time-warp. This can be observed in a
...@@ -1096,6 +1117,7 @@ static struct clocksource clocksource_tsc = { ...@@ -1096,6 +1117,7 @@ static struct clocksource clocksource_tsc = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS | .flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY, CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC }, .archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
}; };
void mark_tsc_unstable(char *reason) void mark_tsc_unstable(char *reason)
...@@ -1283,10 +1305,10 @@ static int __init init_tsc_clocksource(void) ...@@ -1283,10 +1305,10 @@ static int __init init_tsc_clocksource(void)
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
/* /*
* Trust the results of the earlier calibration on systems * When TSC frequency is known (retrieved via MSR or CPUID), we skip
* exporting a reliable TSC. * the refined calibration and directly register it as a clocksource.
*/ */
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_register_khz(&clocksource_tsc, tsc_khz);
return 0; return 0;
} }
...@@ -1363,6 +1385,8 @@ void __init tsc_init(void) ...@@ -1363,6 +1385,8 @@ void __init tsc_init(void)
if (unsynchronized_tsc()) if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized"); mark_tsc_unstable("TSCs unsynchronized");
else
tsc_store_and_check_tsc_adjust(true);
check_system_tsc_reliable(); check_system_tsc_reliable();
......
...@@ -100,5 +100,24 @@ unsigned long cpu_khz_from_msr(void) ...@@ -100,5 +100,24 @@ unsigned long cpu_khz_from_msr(void)
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
lapic_timer_frequency = (freq * 1000) / HZ; lapic_timer_frequency = (freq * 1000) / HZ;
#endif #endif
/*
* TSC frequency determined by MSR is always considered "known"
* because it is reported by HW.
* Another fact is that on MSR capable platforms, PIT/HPET is
* generally not available so calibration won't work at all.
*/
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
/*
* Unfortunately there is no way for hardware to tell whether the
* TSC is reliable. We were told by silicon design team that TSC
* on Atom SoCs are always "reliable". TSC is also the only
* reliable clocksource on these SoCs (HPET is either not present
* or not functional) so mark TSC reliable which removes the
* requirement for a watchdog clocksource.
*/
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return res; return res;
} }
This diff is collapsed.
...@@ -49,8 +49,13 @@ static unsigned long __init mfld_calibrate_tsc(void) ...@@ -49,8 +49,13 @@ static unsigned long __init mfld_calibrate_tsc(void)
fast_calibrate = ratio * fsb; fast_calibrate = ratio * fsb;
pr_debug("read penwell tsc %lu khz\n", fast_calibrate); pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
lapic_timer_frequency = fsb * 1000 / HZ; lapic_timer_frequency = fsb * 1000 / HZ;
/* mark tsc clocksource as reliable */
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); /*
* TSC on Intel Atom SoCs is reliable and of known frequency.
* See tsc_msr.c for details.
*/
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return fast_calibrate; return fast_calibrate;
} }
......
...@@ -78,8 +78,12 @@ static unsigned long __init tangier_calibrate_tsc(void) ...@@ -78,8 +78,12 @@ static unsigned long __init tangier_calibrate_tsc(void)
pr_debug("Setting lapic_timer_frequency = %d\n", pr_debug("Setting lapic_timer_frequency = %d\n",
lapic_timer_frequency); lapic_timer_frequency);
/* mark tsc clocksource as reliable */ /*
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); * TSC on Intel Atom SoCs is reliable and of known frequency.
* See tsc_msr.c for details.
*/
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return fast_calibrate; return fast_calibrate;
} }
......
...@@ -252,6 +252,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) ...@@ -252,6 +252,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
fix_processor_context(); fix_processor_context();
do_fpu_end(); do_fpu_end();
tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state(); x86_platform.restore_sched_clock_state();
mtrr_bp_restore(); mtrr_bp_restore();
perf_restore_debug_store(); perf_restore_debug_store();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment