Commit fc1dc0d5 authored by Linus Torvalds

Merge tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:

 - Use the topology information about the number of packages for making
   the decision about TSC trust instead of using the number of online
   nodes, which does not reflect the real topology.

 - Stop the PIT timer 0 when it's not in use, so as to stop pointless
   emulation in the VMM.

 - Fix the PIT timer stop sequence for timer 0 so that it truly stops on
   both real hardware and buggy VMM emulations.

* tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Check for sockets instead of CPUs to make code match comment
  clockevents/drivers/i8253: Fix stop sequence for timer 0
  x86/i8253: Disable PIT timer 0 when not in use
  x86/tsc: Use topology_max_packages() to get package number
parents b5075354 e7ff4ebf
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/kexec.h> #include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h> #include <linux/random.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
...@@ -537,16 +536,6 @@ static void __init ms_hyperv_init_platform(void) ...@@ -537,16 +536,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT)) if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason; x86_platform.get_nmi_reason = hv_get_nmi_reason;
/*
* Hyper-V VMs have a PIT emulation quirk such that zeroing the
* counter register during PIT shutdown restarts the PIT. So it
* continues to interrupt @18.2 HZ. Setting i8253_clear_counter
* to false tells pit_shutdown() not to zero the counter so that
* the PIT really is shutdown. Generation 2 VMs don't have a PIT,
* and setting this value has no effect.
*/
i8253_clear_counter_on_shutdown = false;
#if IS_ENABLED(CONFIG_HYPERV) #if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
ms_hyperv.paravisor_present) ms_hyperv.paravisor_present)
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/timex.h> #include <linux/timex.h>
#include <linux/i8253.h> #include <linux/i8253.h>
#include <asm/hypervisor.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/hpet.h> #include <asm/hpet.h>
#include <asm/time.h> #include <asm/time.h>
...@@ -39,9 +40,15 @@ static bool __init use_pit(void) ...@@ -39,9 +40,15 @@ static bool __init use_pit(void)
bool __init pit_timer_init(void) bool __init pit_timer_init(void)
{ {
if (!use_pit()) if (!use_pit()) {
/*
* Don't just ignore the PIT. Ensure it's stopped, because
* VMMs otherwise steal CPU time just to pointlessly waggle
* the (masked) IRQ.
*/
clockevent_i8253_disable();
return false; return false;
}
clockevent_i8253_init(true); clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent; global_clock_event = &i8253_clockevent;
return true; return true;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/i8259.h> #include <asm/i8259.h>
#include <asm/topology.h>
#include <asm/uv/uv.h> #include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
...@@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void) ...@@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void)
* - TSC which does not stop in C-States * - TSC which does not stop in C-States
* - the TSC_ADJUST register which allows to detect even minimal * - the TSC_ADJUST register which allows to detect even minimal
* modifications * modifications
* - not more than two sockets. As the number of sockets cannot be * - not more than four packages
* evaluated at the early boot stage where this has to be
* invoked, check the number of online memory nodes as a
* fallback solution which is an reasonable estimate.
*/ */
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
boot_cpu_has(X86_FEATURE_TSC_ADJUST) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
nr_online_nodes <= 4) topology_max_packages() <= 4)
tsc_disable_clocksource_watchdog(); tsc_disable_clocksource_watchdog();
} }
...@@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void) ...@@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void)
*/ */
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */ /* assume multi socket systems are not synchronized: */
if (num_possible_cpus() > 1) if (topology_max_packages() > 1)
return 1; return 1;
} }
......
...@@ -20,13 +20,6 @@ ...@@ -20,13 +20,6 @@
DEFINE_RAW_SPINLOCK(i8253_lock); DEFINE_RAW_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock); EXPORT_SYMBOL(i8253_lock);
/*
* Handle PIT quirk in pit_shutdown() where zeroing the counter register
* restarts the PIT, negating the shutdown. On platforms with the quirk,
* platform specific code can set this to false.
*/
bool i8253_clear_counter_on_shutdown __ro_after_init = true;
#ifdef CONFIG_CLKSRC_I8253 #ifdef CONFIG_CLKSRC_I8253
/* /*
* Since the PIT overflows every tick, its not very useful * Since the PIT overflows every tick, its not very useful
...@@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void) ...@@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void)
#endif #endif
#ifdef CONFIG_CLKEVT_I8253 #ifdef CONFIG_CLKEVT_I8253
static int pit_shutdown(struct clock_event_device *evt) void clockevent_i8253_disable(void)
{ {
if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
return 0;
raw_spin_lock(&i8253_lock); raw_spin_lock(&i8253_lock);
/*
* Writing the MODE register should stop the counter, according to
* the datasheet. This appears to work on real hardware (well, on
* modern Intel and AMD boxes; I didn't dig the Pegasos out of the
* shed).
*
* However, some virtual implementations differ, and the MODE change
* doesn't have any effect until either the counter is written (KVM
* in-kernel PIT) or the next interrupt (QEMU). And in those cases,
* it may not stop the *count*, only the interrupts. Although in
* the virt case, that probably doesn't matter, as the value of the
* counter will only be calculated on demand if the guest reads it;
* it's the interrupts which cause steal time.
*
* Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
* firing repeatedly if the counter is running. But it *does* do the
* right thing when the MODE register is written.
*
* So: write the MODE and then load the counter, which ensures that
* the IRQ is stopped on those buggy virt implementations. And then
* write the MODE again, which is the right way to stop it.
*/
outb_p(0x30, PIT_MODE); outb_p(0x30, PIT_MODE);
outb_p(0, PIT_CH0);
outb_p(0, PIT_CH0);
if (i8253_clear_counter_on_shutdown) { outb_p(0x30, PIT_MODE);
outb_p(0, PIT_CH0);
outb_p(0, PIT_CH0);
}
raw_spin_unlock(&i8253_lock); raw_spin_unlock(&i8253_lock);
}
static int pit_shutdown(struct clock_event_device *evt)
{
if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
return 0;
clockevent_i8253_disable();
return 0; return 0;
} }
......
...@@ -21,9 +21,9 @@ ...@@ -21,9 +21,9 @@
#define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ)
extern raw_spinlock_t i8253_lock; extern raw_spinlock_t i8253_lock;
extern bool i8253_clear_counter_on_shutdown;
extern struct clock_event_device i8253_clockevent; extern struct clock_event_device i8253_clockevent;
extern void clockevent_i8253_init(bool oneshot); extern void clockevent_i8253_init(bool oneshot);
extern void clockevent_i8253_disable(void);
extern void setup_pit_timer(void); extern void setup_pit_timer(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment