Commit 212f3000 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-idle-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 idle updates from Ingo Molnar:
 "There were two bigger changes in this development cycle:

   - remove idle notifiers:

       32 files changed, 74 insertions(+), 803 deletions(-)

     These notifiers were of questionable value and the main usecase,
     the i7300 driver, was essentially unmaintained and can be removed,
     plus modern power management concepts don't need the callback - so
     use this golden opportunity and get rid of this opaque and fragile
     callback from a latency sensitive code path.

     (Len Brown, Thomas Gleixner)

   - improve the AMD Erratum 400 workaround that used high overhead MSR
     polling in the idle loop (Borisla Petkov, Thomas Gleixner)"

* 'x86-idle-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Remove empty idle.h header
  x86/amd: Simplify AMD E400 aware idle routine
  x86/amd: Check for the C1E bug post ACPI subsystem init
  x86/bugs: Separate AMD E400 erratum and C1E bug
  x86/cpufeature: Provide helper to set bugs bits
  x86/idle: Remove enter_idle(), exit_idle()
  x86: Remove x86_test_and_clear_bit_percpu()
  x86/idle: Remove is_idle flag
  x86/idle: Remove idle_notifier
  i7300_idle: Remove this driver
parents 6f3be0f0 34bc3560
......@@ -6104,12 +6104,6 @@ S: Maintained
F: Documentation/cdrom/ide-cd
F: drivers/ide/ide-cd*
IDLE-I7300
M: Andy Henroid <andrew.d.henroid@intel.com>
L: linux-pm@vger.kernel.org
S: Supported
F: drivers/idle/i7300_idle.c
IEEE 802.15.4 SUBSYSTEM
M: Alexander Aring <aar@pengutronix.de>
M: Stefan Schmidt <stefan@osg.samsung.com>
......
......@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
boot_cpu_data.x86_model <= 0x05 &&
boot_cpu_data.x86_mask < 0x0A)
return 1;
else if (amd_e400_c1e_detected)
else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
return 1;
else
return max_cstate;
......
......@@ -11,7 +11,6 @@
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/idle.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
......@@ -640,7 +639,6 @@ extern void irq_exit(void);
static inline void entering_irq(void)
{
irq_enter();
exit_idle();
}
static inline void entering_ack_irq(void)
......
......@@ -204,6 +204,7 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
#define cpu_have_feature boot_cpu_has
......
......@@ -314,4 +314,6 @@
#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#endif /* _ASM_X86_CPUFEATURES_H */
#ifndef _ASM_X86_IDLE_H
#define _ASM_X86_IDLE_H
#define IDLE_START 1
#define IDLE_END 2
struct notifier_block;
void idle_notifier_register(struct notifier_block *n);
void idle_notifier_unregister(struct notifier_block *n);
#ifdef CONFIG_X86_64
void enter_idle(void);
void exit_idle(void);
#else /* !CONFIG_X86_64 */
static inline void enter_idle(void) { }
static inline void exit_idle(void) { }
static inline void __exit_idle(void) { }
#endif /* CONFIG_X86_64 */
void amd_e400_remove_cpu(int cpu);
#endif /* _ASM_X86_IDLE_H */
......@@ -507,17 +507,6 @@ do { \
#endif
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
#define x86_test_and_clear_bit_percpu(bit, var) \
({ \
bool old__; \
asm volatile("btr %2,"__percpu_arg(1)"\n\t" \
CC_SET(c) \
: CC_OUT(c) (old__), "+m" (var) \
: "dIr" (bit)); \
old__; \
})
static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
{
......
......@@ -633,10 +633,9 @@ static inline void sync_core(void)
}
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void init_amd_e400_c1e_mask(void);
extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
extern bool amd_e400_c1e_detected;
enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
IDLE_POLL};
......
......@@ -48,7 +48,6 @@
#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/time.h>
#include <asm/smp.h>
......@@ -894,11 +893,13 @@ void __init setup_boot_APIC_clock(void)
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
amd_e400_c1e_apic_setup();
}
void setup_secondary_APIC_clock(void)
{
setup_APIC_timer();
amd_e400_c1e_apic_setup();
}
/*
......
......@@ -48,7 +48,6 @@
#include <linux/bootmem.h>
#include <asm/irqdomain.h>
#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/cpu.h>
......
......@@ -20,6 +20,10 @@
#include "cpu.h"
static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
......@@ -592,11 +596,16 @@ static void early_init_amd(struct cpuinfo_x86 *c)
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
}
static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
/*
* Check whether the machine is affected by erratum 400. This is
* used to select the proper idle routine and to enable the check
* whether the machine is affected in arch_post_acpi_init(), which
* sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check.
*/
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
}
static void init_amd_k8(struct cpuinfo_x86 *c)
{
......@@ -777,9 +786,6 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
/* 3DNow or LM implies PREFETCHW */
......
......@@ -1172,7 +1172,6 @@ void enable_sep_cpu(void)
void __init identify_boot_cpu(void)
{
identify_cpu(&boot_cpu_data);
init_amd_e400_c1e_mask();
#ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
......
......@@ -24,7 +24,6 @@
#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
......
......@@ -26,7 +26,6 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
......
......@@ -6,7 +6,6 @@
#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
......
......@@ -25,7 +25,6 @@
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
#include <asm/desc.h>
#include <asm/idle.h>
#include <asm/irq_regs.h>
#include <asm/i8259.h>
#include <asm/apic.h>
......
......@@ -14,7 +14,6 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/desc.h>
......
......@@ -16,7 +16,6 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <asm/io_apic.h>
#include <asm/idle.h>
#include <asm/apic.h>
int sysctl_panic_on_stackoverflow;
......
......@@ -42,7 +42,6 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/idle.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
......@@ -267,13 +266,11 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
exit_idle();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
break;
......
......@@ -23,7 +23,6 @@
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/mwait.h>
#include <asm/fpu/internal.h>
......@@ -65,23 +64,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
{
atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);
void idle_notifier_unregister(struct notifier_block *n)
{
atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
......@@ -251,39 +233,9 @@ static inline void play_dead(void)
}
#endif
#ifdef CONFIG_X86_64
void enter_idle(void)
{
this_cpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
/* Called from interrupts to signify idle end */
void exit_idle(void)
{
/* idle loop has pid 0 */
if (current->pid)
return;
__exit_idle();
}
#endif
void arch_cpu_idle_enter(void)
{
local_touch_nmi();
enter_idle();
}
void arch_cpu_idle_exit(void)
{
__exit_idle();
}
void arch_cpu_idle_dead(void)
......@@ -336,59 +288,33 @@ void stop_this_cpu(void *dummy)
halt();
}
bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
static cpumask_var_t amd_e400_c1e_mask;
void amd_e400_remove_cpu(int cpu)
{
if (amd_e400_c1e_mask != NULL)
cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}
/*
* AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
* pending message MSR. If we detect C1E, then we handle it the same
* way as C3 power states (local apic timer and TSC stop)
* AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
* states (local apic timer and TSC stop).
*/
static void amd_e400_idle(void)
{
if (!amd_e400_c1e_detected) {
u32 lo, hi;
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
amd_e400_c1e_detected = true;
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
pr_info("System has AMD C1E enabled\n");
}
/*
* We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
* gets set after static_cpu_has() places have been converted via
* alternatives.
*/
if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
default_idle();
return;
}
if (amd_e400_c1e_detected) {
int cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
cpumask_set_cpu(cpu, amd_e400_c1e_mask);
/* Force broadcast so ACPI can not interfere. */
tick_broadcast_force();
pr_info("Switch to broadcast mode on CPU%d\n", cpu);
}
tick_broadcast_enter();
default_idle();
/*
* The switch back from broadcast mode needs to be
* called with interrupts disabled.
* The switch back from broadcast mode needs to be called with
* interrupts disabled.
*/
local_irq_disable();
tick_broadcast_exit();
local_irq_enable();
} else
default_idle();
}
/*
......@@ -448,8 +374,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
if (x86_idle || boot_option_idle_override == IDLE_POLL)
return;
if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
} else if (prefer_mwait_c1_over_halt(c)) {
......@@ -459,11 +384,37 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
x86_idle = default_idle;
}
void __init init_amd_e400_c1e_mask(void)
void amd_e400_c1e_apic_setup(void)
{
if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id());
local_irq_disable();
tick_broadcast_force();
local_irq_enable();
}
}
void __init arch_post_acpi_subsys_init(void)
{
/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
if (x86_idle == amd_e400_idle)
zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
u32 lo, hi;
if (!boot_cpu_has_bug(X86_BUG_AMD_E400))
return;
/*
* AMD E400 detection needs to happen after ACPI has been enabled. If
* the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in
* MSR_K8_INT_PENDING_MSG.
*/
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
if (!(lo & K8_INTP_C1E_ACTIVE_MASK))
return;
boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E);
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
pr_info("System has AMD C1E enabled\n");
}
static int __init idle_setup(char *str)
......
......@@ -49,7 +49,6 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
......
......@@ -44,7 +44,6 @@
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
......
......@@ -58,7 +58,6 @@
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/realmode.h>
#include <asm/cpu.h>
#include <asm/numa.h>
......@@ -1596,7 +1595,6 @@ void play_dead_common(void)
{
idle_task_exit();
reset_lazy_tlbstate();
amd_e400_remove_cpu(raw_smp_processor_id());
/* Ack it */
(void)cpu_report_death();
......
......@@ -19,7 +19,6 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
......
......@@ -141,7 +141,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr,
if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
return;
if (amd_e400_c1e_detected)
if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
type = ACPI_STATE_C1;
/*
......
......@@ -106,8 +106,6 @@
#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
/* IOAT1 define left for i7300_idle driver to not fail compiling */
#define IOAT1_CHANSTS_OFFSET 0x04
#define IOAT_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
......
......@@ -8,20 +8,3 @@ config INTEL_IDLE
native Intel hardware idle features. The acpi_idle driver
can be configured at the same time, in order to handle
processors intel_idle does not support.
menu "Memory power savings"
depends on X86_64
config I7300_IDLE_IOAT_CHANNEL
bool
config I7300_IDLE
tristate "Intel chipset idle memory power saving driver"
select I7300_IDLE_IOAT_CHANNEL
help
Enable memory power savings when idle with certain Intel server
chipsets. The chipset must have I/O AT support, such as the
Intel 7300. The power savings depends on the type and quantity of
DRAM devices.
endmenu
obj-$(CONFIG_I7300_IDLE) += i7300_idle.o
obj-$(CONFIG_INTEL_IDLE) += intel_idle.o
/*
* (C) Copyright 2008 Intel Corporation
* Authors:
* Andy Henroid <andrew.d.henroid@intel.com>
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
*/
/*
* Save DIMM power on Intel 7300-based platforms when all CPUs/cores
* are idle, using the DIMM thermal throttling capability.
*
* This driver depends on the Intel integrated DMA controller (I/O AT).
* If the driver for I/O AT (drivers/dma/ioatdma*) is also enabled,
* this driver should work cooperatively.
*/
/* #define DEBUG */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpumask.h>
#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/stop_machine.h>
#include <linux/i7300_idle.h>
#include <asm/idle.h>
#include "../dma/ioat/hw.h"
#include "../dma/ioat/registers.h"
#define I7300_IDLE_DRIVER_VERSION "1.55"
#define I7300_PRINT "i7300_idle:"
#define MAX_STOP_RETRIES 10
static int debug;
module_param_named(debug, debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable debug printks in this driver");
static int forceload;
module_param_named(forceload, forceload, uint, 0644);
MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000");
#define dprintk(fmt, arg...) \
do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0)
/*
* Value to set THRTLOW to when initiating throttling
* 0 = No throttling
* 1 = Throttle when > 4 activations per eval window (Maximum throttling)
* 2 = Throttle when > 8 activations
* 168 = Throttle when > 672 activations (Minimum throttling)
*/
#define MAX_THROTTLE_LOW_LIMIT 168
static uint throttle_low_limit = 1;
module_param_named(throttle_low_limit, throttle_low_limit, uint, 0644);
MODULE_PARM_DESC(throttle_low_limit,
"Value for THRTLOWLM activation field "
"(0 = disable throttle, 1 = Max throttle, 168 = Min throttle)");
/*
* simple invocation and duration statistics
*/
static unsigned long total_starts;
static unsigned long total_us;
#ifdef DEBUG
static unsigned long past_skip;
#endif
static struct pci_dev *fbd_dev;
static raw_spinlock_t i7300_idle_lock;
static int i7300_idle_active;
static u8 i7300_idle_thrtctl_saved;
static u8 i7300_idle_thrtlow_saved;
static u32 i7300_idle_mc_saved;
static cpumask_var_t idle_cpumask;
static ktime_t start_ktime;
static unsigned long avg_idle_us;
static struct dentry *debugfs_dir;
/* Begin: I/O AT Helper routines */
#define IOAT_CHANBASE(ioat_ctl, chan) (ioat_ctl + 0x80 + 0x80 * chan)
/* Snoop control (disable snoops when coherency is not important) */
#define IOAT_DESC_SADDR_SNP_CTL (1UL << 1)
#define IOAT_DESC_DADDR_SNP_CTL (1UL << 2)
static struct pci_dev *ioat_dev;
static struct ioat_dma_descriptor *ioat_desc; /* I/O AT desc & data (1 page) */
static unsigned long ioat_desc_phys;
static u8 *ioat_iomap; /* I/O AT memory-mapped control regs (aka CB_BAR) */
static u8 *ioat_chanbase;
/* Start I/O AT memory copy */
static int i7300_idle_ioat_start(void)
{
u32 err;
/* Clear error (due to circular descriptor pointer) */
err = readl(ioat_chanbase + IOAT_CHANERR_OFFSET);
if (err)
writel(err, ioat_chanbase + IOAT_CHANERR_OFFSET);
writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
return 0;
}
/* Stop I/O AT memory copy */
static void i7300_idle_ioat_stop(void)
{
int i;
u64 sts;
for (i = 0; i < MAX_STOP_RETRIES; i++) {
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
udelay(10);
sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_STATUS;
if (sts != IOAT_CHANSTS_ACTIVE)
break;
}
if (i == MAX_STOP_RETRIES) {
dprintk("failed to stop I/O AT after %d retries\n",
MAX_STOP_RETRIES);
}
}
/* Test I/O AT by copying 1024 byte from 2k to 1k */
static int __init i7300_idle_ioat_selftest(u8 *ctl,
struct ioat_dma_descriptor *desc, unsigned long desc_phys)
{
u64 chan_sts;
memset(desc, 0, 2048);
memset((u8 *) desc + 2048, 0xab, 1024);
desc[0].size = 1024;
desc[0].ctl = 0;
desc[0].src_addr = desc_phys + 2048;
desc[0].dst_addr = desc_phys + 1024;
desc[0].next = 0;
writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
udelay(1000);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_STATUS;
if (chan_sts != IOAT_CHANSTS_DONE) {
/* Not complete, reset the channel */
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
return -1;
}
if (*(u32 *) ((u8 *) desc + 3068) != 0xabababab ||
*(u32 *) ((u8 *) desc + 2044) != 0xabababab) {
dprintk("Data values src 0x%x, dest 0x%x, memset 0x%x\n",
*(u32 *) ((u8 *) desc + 2048),
*(u32 *) ((u8 *) desc + 1024),
*(u32 *) ((u8 *) desc + 3072));
return -1;
}
return 0;
}
static struct device dummy_dma_dev = {
.init_name = "fallback device",
.coherent_dma_mask = DMA_BIT_MASK(64),
.dma_mask = &dummy_dma_dev.coherent_dma_mask,
};
/* Setup and initialize I/O AT */
/* This driver needs I/O AT as the throttling takes effect only when there is
* some memory activity. We use I/O AT to set up a dummy copy, while all CPUs
* go idle and memory is throttled.
*/
static int __init i7300_idle_ioat_init(void)
{
u8 ver, chan_count, ioat_chan;
u16 chan_ctl;
ioat_iomap = (u8 *) ioremap_nocache(pci_resource_start(ioat_dev, 0),
pci_resource_len(ioat_dev, 0));
if (!ioat_iomap) {
printk(KERN_ERR I7300_PRINT "failed to map I/O AT registers\n");
goto err_ret;
}
ver = readb(ioat_iomap + IOAT_VER_OFFSET);
if (ver != IOAT_VER_1_2) {
printk(KERN_ERR I7300_PRINT "unknown I/O AT version (%u.%u)\n",
ver >> 4, ver & 0xf);
goto err_unmap;
}
chan_count = readb(ioat_iomap + IOAT_CHANCNT_OFFSET);
if (!chan_count) {
printk(KERN_ERR I7300_PRINT "unexpected # of I/O AT channels "
"(%u)\n",
chan_count);
goto err_unmap;
}
ioat_chan = chan_count - 1;
ioat_chanbase = IOAT_CHANBASE(ioat_iomap, ioat_chan);
chan_ctl = readw(ioat_chanbase + IOAT_CHANCTRL_OFFSET);
if (chan_ctl & IOAT_CHANCTRL_CHANNEL_IN_USE) {
printk(KERN_ERR I7300_PRINT "channel %d in use\n", ioat_chan);
goto err_unmap;
}
writew(IOAT_CHANCTRL_CHANNEL_IN_USE,
ioat_chanbase + IOAT_CHANCTRL_OFFSET);
ioat_desc = (struct ioat_dma_descriptor *)dma_alloc_coherent(
&dummy_dma_dev, 4096,
(dma_addr_t *)&ioat_desc_phys, GFP_KERNEL);
if (!ioat_desc) {
printk(KERN_ERR I7300_PRINT "failed to allocate I/O AT desc\n");
goto err_mark_unused;
}
writel(ioat_desc_phys & 0xffffffffUL,
ioat_chanbase + IOAT1_CHAINADDR_OFFSET_LOW);
writel(ioat_desc_phys >> 32,
ioat_chanbase + IOAT1_CHAINADDR_OFFSET_HIGH);
if (i7300_idle_ioat_selftest(ioat_iomap, ioat_desc, ioat_desc_phys)) {
printk(KERN_ERR I7300_PRINT "I/O AT self-test failed\n");
goto err_free;
}
/* Setup circular I/O AT descriptor chain */
ioat_desc[0].ctl = IOAT_DESC_SADDR_SNP_CTL | IOAT_DESC_DADDR_SNP_CTL;
ioat_desc[0].src_addr = ioat_desc_phys + 2048;
ioat_desc[0].dst_addr = ioat_desc_phys + 3072;
ioat_desc[0].size = 128;
ioat_desc[0].next = ioat_desc_phys + sizeof(struct ioat_dma_descriptor);
ioat_desc[1].ctl = ioat_desc[0].ctl;
ioat_desc[1].src_addr = ioat_desc[0].src_addr;
ioat_desc[1].dst_addr = ioat_desc[0].dst_addr;
ioat_desc[1].size = ioat_desc[0].size;
ioat_desc[1].next = ioat_desc_phys;
return 0;
err_free:
dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0);
err_mark_unused:
writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
err_unmap:
iounmap(ioat_iomap);
err_ret:
return -ENODEV;
}
/* Cleanup I/O AT */
static void __exit i7300_idle_ioat_exit(void)
{
int i;
u64 chan_sts;
i7300_idle_ioat_stop();
/* Wait for a while for the channel to halt before releasing */
for (i = 0; i < MAX_STOP_RETRIES; i++) {
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_STATUS;
if (chan_sts != IOAT_CHANSTS_ACTIVE) {
writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
break;
}
udelay(1000);
}
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_STATUS;
/*
* We tried to reset multiple times. If IO A/T channel is still active
* flag an error and return without cleanup. Memory leak is better
* than random corruption in that extreme error situation.
*/
if (chan_sts == IOAT_CHANSTS_ACTIVE) {
printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
" Not freeing resources\n");
return;
}
dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0);
iounmap(ioat_iomap);
}
/* End: I/O AT Helper routines */
#define DIMM_THRTLOW 0x64
#define DIMM_THRTCTL 0x67
#define DIMM_THRTCTL_THRMHUNT (1UL << 0)
#define DIMM_MC 0x40
#define DIMM_GTW_MODE (1UL << 17)
#define DIMM_GBLACT 0x60
/*
* Keep track of an exponential-decaying average of recent idle durations.
* The latest duration gets DURATION_WEIGHT_PCT percentage weight
* in this average, with the old average getting the remaining weight.
*
* High weights emphasize recent history, low weights include long history.
*/
#define DURATION_WEIGHT_PCT 55
/*
* When the decaying average of recent durations or the predicted duration
* of the next timer interrupt is shorter than duration_threshold, the
* driver will decline to throttle.
*/
#define DURATION_THRESHOLD_US 100
/* Store DIMM thermal throttle configuration */
static int i7300_idle_thrt_save(void)
{
u32 new_mc_val;
u8 gblactlm;
pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &i7300_idle_thrtctl_saved);
pci_read_config_byte(fbd_dev, DIMM_THRTLOW, &i7300_idle_thrtlow_saved);
pci_read_config_dword(fbd_dev, DIMM_MC, &i7300_idle_mc_saved);
/*
* Make sure we have Global Throttling Window Mode set to have a
* "short" window. This (mostly) works around an issue where
* throttling persists until the end of the global throttling window
* size. On the tested system, this was resulting in a maximum of
* 64 ms to exit throttling (average 32 ms). The actual numbers
* depends on system frequencies. Setting the short window reduces
* this by a factor of 4096.
*
* We will only do this only if the system is set for
* unlimited-activations while in open-loop throttling (i.e., when
* Global Activation Throttle Limit is zero).
*/
pci_read_config_byte(fbd_dev, DIMM_GBLACT, &gblactlm);
dprintk("thrtctl_saved = 0x%02x, thrtlow_saved = 0x%02x\n",
i7300_idle_thrtctl_saved,
i7300_idle_thrtlow_saved);
dprintk("mc_saved = 0x%08x, gblactlm = 0x%02x\n",
i7300_idle_mc_saved,
gblactlm);
if (gblactlm == 0) {
new_mc_val = i7300_idle_mc_saved | DIMM_GTW_MODE;
pci_write_config_dword(fbd_dev, DIMM_MC, new_mc_val);
return 0;
} else {
dprintk("could not set GTW_MODE = 1 (OLTT enabled)\n");
return -ENODEV;
}
}
/* Restore DIMM thermal throttle configuration */
static void i7300_idle_thrt_restore(void)
{
pci_write_config_dword(fbd_dev, DIMM_MC, i7300_idle_mc_saved);
pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved);
pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved);
}
/* Enable DIMM thermal throttling */
static void i7300_idle_start(void)
{
u8 new_ctl;
u8 limit;
new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT;
pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
limit = throttle_low_limit;
if (unlikely(limit > MAX_THROTTLE_LOW_LIMIT))
limit = MAX_THROTTLE_LOW_LIMIT;
pci_write_config_byte(fbd_dev, DIMM_THRTLOW, limit);
new_ctl = i7300_idle_thrtctl_saved | DIMM_THRTCTL_THRMHUNT;
pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
}
/* Disable DIMM thermal throttling */
static void i7300_idle_stop(void)
{
u8 new_ctl;
u8 got_ctl;
new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT;
pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved);
pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved);
pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &got_ctl);
WARN_ON_ONCE(got_ctl != i7300_idle_thrtctl_saved);
}
/*
* i7300_avg_duration_check()
* return 0 if the decaying average of recent idle durations is
* more than DURATION_THRESHOLD_US
*/
static int i7300_avg_duration_check(void)
{
if (avg_idle_us >= DURATION_THRESHOLD_US)
return 0;
#ifdef DEBUG
past_skip++;
#endif
return 1;
}
/* Idle notifier to look at idle CPUs */
static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
unsigned long flags;
ktime_t now_ktime;
static ktime_t idle_begin_time;
static int time_init = 1;
if (!throttle_low_limit)
return 0;
if (unlikely(time_init)) {
time_init = 0;
idle_begin_time = ktime_get();
}
raw_spin_lock_irqsave(&i7300_idle_lock, flags);
if (val == IDLE_START) {
cpumask_set_cpu(smp_processor_id(), idle_cpumask);
if (cpumask_weight(idle_cpumask) != num_online_cpus())
goto end;
now_ktime = ktime_get();
idle_begin_time = now_ktime;
if (i7300_avg_duration_check())
goto end;
i7300_idle_active = 1;
total_starts++;
start_ktime = now_ktime;
i7300_idle_start();
i7300_idle_ioat_start();
} else if (val == IDLE_END) {
cpumask_clear_cpu(smp_processor_id(), idle_cpumask);
if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) {
/* First CPU coming out of idle */
u64 idle_duration_us;
now_ktime = ktime_get();
idle_duration_us = ktime_to_us(ktime_sub
(now_ktime, idle_begin_time));
avg_idle_us =
((100 - DURATION_WEIGHT_PCT) * avg_idle_us +
DURATION_WEIGHT_PCT * idle_duration_us) / 100;
if (i7300_idle_active) {
ktime_t idle_ktime;
idle_ktime = ktime_sub(now_ktime, start_ktime);
total_us += ktime_to_us(idle_ktime);
i7300_idle_ioat_stop();
i7300_idle_stop();
i7300_idle_active = 0;
}
}
}
end:
raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
return 0;
}
static struct notifier_block i7300_idle_nb = {
.notifier_call = i7300_idle_notifier,
};
MODULE_DEVICE_TABLE(pci, pci_tbl);
static ssize_t stats_read_ul(struct file *fp, char __user *ubuf, size_t count,
loff_t *off)
{
unsigned long *p = fp->private_data;
char buf[32];
int len;
len = snprintf(buf, 32, "%lu\n", *p);
return simple_read_from_buffer(ubuf, count, off, buf, len);
}
static const struct file_operations idle_fops = {
.open = simple_open,
.read = stats_read_ul,
.llseek = default_llseek,
};
struct debugfs_file_info {
void *ptr;
char name[32];
struct dentry *file;
} debugfs_file_list[] = {
{&total_starts, "total_starts", NULL},
{&total_us, "total_us", NULL},
#ifdef DEBUG
{&past_skip, "past_skip", NULL},
#endif
{NULL, "", NULL}
};
static int __init i7300_idle_init(void)
{
raw_spin_lock_init(&i7300_idle_lock);
total_us = 0;
if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
return -ENODEV;
if (i7300_idle_thrt_save())
return -ENODEV;
if (i7300_idle_ioat_init())
return -ENODEV;
if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL))
return -ENOMEM;
debugfs_dir = debugfs_create_dir("i7300_idle", NULL);
if (debugfs_dir) {
int i = 0;
while (debugfs_file_list[i].ptr != NULL) {
debugfs_file_list[i].file = debugfs_create_file(
debugfs_file_list[i].name,
S_IRUSR,
debugfs_dir,
debugfs_file_list[i].ptr,
&idle_fops);
i++;
}
}
idle_notifier_register(&i7300_idle_nb);
printk(KERN_INFO "i7300_idle: loaded v%s\n", I7300_IDLE_DRIVER_VERSION);
return 0;
}
static void __exit i7300_idle_exit(void)
{
idle_notifier_unregister(&i7300_idle_nb);
free_cpumask_var(idle_cpumask);
if (debugfs_dir) {
int i = 0;
while (debugfs_file_list[i].file != NULL) {
debugfs_remove(debugfs_file_list[i].file);
i++;
}
debugfs_remove(debugfs_dir);
}
i7300_idle_thrt_restore();
i7300_idle_ioat_exit();
}
module_init(i7300_idle_init);
module_exit(i7300_idle_exit);
MODULE_AUTHOR("Andy Henroid <andrew.d.henroid@intel.com>");
MODULE_DESCRIPTION("Intel Chipset DIMM Idle Power Saving Driver v"
I7300_IDLE_DRIVER_VERSION);
MODULE_LICENSE("GPL");
......@@ -56,7 +56,6 @@
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/idle.h>
#include <asm/hardirq.h>
#define MAX_TARGET_RATIO (50U)
......
......@@ -37,7 +37,6 @@
#include <asm/desc.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/io_apic.h>
#include <asm/i8259.h>
#include <asm/xen/pci.h>
......@@ -1256,7 +1255,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
irq_enter();
#ifdef CONFIG_X86
exit_idle();
inc_irq_stat(irq_hv_callback_count);
#endif
......
......@@ -448,6 +448,8 @@ void __init parse_early_param(void)
done = 1;
}
void __init __weak arch_post_acpi_subsys_init(void) { }
void __init __weak smp_setup_processor_id(void)
{
}
......@@ -649,6 +651,7 @@ asmlinkage __visible void __init start_kernel(void)
check_bugs();
acpi_subsystem_init();
arch_post_acpi_subsys_init();
sfi_init_late();
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment