Commit 2617765b authored by Dave Jones, committed by Jaroslav Kysela

[PATCH] bluesmoke update.

o   P4 thermal throttling is now a compile-time option
o   ifdefs cleaned up due to above
o   Only poke the LVT if thermal throttling is enabled.
o   Remove bogus cache flushing as per previous discussion.
o   Replace hard coded timer values with MCE_RATE
o   Change default polling frequency from 5 to 15 seconds
o   SMP fixes. (Don't re-add the timer.)
o   Compile time warning fixes.
o   Add config helptext
parent 45504663
...@@ -826,6 +826,10 @@ CONFIG_X86_MCE_NONFATAL ...@@ -826,6 +826,10 @@ CONFIG_X86_MCE_NONFATAL
or out-of-spec (ie, overclocked) hardware. or out-of-spec (ie, overclocked) hardware.
This option only does something on hardware with Intel P6 style MCE. This option only does something on hardware with Intel P6 style MCE.
(Pentium Pro and above, AMD Athlon/Duron) (Pentium Pro and above, AMD Athlon/Duron)
CONFIG_X86_MCE_P4THERMAL
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
CONFIG_TOSHIBA CONFIG_TOSHIBA
This adds a driver to safely access the System Management Mode of This adds a driver to safely access the System Management Mode of
......
...@@ -154,7 +154,9 @@ if [ "$CONFIG_MWINCHIP3D" = "y" ]; then ...@@ -154,7 +154,9 @@ if [ "$CONFIG_MWINCHIP3D" = "y" ]; then
fi fi
bool 'Machine Check Exception' CONFIG_X86_MCE bool 'Machine Check Exception' CONFIG_X86_MCE
dep_bool 'Check for non-fatal errors' CONFIG_X86_MCE_NONFATAL $CONFIG_X86_MCE dep_bool 'Check for non-fatal errors on Athlon/Duron' CONFIG_X86_MCE_NONFATAL $CONFIG_X86_MCE
dep_bool 'check for P4 thermal throttling interrupt.' CONFIG_X86_MCE_P4THERMAL $CONFIG_X86_MCE $CONFIG_X86_LOCAL_APIC
tristate 'Toshiba Laptop support' CONFIG_TOSHIBA tristate 'Toshiba Laptop support' CONFIG_TOSHIBA
tristate 'Dell laptop support' CONFIG_I8K tristate 'Dell laptop support' CONFIG_I8K
......
...@@ -58,7 +58,8 @@ CONFIG_X86_TSC=y ...@@ -58,7 +58,8 @@ CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_MCE=y CONFIG_X86_MCE=y
# CONFIG_X86_MCE_NONFATAL is not set CONFIG_X86_MCE_NONFATAL=y
CONFIG_X86_MCE_P4THERMAL=y
# CONFIG_TOSHIBA is not set # CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set # CONFIG_I8K is not set
# CONFIG_MICROCODE is not set # CONFIG_MICROCODE is not set
......
...@@ -78,6 +78,13 @@ void clear_local_APIC(void) ...@@ -78,6 +78,13 @@ void clear_local_APIC(void)
apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
} }
/* lets not touch this if we didn't frob it */
#ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR);
apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
/* /*
* Clean APIC state for other OSs: * Clean APIC state for other OSs:
*/ */
...@@ -88,6 +95,11 @@ void clear_local_APIC(void) ...@@ -88,6 +95,11 @@ void clear_local_APIC(void)
apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4) if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
#ifdef CONFIG_X86_MCE_P4THERMAL
if (maxlvt >= 5)
apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif
v = GET_APIC_VERSION(apic_read(APIC_LVR)); v = GET_APIC_VERSION(apic_read(APIC_LVR));
if (APIC_INTEGRATED(v)) { /* !82489DX */ if (APIC_INTEGRATED(v)) { /* !82489DX */
if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
...@@ -472,6 +484,7 @@ static void apic_pm_suspend(void *data) ...@@ -472,6 +484,7 @@ static void apic_pm_suspend(void *data)
apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
__save_flags(flags); __save_flags(flags);
__cli(); __cli();
disable_local_APIC(); disable_local_APIC();
......
/*
* arch/i386/kernel/bluesmoke.c - x86 Machine Check Exception Reporting
*/
#include <linux/init.h> #include <linux/init.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -19,21 +22,12 @@ static int mce_disabled __initdata = 0; ...@@ -19,21 +22,12 @@ static int mce_disabled __initdata = 0;
static int banks; static int banks;
/*
* If we get an MCE, we don't know what state the caches/TLB's are
* going to be in, so we throw them all away.
*/
static void inline flush_all (void)
{
__asm__ __volatile__ ("invd": : );
__flush_tlb();
}
#ifdef CONFIG_X86_MCE_P4THERMAL
/* /*
* P4/Xeon Thermal transition interrupt handler * P4/Xeon Thermal transition interrupt handler
*/ */
#ifdef CONFIG_X86_LOCAL_APIC
static void intel_thermal_interrupt(struct pt_regs *regs) static void intel_thermal_interrupt(struct pt_regs *regs)
{ {
u32 l, h; u32 l, h;
...@@ -49,7 +43,6 @@ static void intel_thermal_interrupt(struct pt_regs *regs) ...@@ -49,7 +43,6 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
printk(KERN_INFO "CPU#%d: Temperature/speed normal\n", cpu); printk(KERN_INFO "CPU#%d: Temperature/speed normal\n", cpu);
} }
} }
#endif
static void unexpected_thermal_interrupt(struct pt_regs *regs) static void unexpected_thermal_interrupt(struct pt_regs *regs)
{ {
...@@ -71,14 +64,13 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs regs) ...@@ -71,14 +64,13 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs regs)
static void __init intel_init_thermal(struct cpuinfo_x86 *c) static void __init intel_init_thermal(struct cpuinfo_x86 *c)
{ {
#ifdef CONFIG_X86_LOCAL_APIC
u32 l, h; u32 l, h;
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
/* Thermal monitoring */ /* Thermal monitoring */
if (!test_bit(X86_FEATURE_ACPI, c->x86_capability)) if (!test_bit(X86_FEATURE_ACPI, c->x86_capability))
return; /* -ENODEV */ return; /* -ENODEV */
/* Clock modulation */ /* Clock modulation */
if (!test_bit(X86_FEATURE_ACC, c->x86_capability)) if (!test_bit(X86_FEATURE_ACC, c->x86_capability))
return; /* -ENODEV */ return; /* -ENODEV */
...@@ -96,16 +88,16 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c) ...@@ -96,16 +88,16 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c)
printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu); printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu);
} }
/* check wether a vector already exists */ /* check whether a vector already exists */
l = apic_read(APIC_LVTTHMR); l = apic_read(APIC_LVTTHMR);
if (l & 0xff) { if (l & 0xff) {
printk(KERN_DEBUG "CPU#%d: Thermal LVT already handled\n", cpu); printk(KERN_DEBUG "CPU#%d: Thermal LVT already handled\n", cpu);
return; /* -EBUSY */ return; /* -EBUSY */
} }
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu); printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu);
/* The temperature transition interrupt handler setup */ /* The temperature transition interrupt handler setup */
l = THERMAL_APIC_VECTOR; /* our delivery vector */ l = THERMAL_APIC_VECTOR; /* our delivery vector */
l |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ l |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
...@@ -120,8 +112,9 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c) ...@@ -120,8 +112,9 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c)
apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
return; return;
#endif
} }
#endif /* CONFIG_X86_MCE_P4THERMAL */
/* /*
* Machine Check Handler For PII/PIII * Machine Check Handler For PII/PIII
...@@ -134,32 +127,26 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) ...@@ -134,32 +127,26 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
u32 mcgstl, mcgsth; u32 mcgstl, mcgsth;
int i; int i;
flush_all();
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if(mcgstl&(1<<0)) /* Recoverable ? */ if(mcgstl&(1<<0)) /* Recoverable ? */
recover=0; recover=0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
for(i=0;i<banks;i++) for (i=0;i<banks;i++) {
{
rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high); rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
if(high&(1<<31)) if(high&(1<<31)) {
{
if(high&(1<<29)) if(high&(1<<29))
recover|=1; recover|=1;
if(high&(1<<25)) if(high&(1<<25))
recover|=2; recover|=2;
printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low); printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high&=~(1<<31); high&=~(1<<31);
if(high&(1<<27)) if(high&(1<<27)) {
{
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
printk("[%08x%08x]", ahigh, alow); printk("[%08x%08x]", ahigh, alow);
} }
if(high&(1<<26)) if(high&(1<<26)) {
{
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
printk(" at %08x%08x", ahigh, alow); printk(" at %08x%08x", ahigh, alow);
} }
...@@ -170,7 +157,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) ...@@ -170,7 +157,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
wmb(); wmb();
} }
} }
if(recover&2) if(recover&2)
panic("CPU context corrupt"); panic("CPU context corrupt");
if(recover&1) if(recover&1)
...@@ -183,7 +170,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) ...@@ -183,7 +170,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
/* /*
* Machine check handler for Pentium class Intel * Machine check handler for Pentium class Intel
*/ */
static void pentium_machine_check(struct pt_regs * regs, long error_code) static void pentium_machine_check(struct pt_regs * regs, long error_code)
{ {
u32 loaddr, hi, lotype; u32 loaddr, hi, lotype;
...@@ -197,7 +184,7 @@ static void pentium_machine_check(struct pt_regs * regs, long error_code) ...@@ -197,7 +184,7 @@ static void pentium_machine_check(struct pt_regs * regs, long error_code)
/* /*
* Machine check handler for WinChip C6 * Machine check handler for WinChip C6
*/ */
static void winchip_machine_check(struct pt_regs * regs, long error_code) static void winchip_machine_check(struct pt_regs * regs, long error_code)
{ {
printk(KERN_EMERG "CPU#%d: Machine Check Exception.\n", smp_processor_id()); printk(KERN_EMERG "CPU#%d: Machine Check Exception.\n", smp_processor_id());
...@@ -225,47 +212,50 @@ asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) ...@@ -225,47 +212,50 @@ asmlinkage void do_machine_check(struct pt_regs * regs, long error_code)
#ifdef CONFIG_X86_MCE_NONFATAL #ifdef CONFIG_X86_MCE_NONFATAL
struct timer_list mce_timer; static struct timer_list mce_timer;
static int timerset = 0;
#define MCE_RATE 15*HZ /* timer rate is 15s */
static void mce_checkregs (unsigned int cpu) static void mce_checkregs (void *info)
{ {
u32 low, high; u32 low, high;
int i; int i;
unsigned int *cpu = info;
if (cpu!=smp_processor_id()) BUG_ON (*cpu != smp_processor_id());
BUG();
for (i=0; i<banks; i++) { for (i=0; i<banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if ((low | high) != 0) { if ((low | high) != 0) {
flush_all();
printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occured on CPU %d.\n", smp_processor_id()); printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occured on CPU %d.\n", smp_processor_id());
printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low); printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);
/* Scrub the error so we don't pick it up in 5 seconds time. */ /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
/* Serialize */ /* Serialize */
wmb(); wmb();
} }
} }
/* Refresh the timer. */
mce_timer.expires = jiffies + 5 * HZ;
add_timer (&mce_timer);
} }
static void mce_timerfunc (unsigned long data) static void mce_timerfunc (unsigned long data)
{ {
int i; unsigned int i;
for (i=0; i<smp_num_cpus; i++) { for (i=0; i<smp_num_cpus; i++) {
if (i == smp_processor_id()) if (i == smp_processor_id())
mce_checkregs(i); mce_checkregs(&i);
else else
smp_call_function (mce_checkregs, i, 1, 1); smp_call_function (mce_checkregs, &i, 1, 1);
} }
/* Refresh the timer. */
mce_timer.expires = jiffies + MCE_RATE;
add_timer (&mce_timer);
} }
#endif #endif
...@@ -286,11 +276,11 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c) ...@@ -286,11 +276,11 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
if( !test_bit(X86_FEATURE_MCE, c->x86_capability) ) if( !test_bit(X86_FEATURE_MCE, c->x86_capability) )
return; return;
/* /*
* Pentium machine check * Pentium machine check
*/ */
if(c->x86 == 5) if(c->x86 == 5)
{ {
/* Default P5 to off as its often misconnected */ /* Default P5 to off as its often misconnected */
...@@ -308,20 +298,20 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c) ...@@ -308,20 +298,20 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
return; return;
} }
/* /*
* Check for PPro style MCA * Check for PPro style MCA
*/ */
if( !test_bit(X86_FEATURE_MCA, c->x86_capability) ) if( !test_bit(X86_FEATURE_MCA, c->x86_capability) )
return; return;
/* Ok machine check is available */ /* Ok machine check is available */
machine_check_vector = intel_machine_check; machine_check_vector = intel_machine_check;
wmb(); wmb();
if(done==0) if(done==0)
printk(KERN_INFO "Intel machine check architecture supported.\n"); printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h); rdmsr(MSR_IA32_MCG_CAP, l, h);
...@@ -343,8 +333,12 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c) ...@@ -343,8 +333,12 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
set_in_cr4(X86_CR4_MCE); set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id()); printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
intel_init_thermal(c); #ifdef CONFIG_X86_MCE_P4THERMAL
/* Only enable thermal throttling warning on Pentium 4. */
if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 15)
intel_init_thermal(c);
#endif
done=1; done=1;
} }
...@@ -352,7 +346,7 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c) ...@@ -352,7 +346,7 @@ static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
/* /*
* Set up machine check reporting on the Winchip C6 series * Set up machine check reporting on the Winchip C6 series
*/ */
static void __init winchip_mcheck_init(struct cpuinfo_x86 *c) static void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
{ {
u32 lo, hi; u32 lo, hi;
...@@ -377,6 +371,7 @@ static void __init winchip_mcheck_init(struct cpuinfo_x86 *c) ...@@ -377,6 +371,7 @@ static void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
void __init mcheck_init(struct cpuinfo_x86 *c) void __init mcheck_init(struct cpuinfo_x86 *c)
{ {
if(mce_disabled==1) if(mce_disabled==1)
return; return;
...@@ -387,12 +382,17 @@ void __init mcheck_init(struct cpuinfo_x86 *c) ...@@ -387,12 +382,17 @@ void __init mcheck_init(struct cpuinfo_x86 *c)
if(c->x86 == 6) { if(c->x86 == 6) {
intel_mcheck_init(c); intel_mcheck_init(c);
#ifdef CONFIG_X86_MCE_NONFATAL #ifdef CONFIG_X86_MCE_NONFATAL
/* Set the timer to check for non-fatal errors every 5 seconds */ if (timerset == 0) {
init_timer (&mce_timer); /* Set the timer to check for non-fatal
mce_timer.expires = jiffies + 5 * HZ; errors every MCE_RATE seconds */
mce_timer.data = 0; init_timer (&mce_timer);
mce_timer.function = &mce_timerfunc; mce_timer.expires = jiffies + MCE_RATE;
add_timer (&mce_timer); mce_timer.data = 0;
mce_timer.function = &mce_timerfunc;
add_timer (&mce_timer);
timerset = 1;
printk(KERN_INFO "Machine check exception polling timer started.\n");
}
#endif #endif
} }
break; break;
......
...@@ -391,6 +391,11 @@ void __init init_IRQ(void) ...@@ -391,6 +391,11 @@ void __init init_IRQ(void)
/* IPI vectors for APIC spurious and error interrupts */ /* IPI vectors for APIC spurious and error interrupts */
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* thermal monitor LVT interrupt */
#ifdef CONFIG_X86_MCE_P4THERMAL
set_intr_gate(THERMAL_APIC_VECTOR, smp_thermal_interrupt);
#endif
#endif #endif
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment