Commit e30b878a authored by Andi Kleen's avatar Andi Kleen Committed by Linus Torvalds

[PATCH] critical x86-64 merge

There were some nasty bugs in the x86-64 code, including one race that
could cause random reboots, especially on Intel machines, with the NMI
watchdog.  This patch fixes them and also includes some harmless
cleanups.

Main fixes were for some buglets in the IOMMU code and the plugging of a
race in the exception stack handling.  Also disables a broken MCE on K8
explicitly.

Also finally the preempt compile issues are fixed.

 - Declare hpet interrupt separately in drivers/char/rtc.c
 - Fix rtc.h/hpet.h to not depend on interrupt.h
 - Finally include smp_lock.h in hardirq.h
 - Update defconfig
 - Export bad_dma_address
 - Merge with 2.6.5rc2
 - Never schedule on interrupt stacks.
 - Add option to force software iotlb (iommu=soft)
 - Add ifdefs to gsi patch to match i386 (Bjorn Helgaas)
 - Fix K8 GART TLB MCE workaround to actually work
 - Fix dwarf2 unwind table in SAVE_ARGS (Jim Houston)
 - Disable APIC on VIA/NVidia even with acpi=off (Gwenole Beauchesne)
 - Fix parsing bug in "apic" option (Gwenole Beauchesne)
 - Fix dma mask handling in pci_alloc_consistent
 - Make pci_alloc_consistent more robust in low memory situations.
 - Print version number in oopses (from i386)
 - ACPI GSI cleanup (Bjorn Helgaas)
 - Disable K8 GART TLB walk error MCE explicitly
 - Add support to disable individual MCEs in the various banks.
parent b66506ce
This diff is collapsed.
......@@ -366,6 +366,17 @@ __setup("acpi_pic_sci=", acpi_pic_sci_setup);
#endif /* CONFIG_ACPI_BUS */
/*
 * Translate an ACPI global system interrupt (GSI) number into the IRQ
 * number stored through @irq.
 *
 * @gsi: GSI number to translate.
 * @irq: output; receives the resulting IRQ number.
 *
 * The GSI is used unchanged as the IRQ, except when CONFIG_X86_IO_APIC is
 * set, PCI vectors are in use and the GSI is not a platform legacy IRQ;
 * then IO_APIC_VECTOR(gsi) is used instead.
 *
 * Always returns 0.
 */
int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
{
	/* Default: identity mapping. */
	unsigned int mapped = gsi;

#ifdef CONFIG_X86_IO_APIC
	if (use_pci_vector() && !platform_legacy_irq(gsi))
		mapped = IO_APIC_VECTOR(gsi);
#endif

	*irq = mapped;
	return 0;
}
static unsigned long __init
acpi_scan_rsdp (
unsigned long start,
......
......@@ -402,9 +402,9 @@ ENTRY(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
CFI_STARTPROC simple
CFI_DEF_CFA rsp,(SS-ORIG_RAX)
CFI_OFFSET rsp,(RSP-SS)
CFI_OFFSET rip,(RIP-SS)
CFI_DEF_CFA rsp,(SS-RDI)
CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
cld
#ifdef CONFIG_DEBUG_INFO
SAVE_ALL
......@@ -805,6 +805,8 @@ ENTRY(debug)
paranoidentry do_debug
/* switch back to process stack to restore the state ptrace touched */
movq %rax,%rsp
testl $3,CS(%rsp)
jnz paranoid_userspace
jmp paranoid_exit
CFI_ENDPROC
......@@ -816,8 +818,6 @@ ENTRY(nmi)
paranoidentry do_nmi
/* ebx: no swapgs flag */
paranoid_exit:
testl $3,CS(%rsp)
jnz paranoid_userspace
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
paranoid_swapgs:
......@@ -870,6 +870,8 @@ ENTRY(double_fault)
CFI_STARTPROC
paranoidentry do_double_fault
movq %rax,%rsp
testl $3,CS(%rsp)
jnz paranoid_userspace
jmp paranoid_exit
CFI_ENDPROC
......@@ -884,6 +886,8 @@ ENTRY(stack_segment)
CFI_STARTPROC
paranoidentry do_stack_segment
movq %rax,%rsp
testl $3,CS(%rsp)
jnz paranoid_userspace
jmp paranoid_exit
CFI_ENDPROC
......
......@@ -22,12 +22,13 @@
#include <asm/uaccess.h>
#define MISC_MCELOG_MINOR 227
/* Upper bound on the number of MCE banks handled; mce_init() clamps 'banks' to this. */
#define NR_BANKS 5
static int mce_disabled __initdata;
/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic */
static int tolerant = 2;
/* Number of banks in use; taken from the low 8 bits of the capability value in mce_init(). */
static int banks;
/* Bitmask of banks to ignore: bit i set means bank i is skipped. */
static unsigned long disabled_banks;
/*
 * Per-bank control value written to MSR_IA32_MC0_CTL + 4*i by mce_init().
 * Defaults to all ones; a value of zero makes do_machine_check() skip the bank.
 */
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
/*
* Lockless MCE logging infrastructure.
......@@ -144,7 +145,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
mb();
for (i = 0; i < banks; i++) {
if (test_bit(i, &disabled_banks))
if (!bank[i])
continue;
rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
......@@ -179,7 +180,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
confused it's normally not necessary to panic, unless you are
paranoid (tolerant == 0) */
if (!user_space && (panic_on_oops || tolerant < 2))
mce_panic("Uncorrected machine check in kernel", &m, mcestart);
mce_panic("Uncorrected machine check", &m, mcestart);
/* do_exit takes an awful lot of locks and has as slight risk
of deadlocking. If you don't want that don't set tolerant >= 2 */
......@@ -238,17 +239,31 @@ static void mce_init(void *dummy)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
banks = cap & 0xff;
if (banks > NR_BANKS) {
printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
banks = NR_BANKS;
}
mce_clear_all();
for (i = 0; i < banks; i++) {
u64 val = test_bit(i, &disabled_banks) ? 0 : ~0UL;
wrmsrl(MSR_IA32_MC0_CTL+4*i, val);
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
set_in_cr4(X86_CR4_MCE);
}
/*
 * Add per CPU specific workarounds here.
 *
 * Adjusts the per-bank control masks in bank[] for the CPU described by @c.
 */
static void __init mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	/* Only AMD CPUs with c->x86 == 15 need the adjustment below. */
	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 != 15)
		return;

	/*
	 * Disable GART TLB walk error reporting (bit 10 of bank 4). The BIOS
	 * should have disabled it already, but does not always do so, and it
	 * trips off incorrectly with the IOMMU & 3ware & Cerberus.
	 */
	clear_bit(10, &bank[4]);
}
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off.
......@@ -257,6 +272,8 @@ void __init mcheck_init(struct cpuinfo_x86 *c)
{
static unsigned long mce_cpus __initdata = 0;
mce_cpu_quirks(c);
if (test_and_set_bit(smp_processor_id(), &mce_cpus) || !mce_available(c))
return;
......@@ -343,23 +360,9 @@ static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned
}
}
#if 0 /* for testing */
/*
 * Test-only write handler: takes exactly one struct mce from user space
 * and feeds it to mce_log().
 *
 * @f:   file being written (unused).
 * @buf: user-space buffer holding the struct mce to inject.
 * @sz:  number of bytes offered; must equal sizeof(struct mce).
 * @off: file offset (unused).
 *
 * Returns sizeof(struct mce) on success, -EINVAL if @sz has the wrong
 * size, or -EFAULT if the user buffer cannot be read.
 */
static ssize_t mce_write(struct file *f, const char __user *buf, size_t sz, loff_t *off)
{
	struct mce m;

	if (sz != sizeof(struct mce))
		return -EINVAL;

	/*
	 * copy_from_user() returns the number of bytes it could NOT copy.
	 * Bail out on any shortfall rather than logging a partially
	 * uninitialised record.
	 */
	if (copy_from_user(&m, buf, sizeof(struct mce)))
		return -EFAULT;

	/* Clear the finished flag before handing the record to mce_log(). */
	m.finished = 0;
	mce_log(&m);
	return sizeof(struct mce);
}
#endif
/*
 * File operations table exposing mce_read and mce_ioctl.
 * The write hook is left commented out, so no write handler is installed.
 */
static struct file_operations mce_chrdev_ops = {
.read = mce_read,
.ioctl = mce_ioctl,
//.write = mce_write
};
static struct miscdevice mce_log_device = {
......@@ -425,23 +428,27 @@ static struct sys_device device_mce = {
};
/* Why are there no generic functions for this? */
#define ACCESSOR(name, start) \
#define ACCESSOR(name, var, start) \
static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
return sprintf(buf, "%lu\n", (unsigned long)name); \
return sprintf(buf, "%lu\n", (unsigned long)var); \
} \
static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
char *end; \
unsigned long new = simple_strtoul(buf, &end, 0); \
if (end == buf) return -EINVAL; \
name = new; \
var = new; \
start; \
return end-buf; \
} \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
ACCESSOR(disabled_banks,mce_restart())
ACCESSOR(tolerant,)
ACCESSOR(check_interval,mce_restart())
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
static __init int mce_init_device(void)
{
......@@ -453,7 +460,11 @@ static __init int mce_init_device(void)
err = sysdev_register(&device_mce);
if (!err) {
/* could create per CPU objects, but is not worth it. */
sysdev_create_file(&device_mce, &attr_disabled_banks);
sysdev_create_file(&device_mce, &attr_bank0ctl);
sysdev_create_file(&device_mce, &attr_bank1ctl);
sysdev_create_file(&device_mce, &attr_bank2ctl);
sysdev_create_file(&device_mce, &attr_bank3ctl);
sysdev_create_file(&device_mce, &attr_bank4ctl);
sysdev_create_file(&device_mce, &attr_tolerant);
sysdev_create_file(&device_mce, &attr_check_interval);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment