Commit 8b49c803 authored by Andi Kleen's avatar Andi Kleen Committed by Linus Torvalds

[PATCH] x86-64 merge for 2.6.0test3

Without these changes an x86-64 NUMA kernel won't boot in many
configurations.

The main change is the improved IOMMU code which supports merging of
mappings and has various bugfixes.

 - Update defconfig
 - Use argument ptregs in 32bit elf_core_copy_task_fpregs
 - Harden aperture fixup code: read aperture from the AGP bridge if needed,
   better error checking.
 - Support nmi_watchdog=panic to panic on watchdog trigger
 - IOMMU: Support panic on IOMMU overflow (iommu=panic)
 - IOMMU: Force SAC for mappings >40bits when iommu=force is active
   (this can potentially give better performance)
 - IOMMU: Cache northbridges for faster TLB flush
 - IOMMU: Fix SMP race in TLB flush
 - IOMMU: Merge pci_alloc_consistent and pci_map_single
 - IOMMU: Clean up leak tracing
 - IOMMU: Rewrite pci_map_sg, support merging of mappings
   On overflow fall back to piece-by-piece mapping.
 - IOMMU: Tell block layer to assume merging when iommu force is active
   (this gives better performance with MTP fusion, drawback is that the
   overflow/fragmentation handling of the IOMMU area is still a bit
   dubious with that)
 - Fix/clean up per cpu data
 - Add 64bit clean time(2)
 - Export cpu_callout_map for IPv6
 - Handle nodes with no own memory in NUMA discovery.
   This fixes boot on various newer Opteron motherboards where the memory
   is only connected to a single CPU.
 - Fix fallback path for failed NUMA discovery. numnodes has to be reset.
 - Check for enabled nodes in NUMA discovery (Eric Biederman)
 - Remove NUMA emunodes support. Has badly bitrotted.
 - Add __clear_bit_string for IOMMU code
 - Add new 32bit system calls to ia32_unistd.h
 - Remove duplicate default_do_nmi prototype
 - Make PCI_DMA_BUS_IS_PHYS dependent on no_iommu
 - Fix padding length of siginfo_t to match glibc
 - More pci direct access functions.
parent 7d701696
......@@ -22,7 +22,7 @@ CONFIG_SWAP=y
CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
CONFIG_LOG_BUF_SHIFT=16
CONFIG_LOG_BUF_SHIFT=18
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
CONFIG_FUTEX=y
......@@ -149,17 +149,12 @@ CONFIG_LBD=y
# ATA/ATAPI/MFM/RLL support
#
CONFIG_IDE=y
#
# IDE, ATA and ATAPI Block devices
#
CONFIG_BLK_DEV_IDE=y
#
# Please see Documentation/ide.txt for help/info on IDE drives
#
# CONFIG_BLK_DEV_HD_IDE is not set
# CONFIG_BLK_DEV_HD is not set
CONFIG_BLK_DEV_IDEDISK=y
CONFIG_IDEDISK_MULTI_MODE=y
# CONFIG_IDEDISK_STROKE is not set
......@@ -174,15 +169,16 @@ CONFIG_BLK_DEV_IDECD=y
#
# CONFIG_BLK_DEV_CMD640 is not set
CONFIG_BLK_DEV_IDEPCI=y
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_IDEPCI_SHARE_IRQ is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDE_TCQ is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_PCI_WIP is not set
CONFIG_BLK_DEV_ADMA=y
# CONFIG_BLK_DEV_AEC62XX is not set
......@@ -192,23 +188,25 @@ CONFIG_BLK_DEV_AMD74XX=y
# CONFIG_BLK_DEV_TRIFLEX is not set
# CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
# CONFIG_BLK_DEV_PIIX is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
# CONFIG_BLK_DEV_PDC202XX_NEW is not set
# CONFIG_BLK_DEV_RZ1000 is not set
# CONFIG_BLK_DEV_SVWKS is not set
# CONFIG_BLK_DEV_SIIMAGE is not set
# CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set
# CONFIG_BLK_DEV_VIA82CXXX is not set
CONFIG_IDEDMA_AUTO=y
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
CONFIG_IDEDMA_AUTO=y
# CONFIG_DMA_NONPCI is not set
# CONFIG_BLK_DEV_HD is not set
#
# SCSI device support
......@@ -251,7 +249,7 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
CONFIG_SCSI_IPS=m
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
......@@ -301,7 +299,6 @@ CONFIG_NET=y
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
# CONFIG_NETLINK_DEV is not set
# CONFIG_NETFILTER is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
......@@ -317,12 +314,10 @@ CONFIG_IP_MULTICAST=y
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
CONFIG_IPV6=y
CONFIG_IPV6_PRIVACY=y
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6 is not set
# CONFIG_DECNET is not set
# CONFIG_BRIDGE is not set
# CONFIG_NETFILTER is not set
# CONFIG_XFRM_USER is not set
#
......@@ -333,8 +328,6 @@ CONFIG_IPV6_SCTP__=y
# CONFIG_ATM is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_LLC is not set
# CONFIG_DECNET is not set
# CONFIG_BRIDGE is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
......@@ -546,11 +539,7 @@ CONFIG_UNIX98_PTY_COUNT=256
#
# IPMI
#
CONFIG_IPMI_HANDLER=y
CONFIG_IPMI_PANIC_EVENT=y
CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_KCS=y
CONFIG_IPMI_WATCHDOG=y
# CONFIG_IPMI_HANDLER is not set
#
# Watchdog Cards
......@@ -570,12 +559,7 @@ CONFIG_RTC=y
# CONFIG_FTAPE is not set
CONFIG_AGP=y
CONFIG_AGP_AMD_8151=y
CONFIG_DRM=y
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_GAMMA is not set
# CONFIG_DRM_R128 is not set
CONFIG_DRM_RADEON=y
# CONFIG_DRM_MGA is not set
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
CONFIG_RAW_DRIVER=y
CONFIG_HANGCHECK_TIMER=y
......@@ -598,19 +582,25 @@ CONFIG_HANGCHECK_TIMER=y
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
# CONFIG_EXT2_FS_SECURITY is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
# CONFIG_EXT3_FS_SECURITY is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
CONFIG_REISERFS_FS=y
# CONFIG_REISERFS_CHECK is not set
# CONFIG_REISERFS_PROC_INFO is not set
# CONFIG_JFS_FS is not set
CONFIG_XFS_FS=m
# CONFIG_XFS_RT is not set
# CONFIG_XFS_QUOTA is not set
# CONFIG_XFS_POSIX_ACL is not set
CONFIG_JFS_FS=y
CONFIG_JFS_POSIX_ACL=y
# CONFIG_JFS_DEBUG is not set
# CONFIG_JFS_STATISTICS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_QUOTA is not set
......@@ -684,6 +674,49 @@ CONFIG_SUNRPC=y
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_NLS=y
#
# Native Language Support
#
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set
#
# Graphics support
......@@ -759,8 +792,10 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_INIT_DEBUG is not set
# CONFIG_DEBUG_INFO is not set
# CONFIG_FRAME_POINTER is not set
# CONFIG_IOMMU_DEBUG is not set
CONFIG_IOMMU_DEBUG=y
CONFIG_IOMMU_LEAK=y
CONFIG_MCE_DEBUG=y
#
......@@ -771,21 +806,7 @@ CONFIG_MCE_DEBUG=y
#
# Cryptographic options
#
CONFIG_CRYPTO=y
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_SHA1 is not set
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_TEST is not set
# CONFIG_CRYPTO is not set
#
# Library routines
......
......@@ -204,10 +204,9 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t*
}
static inline int
elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *xregs, elf_fpregset_t *fpu)
elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
{
struct _fpstate_ia32 *fpstate = (void*)fpu;
struct pt_regs *regs = (struct pt_regs *)(tsk->thread.rsp0);
mm_segment_t oldfs = get_fs();
if (!tsk->used_math)
......
......@@ -6,7 +6,7 @@ extra-y := head.o head64.o init_task.o
EXTRA_AFLAGS := -traditional
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bluesmoke.o bootflag.o e820.o reboot.o warmreboot.o
obj-$(CONFIG_MTRR) += mtrr/
......@@ -19,7 +19,8 @@ obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_MODULES) += module.o
$(obj)/bootflag.c:
......
/*
* Firmware replacement code.
*
* Work around broken BIOSes that don't set an aperture.
* The IOMMU code needs an aperture even who no AGP is present in the system.
* Map the aperture over some low memory. This is cheaper than doing bounce
* buffering. The memory is lost. This is done at early boot because only
* the bootmem allocator can allocate 32+MB.
* Work around broken BIOSes that don't set an aperture or only set the
* aperture in the AGP bridge.
* If all fails map the aperture over some low memory. This is cheaper than
* doing bounce buffering. The memory is lost. This is done at early boot
* because only the bootmem allocator can allocate 32+MB.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
* $Id: aperture.c,v 1.2 2002/09/19 19:25:32 ak Exp $
* $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
*/
#include <linux/config.h>
#include <linux/kernel.h>
......@@ -17,6 +17,8 @@
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/proto.h>
......@@ -45,10 +47,10 @@ static u32 __init allocate_aperture(void)
aper_size = (32 * 1024 * 1024) << fallback_aper_order;
/*
* Aperture has to be naturally aligned it seems. This means an
* 2GB aperture won't have much changes to succeed in the lower 4GB of
* memory. Unfortunately we cannot move it up because that would make
* the IOMMU useless.
* Aperture has to be naturally aligned. This means a 2GB aperture won't
* have much chance of finding a place in the lower 4GB of memory.
* Unfortunately we cannot move it up because that would make the
* IOMMU useless.
*/
p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) {
......@@ -63,21 +65,136 @@ static u32 __init allocate_aperture(void)
return (u32)__pa(p);
}
/*
 * Sanity-check an aperture candidate.
 *
 * name is only used to identify the source of the values in the
 * diagnostics. Returns 1 when the aperture passes every check and 0 when
 * it has to be ignored. A zero base is rejected silently.
 */
static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
{
	int usable = 0;

	if (!aper_base) {
		/* Nothing configured at all: reject without a message. */
	} else if (aper_size < 64*1024*1024) {
		printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20);
	} else if (aper_base + aper_size >= 0xffffffff) {
		printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
	} else if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
		/* The range overlaps memory the e820 map reports as RAM. */
		printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
	} else {
		usable = 1;
	}

	return usable;
}
/*
 * Find a PCI capability.
 *
 * Walks the capability list of device num:slot.func and returns the
 * config-space offset of the entry whose ID equals cap, or 0 when the
 * device has no capability list or the ID is not found.
 */
static __u32 __init find_cap(int num, int slot, int func, int cap)
{
	u8 pos;
	u8 id;
	int steps;

	if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST))
		return 0;

	pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST);

	/* Bound the walk to 48 entries so a looping list cannot hang us. */
	for (steps = 0; steps < 48; steps++) {
		/* Offsets below 0x40 terminate the list. */
		if (pos < 0x40)
			break;
		pos &= ~3;
		id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID);
		if (id == 0xff)
			break;
		if (id == cap)
			return pos;
		pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT);
	}

	return 0;
}
/*
 * Read a standard AGPv3 bridge header.
 *
 * num/slot/func address the bridge and cap is the config-space offset of
 * its AGP capability. The aperture size is stored in *order as a
 * power-of-two multiple of 32MB. Returns the low 32 bits of the aperture
 * base, or 0 when APSIZE is unreadable or the aperture does not pass
 * aperture_valid().
 */
static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
{
	u32 apsizereg;
	u32 apsize;
	u32 aper_low;
	u32 aper_hi;
	u64 aper;
	int nbits;

	printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);

	apsizereg = read_pci_config_16(num,slot,func, cap + 0x14);
	/* APSIZE is read as 16 bits, so "all ones" is 0xffff; comparing
	   against 0xffffffff could never match. */
	if (apsizereg == 0xffff) {
		printk("APSIZE in AGP bridge unreadable\n");
		return 0;
	}

	apsize = apsizereg & 0xfff;
	/* Some BIOS use weird encodings not in the AGPv3 table. */
	if (apsize & 0xff)
		apsize |= 0xf00;
	nbits = hweight16(apsize);
	*order = 7 - nbits;
	if ((int)*order < 0) /* < 32MB */
		*order = 0;

	aper_low = read_pci_config(num,slot,func, 0x10);
	aper_hi = read_pci_config(num,slot,func,0x14);
	aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);

	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
	       aper, 32 << *order, apsizereg);

	/* Shift as unsigned: 32MB << 6 no longer fits in a signed int. */
	if (!aperture_valid("AGP bridge", aper, (u32)(32*1024*1024) << *order))
		return 0;
	return (u32)aper;
}
/* Look for an AGP bridge. Windows only expects the aperture in the
   AGP bridge and some BIOSes forget to initialize the Northbridge too.
   Work around this here.

   Do a PCI bus scan by hand because we're running before the PCI
   subsystem.

   All K8 AGP bridges are AGPv3 compliant, so we can do this scan
   generically. It's probably overkill to always scan all slots because
   the AGP bridges should always be on their own bus in the HT hierarchy,
   but do it here for future safety.

   On the first bridge with an AGP capability *valid_agp is set to 1 and
   the result of read_agp() is returned; if none is found 0 is returned. */
static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
{
	int num, slot, func;

	/* Poor man's PCI discovery */
	for (num = 0; num < 32; num++) {
		for (slot = 0; slot < 32; slot++) {
			for (func = 0; func < 8; func++) {
				u32 class, cap, base_class;
				u8 type;

				class = read_pci_config(num,slot,func,
							PCI_CLASS_REVISION);
				/* Nothing answers here: go to the next slot. */
				if (class == 0xffffffff)
					break;

				base_class = class >> 16;
				if (base_class == PCI_CLASS_BRIDGE_HOST ||
				    base_class == PCI_CLASS_BRIDGE_OTHER /* needed? */) {
					/* AGP bridge? */
					cap = find_cap(num,slot,func,PCI_CAP_ID_AGP);
					if (cap) {
						*valid_agp = 1;
						return read_agp(num,slot,func,cap,order);
					}
				}

				/* No multi-function device? */
				type = read_pci_config_byte(num,slot,func,
							    PCI_HEADER_TYPE);
				if (!(type & 0x80))
					break;
			}
		}
	}

	printk("No AGP bridge found\n");
	return 0;
}
void __init iommu_hole_init(void)
{
int fix, num;
u32 aper_size, aper_alloc, aper_order;
u32 aper_size, aper_alloc = 0, aper_order;
u64 aper_base;
if (no_iommu)
return;
if (end_pfn < (0xffffffff>>PAGE_SHIFT) && !force_mmu)
return;
int valid_agp = 0;
printk("Checking aperture...\n");
fix = 0;
for (num = 24; num < 32; num++) {
char name[30];
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue;
......@@ -86,15 +203,12 @@ void __init iommu_hole_init(void)
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
aper_base <<= 25;
printk("CPU %d: aperture @ %Lx size %u KB\n", num-24,
aper_base, aper_size>>10);
if (!aper_base || aper_base + aper_size >= 0xffffffff) {
fix = 1;
break;
}
printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
aper_base, aper_size>>20);
if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
printk("Aperture pointing to e820 RAM. Ignoring.\n");
sprintf(name, "northbridge cpu %d", num-24);
if (!aperture_valid(name, aper_base, aper_size)) {
fix = 1;
break;
}
......@@ -103,12 +217,40 @@ void __init iommu_hole_init(void)
if (!fix && !fallback_aper_force)
return;
if (!fallback_aper_force)
aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
} else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) ||
force_iommu ||
valid_agp ||
fallback_aper_force) {
/* When there is a AGP bridge in the system assume the
user wants to use the AGP driver too and needs an
aperture. However this case (AGP but no good
aperture) should only happen with a more broken than
usual BIOS, because it would even break Windows. */
printk("Your BIOS doesn't leave a aperture memory hole\n");
printk("Please enable the IOMMU option in the BIOS setup\n");
printk("This costs you %d MB of RAM\n", 32 << fallback_aper_order);
aper_order = fallback_aper_order;
aper_alloc = allocate_aperture();
if (!aper_alloc)
if (!aper_alloc) {
/* Could disable AGP and IOMMU here, but it's probably
not worth it. But the later users cannot deal with
bad apertures and turning on the aperture over memory
causes very strange problems, so it's better to
panic early. */
panic("Not enough memory for aperture");
}
} else {
return;
}
/* Fix up the north bridges */
for (num = 24; num < 32; num++) {
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue;
......@@ -116,7 +258,7 @@ void __init iommu_hole_init(void)
/* Don't enable translation yet. That is done later.
Assume this BIOS didn't initialise the GART so
just overwrite all previous bits */
write_pci_config(0, num, 3, 0x90, fallback_aper_order<<1);
write_pci_config(0, num, 3, 0x90, aper_order<<1);
write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
}
}
......@@ -40,6 +40,7 @@
* -1: the lapic NMI watchdog is disabled, but can be enabled
*/
static int nmi_active;
static int panic_on_timeout;
unsigned int nmi_watchdog = NMI_IO_APIC;
static unsigned int nmi_hz = HZ;
......@@ -115,6 +116,14 @@ static int __init setup_nmi_watchdog(char *str)
{
int nmi;
if (!strncmp(str,"panic",5)) {
panic_on_timeout = 1;
str = strchr(str, ',');
if (!str)
return 1;
++str;
}
get_option(&str, &nmi);
if (nmi >= NMI_INVALID)
......@@ -327,6 +336,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
bust_spinlocks(1);
printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
show_registers(regs);
if (panic_on_timeout)
panic("nmi watchdog");
printk("console shuts up ...\n");
console_silent();
spin_unlock(&nmi_print_lock);
......@@ -374,3 +385,4 @@ EXPORT_SYMBOL(disable_lapic_nmi_watchdog);
EXPORT_SYMBOL(enable_lapic_nmi_watchdog);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);
......@@ -9,8 +9,6 @@
#include <linux/module.h>
#include <asm/io.h>
dma_addr_t bad_dma_address = -1UL;
/* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scatter-gather version of the
* above pci_map_single interface. Here the scatter gather list
......@@ -34,16 +32,9 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
BUG_ON(direction == PCI_DMA_NONE);
for (i = 0; i < nents; i++ ) {
struct scatterlist *s = &sg[i];
BUG_ON(!s->page);
s->dma_address = pci_map_page(hwdev, s->page, s->offset,
s->length, direction);
if (unlikely(s->dma_address == bad_dma_address)) {
pci_unmap_sg(hwdev, sg, i, direction);
return 0;
}
}
return nents;
}
......
This diff is collapsed.
......@@ -33,15 +33,30 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size,
free_pages((unsigned long)vaddr, get_order(size));
}
/*
 * Tell the caller whether DMA restricted to the given address mask can be
 * supported. Returns 1 if it can, 0 if the mask is too narrow.
 */
int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
{
	/*
	 * we fall back to GFP_DMA when the mask isn't all 1s,
	 * so we can't guarantee allocations that must be
	 * within a tighter range than GFP_DMA..
	 * RED-PEN this won't work for pci_map_single. Caller has to
	 * use GFP_DMA in the first place.
	 */
	return (mask >= 0x00ffffff) ? 1 : 0;
}
static void __init check_ram(void)
EXPORT_SYMBOL(pci_dma_supported);
/*
 * Initcall: print a warning when end_pfn reaches the 4GB boundary, since
 * this file provides no IOMMU. Always returns 0.
 */
static int __init check_ram(void)
{
	/* Everything is below 4GB: nothing to warn about. */
	if (end_pfn < (0xffffffff >> PAGE_SHIFT))
		return 0;

	printk(KERN_ERR "WARNING more than 4GB of memory but no IOMMU.\n"
	       KERN_ERR "WARNING 32bit PCI may malfunction.\n");
	/* Could play with highmem_start_page here to trick some subsystems
	   into bounce buffers. Unfortunately that would require setting
	   CONFIG_HIGHMEM too.
	*/
	return 0;
}
__initcall(check_ram);
......@@ -131,14 +131,16 @@ void __init setup_per_cpu_areas(void)
size = PERCPU_ENOUGH_ROOM;
#endif
/* We don't support CPU hotplug, so only allocate as much as needed here */
int maxi = max_t(unsigned, numnodes, num_online_cpus());
for (i = 0; i < maxi; i++) {
for (i = 0; i < NR_CPUS; i++) {
unsigned char *ptr;
/* If possible allocate on the node of the CPU.
In case it doesn't exist round-robin nodes. */
unsigned char *ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size);
if (!NODE_DATA(i % numnodes)) {
printk("cpu with no node %d, numnodes %d\n", i, numnodes);
ptr = alloc_bootmem(size);
} else {
ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size);
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
cpu_pda[i].data_offset = ptr - __per_cpu_start;
......@@ -158,7 +160,6 @@ void pda_init(int cpu)
pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->data_offset = 0;
pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
pda->active_mm = &init_mm;
......@@ -170,14 +171,14 @@ void pda_init(int cpu)
pda->irqstackptr = boot_cpu_stack;
level4 = init_level4_pgt;
} else {
level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
if (!level4)
panic("Cannot allocate top level page for cpu %d", cpu);
pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d\n", cpu);
level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
panic("cannot allocate irqstack for cpu %d", cpu);
}
if (!level4)
panic("Cannot allocate top level page for cpu %d", cpu);
pda->level4_pgt = (unsigned long *)level4;
if (level4 != init_level4_pgt)
......
......@@ -122,3 +122,17 @@ asmlinkage long wrap_sys_shmat(int shmid, char *shmaddr, int shmflg)
unsigned long raddr;
return sys_shmat(shmid,shmaddr,shmflg,&raddr) ?: (long)raddr;
}
/*
 * 64bit clean time(2).
 *
 * Returns the current time in seconds. When tloc is non-NULL the value is
 * also stored there; if that store faults, -EFAULT is returned instead.
 */
asmlinkage long sys_time64(long * tloc)
{
	struct timeval now;
	long secs;	/* full width: an int would truncate tv_sec to 32 bits */

	do_gettimeofday(&now);
	secs = now.tv_sec;
	if (tloc && put_user(secs, tloc))
		secs = -EFAULT;
	return secs;
}
......@@ -121,6 +121,7 @@ EXPORT_SYMBOL_NOVERS(__read_lock_failed);
EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(smp_call_function);
EXPORT_SYMBOL(cpu_callout_map);
#endif
#ifdef CONFIG_VT
......
......@@ -47,6 +47,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
struct node nodes[MAXNODE];
int nodeid, i, nb;
int found = 0;
int nmax;
nb = find_northbridge();
if (nb < 0)
......@@ -54,22 +55,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3));
printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1);
nmax = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3));
numnodes = nmax;
memset(&nodes,0,sizeof(nodes));
prevbase = 0;
for (i = 0; i < numnodes; i++) {
for (i = 0; i < 8; i++) {
unsigned long base,limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 3;
if ((base & 3) == 0) {
if (i < nmax)
printk("Skipping disabled node %d\n", i);
continue;
}
if (!limit) {
printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i, base);
return -1;
printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
......@@ -77,7 +84,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
if ((1UL << nodeid) & nodes_present) {
printk(KERN_INFO "Node %d already present. Skipping\n", nodeid);
printk(KERN_INFO "Node %d already present. Skipping\n",
nodeid);
continue;
}
......@@ -104,7 +112,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit < base) {
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
return -1;
continue;
}
/* Could sort here, but punt for now. Should not happen anyroads. */
......@@ -135,11 +143,26 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
}
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
for (i = 0; i < numnodes; i++) {
for (i = 0; i < MAXNODE; i++) {
if (nodes[i].start != nodes[i].end)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
/* There are unfortunately some poorly designed mainboards around
that only connect memory to a single CPU. This breaks the 1:1 cpu->node
mapping. To avoid this fill in the mapping for all possible
CPUs, as the number of CPUs is not known yet.
We round robin the existing nodes. */
int rr = 0;
for (i = 0; i < MAXNODE; i++) {
if (nodes_present & (1UL<<i))
continue;
if ((nodes_present >> rr) == 0)
rr = 0;
rr = ffz(~nodes_present >> rr);
node_data[i] = node_data[rr];
rr++;
}
return 0;
}
......@@ -26,8 +26,6 @@ static int numa_off __initdata;
unsigned long nodes_present;
static int emunodes __initdata;
int __init compute_hash_shift(struct node *nodes)
{
int i;
......@@ -103,11 +101,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
if (nodeid + 1 > numnodes) {
if (nodeid + 1 > numnodes)
numnodes = nodeid + 1;
printk(KERN_INFO
"setup_node_bootmem: enlarging numnodes to %d\n", numnodes);
}
nodes_present |= (1UL << nodeid);
}
......@@ -149,26 +144,6 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
if (!numa_off && emunodes > 0) {
struct node nodes[MAXNODE];
unsigned long nodesize = (end_pfn << PAGE_SHIFT) / emunodes;
int i;
if (emunodes > MAXNODE)
emunodes = MAXNODE;
memset(&nodes, 0, sizeof(nodes));
printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20);
for (i = 0; i < emunodes; i++) {
unsigned long end = (i+1)*nodesize;
if (i == emunodes-1)
end = end_pfn << PAGE_SHIFT;
nodes[i].start = i * nodesize;
nodes[i].end = end;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
memnode_shift = compute_hash_shift(nodes);
return 0;
}
printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT);
......@@ -176,6 +151,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
fake_node = 1;
memnode_shift = 63;
memnodemap[0] = 0;
numnodes = 1;
setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return -1;
}
......@@ -199,13 +175,10 @@ void __init paging_init(void)
}
/* [numa=off] */
/* [numa=emunodes] */
__init int numa_setup(char *opt)
{
if (!strncmp(opt,"off",3))
numa_off = 1;
if (isdigit(opt[0]))
emunodes = simple_strtoul(opt, NULL, 10);
return 1;
}
......
......@@ -402,12 +402,12 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
}
}
static inline void clear_bit_string(unsigned long *bitmap, unsigned long i,
static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i,
int len)
{
unsigned long end = i + len;
while (i < end) {
clear_bit(i, bitmap);
__clear_bit(i, bitmap);
i++;
}
}
......
......@@ -264,7 +264,20 @@
#define __NR_ia32_sys_epoll_wait 256
#define __NR_ia32_remap_file_pages 257
#define __NR_ia32_set_tid_address 258
#define __NR_ia32_timer_create 259
#define __NR_ia32_timer_settime (__NR_ia32_timer_create+1)
#define __NR_ia32_timer_gettime (__NR_ia32_timer_create+2)
#define __NR_ia32_timer_getoverrun (__NR_ia32_timer_create+3)
#define __NR_ia32_timer_delete (__NR_ia32_timer_create+4)
#define __NR_ia32_clock_settime (__NR_ia32_timer_create+5)
#define __NR_ia32_clock_gettime (__NR_ia32_timer_create+6)
#define __NR_ia32_clock_getres (__NR_ia32_timer_create+7)
#define __NR_ia32_clock_nanosleep (__NR_ia32_timer_create+8)
#define __NR_ia32_statfs64 268
#define __NR_ia32_fstatfs64 269
#define __NR_ia32_tgkill 270
#define __NR_ia32_utimes 271
#define IA32_NR_syscalls 265 /* must be > than biggest syscall! */
#define IA32_NR_syscalls 275 /* must be > than biggest syscall! */
#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
......@@ -301,6 +301,12 @@ static inline int isa_check_signature(unsigned long io_addr,
#define flush_write_buffers()
/* Disable vmerge for now. Need to fix the block layer code
   to check for non iommu addresses first.
   When the IOMMU is forced it is safe to enable.

   The block layer looks for BIO_VMERGE_BOUNDARY; the previous spelling
   (BIO_VERMGE_BOUNDARY) was never picked up, so the setting had no
   effect. The old name is kept as an alias for any existing user. */
extern int force_iommu;
#define BIO_VMERGE_BOUNDARY (force_iommu ? 4096 : 0)
#define BIO_VERMGE_BOUNDARY BIO_VMERGE_BOUNDARY
#endif /* __KERNEL__ */
#endif
......@@ -48,6 +48,4 @@ static inline void unset_nmi_pm_callback(struct pm_dev * dev)
extern void default_do_nmi(struct pt_regs *);
extern void default_do_nmi(struct pt_regs *);
#endif /* ASM_NMI_H */
......@@ -14,7 +14,26 @@ static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
u32 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inl(0xcfc);
PDprintk("%x reading from %x: %x\n", slot, offset, v);
if (v != 0xffffffff)
PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
return v;
}
/*
 * Read one byte from PCI configuration space of bus:slot.func at offset.
 * The address is written to port 0xcf8 and the byte is then read from
 * the data port at 0xcfc plus the low two bits of the offset.
 */
static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
{
	u8 val;

	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
	val = inb(0xcfc + (offset&3));
	PDprintk("%x reading 1 from %x: %x\n", slot, offset, val);

	return val;
}
static inline u8 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
{
u16 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inw(0xcfc + (offset&2));
PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
return v;
}
......
......@@ -8,9 +8,6 @@
#include <linux/mm.h> /* for struct page */
extern dma_addr_t bad_dma_address;
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */
......@@ -21,6 +18,8 @@ extern unsigned int pcibios_assign_all_busses(void);
#define pcibios_assign_all_busses() 0
#endif
extern int no_iommu, force_iommu;
extern unsigned long pci_mem_start;
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM (pci_mem_start)
......@@ -46,6 +45,9 @@ struct pci_dev;
extern int iommu_setup(char *opt);
extern dma_addr_t bad_dma_address;
#define pci_dma_error(x) ((x) == bad_dma_address)
/* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices,
* NULL for PCI-like buses (ISA, EISA).
......@@ -119,10 +121,16 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
/* The PCI address space does equal the physical memory
* address space. The networking and block device layers use
* this boolean for bounce buffer decisions.
* this boolean for bounce buffer decisions
*
* On AMD64 it mostly equals, but we set it to zero to tell some subsystems
* that an IOMMU is available.
*/
#define PCI_DMA_BUS_IS_PHYS (0)
#define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0)
/* We lie slightly when the IOMMU is forced to get the device to
use SAC instead of DAC. */
#define pci_dac_dma_supported(pci_dev, mask) (force_iommu ? 0 : 1)
#else
static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
......@@ -206,6 +214,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
#define PCI_DMA_BUS_IS_PHYS 1
#define pci_dac_dma_supported(pci_dev, mask) 1
#endif
extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
......@@ -220,21 +229,7 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
* only drive the low 24-bits during PCI bus mastering, then
* you would pass 0x00ffffff as the mask to this function.
*/
/*
 * Tell the caller whether DMA restricted to `mask` can be supported.
 * Returns 1 when the mask covers at least the low 24 bits, 0 otherwise.
 * `hwdev` is not consulted.
 */
static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
{
	/*
	 * Allocations fall back to GFP_DMA whenever the mask isn't all 1s,
	 * so nothing tighter than the GFP_DMA range can be guaranteed.
	 */
	return (mask >= 0x00ffffff) ? 1 : 0;
}
/* This is always fine. */
#define pci_dac_dma_supported(pci_dev, mask) (1)
extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask);
static __inline__ dma64_addr_t
pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
......
#ifndef _ASM_X8664_PERCPU_H_
#define _ASM_X8664_PERCPU_H_
#include <linux/compiler.h>
#include <asm/pda.h>
/* Same as asm-generic/percpu.h, except that we store the per cpu offset
in the PDA. Longer term the PDA and every per cpu variable
should be just put into a single section and referenced directly
from %gs */
#ifdef CONFIG_SMP
/* Same as the generic code except that we cache the per cpu offset
in the pda. This gives a 3-instruction reference for per cpu data */
#include <linux/compiler.h>
#include <asm/pda.h>
#define __my_cpu_offset() read_pda(data_offset)
#define __per_cpu_offset(cpu) (cpu_pda[cpu].data_offset)
#define __my_cpu_offset() read_pda(data_offset)
/* Separate out the type, so (int[3], foo) works. */
#define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
/* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) \
(*RELOC_HIDE(&var##__per_cpu, __my_cpu_offset()))
/*
 * Copy `size` bytes from `src` into the per-CPU instance of `pcpudst`
 * belonging to each possible CPU.  CPUs for which cpu_possible() is
 * false are skipped.
 */
static inline void percpu_modcopy(void *pcpudst, const void *src,
				  unsigned long size)
{
	unsigned int idx;

	for (idx = 0; idx < NR_CPUS; idx++) {
		if (!cpu_possible(idx))
			continue;
		/* Each CPU's copy lives at the base address plus its offset. */
		memcpy(pcpudst + __per_cpu_offset(idx), src, size);
	}
}
extern void setup_per_cpu_areas(void);
#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
/*
 * percpu_modcopy(pcpudst, src, size): for every CPU index below NR_CPUS
 * for which cpu_possible() is true, memcpy `size` bytes from `src` to
 * `pcpudst` displaced by that CPU's __per_cpu_offset().
 *
 * Written as a macro to avoid #include hell.  Note that the arguments are
 * expanded inside the loop, so they are evaluated once per copied CPU;
 * pass expressions without side effects.
 */
#define percpu_modcopy(pcpudst, src, size) \
do { \
unsigned int __i; \
for (__i = 0; __i < NR_CPUS; __i++) \
if (cpu_possible(__i)) \
memcpy((pcpudst)+__per_cpu_offset(__i), \
(src), (size)); \
} while (0)
#else /* ! SMP */
#define DEFINE_PER_CPU(type, name) \
__typeof__(type) name##__per_cpu
__typeof__(type) per_cpu__##name
#define per_cpu(var, cpu) ((void)cpu, var##__per_cpu)
#define __get_cpu_var(var) var##__per_cpu
#define per_cpu(var, cpu) ((void)cpu, per_cpu__##var)
#define __get_cpu_var(var) per_cpu__##var
#endif /* SMP */
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu)
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
DECLARE_PER_CPU(struct x8664_pda, per_cpu_pda);
extern void setup_per_cpu_areas(void);
#endif
#endif /* _ASM_X8664_PERCPU_H_ */
......@@ -77,7 +77,7 @@ extern unsigned long end_pfn;
extern unsigned long table_start, table_end;
extern int exception_trace;
extern int no_iommu, force_mmu;
extern int force_iommu, no_iommu;
extern int using_apic_timer;
extern int disable_apic;
extern unsigned cpu_khz;
......
#ifndef _X8664_SIGINFO_H
#define _X8664_SIGINFO_H
#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
#include <asm-generic/siginfo.h>
#endif
......@@ -461,7 +461,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr)
#define __NR_tkill 200
__SYSCALL(__NR_tkill, sys_tkill)
#define __NR_time 201
__SYSCALL(__NR_time, sys_time)
__SYSCALL(__NR_time, sys_time64)
#define __NR_futex 202
__SYSCALL(__NR_futex, sys_futex)
#define __NR_sched_setaffinity 203
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment