Commit e4b9e2aa authored by Andi Kleen, committed by Linus Torvalds

[PATCH] 2.5.21 x86-64 jumbo patch - arch specific changes

Here is the big 2.5.21 x86-64 sync patch. It only touches arch/x86_64
and include/asm-x86_64. It requires a few other changes that I'm sending
in separate mail.

Changes:
- merge with 2.5.21
- merge from 2.5.21/i386 (new PCI code, new LDT code etc.)
- sync with 2.4-x86_64 tree.
- minor updates to 32bit emulation
- better early console, including serial support.
- Now set up a dummy PDA during early boot to avoid problems
- Fix a one-instruction race when reloading GS in the context switch
- Remove hardcoded names from mpparse code
- Fix inline assembly for RAID-5 xor (similar change needed for i386)
- Real per cpu data support based on PDA field
- Cleanup of offset.c generation requested by Kai: it only puts structure
  offsets into offset.h now.
- Fix i387 fxsave signal frame problems.
- Add uname emulation via personality ("linux32"); a small userspace example follows this list
- New SSE optimized checksum-copy, copy*user, memcpy, clear_page, copy_page
  functions. Other tunings/cleanups in checksum and other user memory
  access functions.
- check if exception table is really sorted
- Cleanups in page table handling in preparation for non-executable page
  support.
- Clean up PDA access to not require offset.h (thanks to Kai for kicking me
  into this)
- use long long for u64/s64 to avoid more warnings
- remove CONFIG_ISA
- fix various bugs and other cleanups
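
The "linux32" uname emulation is visible from user space; here is a minimal demo (illustration only, not part of the patch, which contains only the kernel side in sys_uname/sys32_uname):

#include <stdio.h>
#include <sys/personality.h>
#include <sys/utsname.h>

int main(void)
{
	struct utsname u;

	/* Same effect as running under a "linux32" personality wrapper:
	 * with this patch, uname() then reports "i386" as the machine. */
	personality(PER_LINUX32);
	if (uname(&u) == 0)
		printf("machine: %s\n", u.machine);
	return 0;
}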
parent cc9af0c5
......@@ -23,7 +23,7 @@
#
# early bootup linking needs 32bit. You can either use real 32bit tools
# here or 64bit tools switch to 32bit mode.
# here or 64bit tools in 32bit mode.
#
IA32_CC := $(CROSS_COMPILE)gcc -m32 -O2 -fomit-frame-pointer -nostdinc -I $(HPATH)
IA32_LD := $(CROSS_COMPILE)ld -m elf_i386
......@@ -41,11 +41,12 @@ LINKFLAGS =-T $(TOPDIR)/arch/x86_64/vmlinux.lds $(LDFLAGS)
CFLAGS += -mno-red-zone
CFLAGS += -mcmodel=kernel
CFLAGS += -pipe
# this makes reading assembly source easier
CFLAGS += -fno-reorder-blocks
# needed for later gcc 3.1
CFLAGS += -finline-limit=2000
# needed for earlier gcc 3.1
CFLAGS += -fno-strength-reduce
#CFLAGS += -fno-strength-reduce
#CFLAGS += -g
# prevent gcc from keeping the stack 16 byte aligned (FIXME)
......@@ -63,9 +64,9 @@ SUBDIRS += arch/x86_64/ia32
CORE_FILES += arch/x86_64/ia32/ia32.o
endif
ifdef CONFIG_HOSTFS
SUBDIRS += arch/x86_64/hostfs
core-$(CONFIG_HOSTFS) += arch/x86_64/hostfs/hostfs.o
ifdef CONFIG_PCI
SUBDIRS += arch/x86_64/pci
DRIVERS += arch/x86_64/pci/pci.o
endif
CORE_FILES += $(core-y)
......@@ -77,7 +78,7 @@ vmlinux: arch/x86_64/vmlinux.lds
.PHONY: zImage bzImage compressed zlilo bzlilo zdisk bzdisk install \
clean archclean archmrproper archdep checkoffset
checkoffset: FORCE
checkoffset: FORCE include/asm
make -C arch/$(ARCH)/tools $(TOPDIR)/include/asm-x86_64/offset.h
bzImage: checkoffset vmlinux
......
......@@ -452,7 +452,7 @@ no_psmouse:
cmpw $0, %cs:realmode_swtch
jz rmodeswtch_normal
lcall %cs:realmode_swtch
lcall *%cs:realmode_swtch
jmp rmodeswtch_end
......
......@@ -437,6 +437,7 @@ setalias:
# Setting of user mode (AX=mode ID) => CF=success
mode_set:
movw %ax, %fs:(0x01fa)
movw %ax, %bx
cmpb $0xff, %ah
jz setalias
......
......@@ -2,12 +2,17 @@
# For a description of the syntax of this configuration file,
# see Documentation/kbuild/config-language.txt.
#
# Note: ISA is disabled and will hopefully never be enabled.
# If you managed to buy an ISA x86-64 box you'll have to fix all the
# ISA drivers you need yourself.
#
mainmenu_name "Linux Kernel Configuration"
define_bool CONFIG_X86_64 y
define_bool CONFIG_X86 y
define_bool CONFIG_ISA y
define_bool CONFIG_ISA n
define_bool CONFIG_SBUS n
define_bool CONFIG_UID16 y
......@@ -22,7 +27,8 @@ source init/Config.in
mainmenu_option next_comment
comment 'Processor type and features'
choice 'Processor family' \
"AMD-Hammer CONFIG_MK8" CONFIG_MK8
"AMD-Hammer CONFIG_MK8 \
Generic-x86-64 CONFIG_GENERIC_CPU" AMD-Hammer
#
# Define implied options from the CPU selection here
......@@ -44,8 +50,10 @@ define_bool CONFIG_X86_LOCAL_APIC y
#currently broken:
#bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
if [ "$CONFIG_SMP" = "n" ]; then
bool 'Preemptible Kernel' CONFIG_PREEMPT
fi
if [ "$CONFIG_SMP" = "y" ]; then
define_bool CONFIG_HAVE_DEC_LOCK y
fi
......@@ -56,10 +64,18 @@ endmenu
mainmenu_option next_comment
comment 'General options'
comment 'Power management options'
bool 'Power Management support' CONFIG_PM
source drivers/acpi/Config.in
endmenu
mainmenu_option next_comment
comment 'Bus options (PCI etc.)'
bool 'PCI support' CONFIG_PCI
if [ "$CONFIG_PCI" = "y" ]; then
# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
......@@ -77,6 +93,12 @@ else
define_bool CONFIG_PCMCIA n
fi
endmenu
mainmenu_option next_comment
comment 'Executable file formats / Emulations'
if [ "$CONFIG_PROC_FS" = "y" ]; then
define_bool CONFIG_KCORE_ELF y
fi
......@@ -84,8 +106,6 @@ fi
tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
bool 'Power Management support' CONFIG_PM
bool 'IA32 Emulation' CONFIG_IA32_EMULATION
endmenu
......@@ -94,7 +114,7 @@ source drivers/mtd/Config.in
source drivers/parport/Config.in
source drivers/pnp/Config.in
#source drivers/pnp/Config.in
source drivers/block/Config.in
......@@ -142,9 +162,10 @@ if [ "$CONFIG_NET" = "y" ]; then
bool 'Network device support' CONFIG_NETDEVICES
if [ "$CONFIG_NETDEVICES" = "y" ]; then
source drivers/net/Config.in
if [ "$CONFIG_ATM" = "y" ]; then
source drivers/atm/Config.in
fi
# ATM seems to be largely 64bit unsafe and also unmaintained - disable it for now.
# if [ "$CONFIG_ATM" = "y" ]; then
# source drivers/atm/Config.in
# fi
fi
endmenu
fi
......@@ -155,14 +176,7 @@ source net/irda/Config.in
source drivers/isdn/Config.in
mainmenu_option next_comment
comment 'Old CD-ROM drivers (not SCSI, not IDE)'
bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI
if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
source drivers/cdrom/Config.in
fi
endmenu
# no support for non IDE/SCSI cdroms as they were all ISA only
#
# input before char - char/joystick depends on it. As does USB.
......
......@@ -2,11 +2,16 @@
# Makefile for the ia32 kernel emulation subsystem.
#
O_TARGET := ia32.o
USE_STANDARD_AS_RULE := true
export-objs := ia32_ioctl.o sys_ia32.o
export-objs := ia32_ioctl.o
all: ia32.o
O_TARGET := ia32.o
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \
ia32_binfmt.o fpu32.o socket32.o ptrace32.o
clean::
include $(TOPDIR)/Rules.make
......@@ -113,6 +113,7 @@ static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
return err;
}
#if 0
static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
{
mm_segment_t old_fs = get_fs();
......@@ -128,6 +129,7 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
return -EFAULT;
return err;
}
#endif
static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
......@@ -2971,11 +2973,6 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, struct blkpg_ioc
return err;
}
static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
}
/* SuSE extension */
#ifndef TIOCGDEV
#define TIOCGDEV _IOR('T',0x32, unsigned int)
......@@ -3087,19 +3084,13 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, void *ptr)
return err;
}
struct ioctl_trans {
unsigned long cmd;
int (*handler)(unsigned int, unsigned int, unsigned long, struct file * filp);
struct ioctl_trans *next;
};
/* generic function to change a single long put_user to arg to 32bit */
static int arg2long(unsigned int fd, unsigned int cmd, unsigned long arg)
static int generic_long_put(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int ret;
unsigned long val = 0;
mm_segment_t oldseg = get_fs();
set_fs(KERNEL_DS);
cmd = (cmd & 0xc000ffff) | (sizeof(unsigned long) << _IOC_SIZESHIFT);
ret = sys_ioctl(fd, cmd, (unsigned long)&val);
set_fs(oldseg);
if (!ret || val) {
......@@ -3109,6 +3100,29 @@ static int arg2long(unsigned int fd, unsigned int cmd, unsigned long arg)
return ret;
}
static int generic_long_get(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int ret;
unsigned int ival;
unsigned long val = 0;
mm_segment_t oldseg = get_fs();
if (get_user(ival, (unsigned int *)arg))
return -EFAULT;
val = ival;
set_fs(KERNEL_DS);
cmd = (cmd & 0xc000ffff) | (sizeof(unsigned long) << _IOC_SIZESHIFT);
ret = sys_ioctl(fd, cmd, (unsigned long)&val);
set_fs(oldseg);
return ret;
}
struct ioctl_trans {
unsigned long cmd;
int (*handler)(unsigned int, unsigned int, unsigned long, struct file * filp);
struct ioctl_trans *next;
};
#define REF_SYMBOL(handler) if (0) (void)handler;
#define HANDLE_IOCTL2(cmd,handler) REF_SYMBOL(handler); asm volatile(".quad %c0, " #handler ",0"::"i" (cmd));
#define HANDLE_IOCTL(cmd,handler) HANDLE_IOCTL2(cmd,handler)
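
Worked example for illustration (not text from the patch): the 32bit RTC_IRQP_READ defined further down is _IOR('p', 0x0b, unsigned int), i.e. the same direction, type and nr as the native command but with a size field of 4. The statement

	cmd = (cmd & 0xc000ffff) | (sizeof(unsigned long) << _IOC_SIZESHIFT);

in generic_long_put/generic_long_get keeps the direction, type and nr bits (the 0xc000ffff mask clears only the 14-bit size field starting at bit 16) and substitutes size = sizeof(unsigned long) = 8, turning the command into the native 64bit _IOR('p', 0x0b, unsigned long) that sys_ioctl expects, while the handler passes a kernel unsigned long as the buffer.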
......@@ -3316,10 +3330,6 @@ COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+5, int))
COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+6, int))
COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+7, int))
/* Little p (/dev/rtc, /dev/envctrl, etc.) */
#if 0
COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
#endif
COMPATIBLE_IOCTL(RTC_AIE_ON)
COMPATIBLE_IOCTL(RTC_AIE_OFF)
COMPATIBLE_IOCTL(RTC_UIE_ON)
......@@ -3334,10 +3344,14 @@ COMPATIBLE_IOCTL(RTC_RD_TIME)
COMPATIBLE_IOCTL(RTC_SET_TIME)
COMPATIBLE_IOCTL(RTC_WKALM_SET)
COMPATIBLE_IOCTL(RTC_WKALM_RD)
HANDLE_IOCTL(RTC_IRQP_READ,arg2long)
COMPATIBLE_IOCTL(RTC_IRQP_SET)
COMPATIBLE_IOCTL(RTC_EPOCH_READ)
COMPATIBLE_IOCTL(RTC_EPOCH_SET)
#define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */
HANDLE_IOCTL(RTC_IRQP_READ32,generic_long_put)
#define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */
HANDLE_IOCTL(RTC_IRQP_SET32,generic_long_get)
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned long) /* Read epoch */
#define RTC_EPOCH_SET32 _IOW('p', 0x0e, unsigned long) /* Set epoch */
HANDLE_IOCTL(RTC_EPOCH_READ32, generic_long_put)
HANDLE_IOCTL(RTC_EPOCH_SET32, generic_long_get)
/* Little m */
COMPATIBLE_IOCTL(MTIOCTOP)
/* Socket level stuff */
......@@ -3605,6 +3619,8 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, generic_long_get);
/* DEVFS */
COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV)
COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK)
......@@ -3671,14 +3687,6 @@ COMPATIBLE_IOCTL(DRM_IOCTL_LOCK)
COMPATIBLE_IOCTL(DRM_IOCTL_UNLOCK)
COMPATIBLE_IOCTL(DRM_IOCTL_FINISH)
#endif /* DRM */
#ifdef CONFIG_AUTOFS_FS
COMPATIBLE_IOCTL(AUTOFS_IOC_READY);
COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL);
COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC);
COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER);
COMPATIBLE_IOCTL(AUTOFS_IOC_SETTIMEOUT);
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE);
#endif
COMPATIBLE_IOCTL(REISERFS_IOC_UNPACK);
/* serial driver */
HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl);
......@@ -3771,8 +3779,6 @@ HANDLE_IOCTL(CDROMREADALL, cdrom_ioctl_trans)
HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl)
HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl)
HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
......
......@@ -241,7 +241,7 @@ asmlinkage int sys32_sigreturn(struct pt_regs regs)
return eax;
badframe:
force_sig(SIGSEGV, current);
signal_fault(&regs, frame, "32bit sigreturn");
return 0;
}
......@@ -280,7 +280,7 @@ asmlinkage int sys32_rt_sigreturn(struct pt_regs regs)
return eax;
badframe:
force_sig(SIGSEGV, current);
signal_fault(&regs, frame, "32bit rt sigreturn");
return 0;
}
......@@ -420,7 +420,7 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka,
give_sigsegv:
if (sig == SIGSEGV)
ka->sa.sa_handler = SIG_DFL;
force_sig(SIGSEGV, current);
signal_fault(regs,frame,"32bit signal setup");
}
void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
......@@ -493,6 +493,6 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
give_sigsegv:
if (sig == SIGSEGV)
ka->sa.sa_handler = SIG_DFL;
force_sig(SIGSEGV, current);
signal_fault(regs, frame, "32bit rt signal setup");
}
......@@ -24,9 +24,6 @@
/*
* 32bit SYSCALL instruction entry.
* It'll probably kill you because it destroys your segments.
* Should coredump here, but the next instruction will likely do
* that anyways.
*/
ENTRY(ia32_cstar_target)
movq $-ENOSYS,%rax
......@@ -117,6 +114,7 @@ ENTRY(ia32_ptregs_common)
.data
.align 8
.globl ia32_sys_call_table
ia32_sys_call_table:
.quad ni_syscall /* 0 - old "setup" system call*/
.quad sys_exit
......@@ -143,7 +141,7 @@ ia32_sys_call_table:
.quad sys_oldumount /* old_umount */
.quad sys_setuid16
.quad sys_getuid16
.quad ni_syscall /* stime */ /* 25 */
.quad sys_stime /* stime */ /* 25 */
.quad sys32_ptrace /* ptrace */
.quad sys_alarm /* XXX sign extension??? */
.quad ni_syscall /* (old)fstat */
......@@ -240,7 +238,7 @@ ia32_sys_call_table:
.quad stub32_sigreturn
.quad stub32_clone /* 120 */
.quad sys_setdomainname
.quad sys_newuname
.quad sys32_newuname
.quad sys_modify_ldt
.quad sys32_adjtimex
.quad sys32_mprotect /* 125 */
......
......@@ -2579,6 +2579,8 @@ int sys32_uname(struct old_utsname * name)
down_read(&uts_sem);
err=copy_to_user(name, &system_utsname, sizeof (*name));
up_read(&uts_sem);
if (current->personality == PER_LINUX32)
err |= copy_to_user(&name->machine, "i386", 5);
return err?-EFAULT:0;
}
......@@ -3125,3 +3127,6 @@ static int __init ia32_init (void)
}
__initcall(ia32_init);
extern unsigned long ia32_sys_call_table[];
EXPORT_SYMBOL(ia32_sys_call_table);
......@@ -12,11 +12,6 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bluesmoke.o bootflag.o
ifdef CONFIG_PCI
obj-y += pci-x86_64.o
obj-y += pci-pc.o pci-irq.o
endif
obj-$(CONFIG_MTRR) += mtrr.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
......
......@@ -48,7 +48,7 @@ static int __init sbf_struct_valid(unsigned long tptr)
unsigned int i;
struct sbf_boot sb;
memcpy_fromio(&sb, tptr, sizeof(sb));
memcpy_fromio(&sb, (void *)tptr, sizeof(sb));
if(sb.sbf_len != 40 && sb.sbf_len != 39)
// 39 on IBM ThinkPad A21m, BIOS version 1.02b (KXET24WW; 2000-12-19).
......@@ -238,6 +238,7 @@ static int __init sbf_init(void)
rp = (unsigned long)ioremap(rp, 4096);
if(rp == 0)
continue;
if(sbf_struct_valid(rp))
{
/* Found the BOOT table and processed it */
......
......@@ -94,15 +94,20 @@ static void early_serial_write(struct console *con, const char *s, unsigned n)
static __init void early_serial_init(char *opt)
{
static int bases[] = { 0x3f8, 0x2f8 };
unsigned char c;
unsigned divisor, baud = 38400;
char *s, *e;
if (*opt == ',')
++opt;
s = strsep(&opt, ",");
if (s != NULL) {
unsigned port;
++s;
if (!strncmp(s,"0x",2))
early_serial_base = simple_strtoul(s, &e, 16);
else {
static int bases[] = { 0x3f8, 0x2f8 };
if (!strncmp(s,"ttyS",4))
s+=4;
port = simple_strtoul(s, &e, 10);
......@@ -110,12 +115,11 @@ static __init void early_serial_init(char *opt)
port = 0;
early_serial_base = bases[port];
}
}
c = inb(early_serial_base + LCR);
outb(c & ~DLAB, early_serial_base + LCR);
outb(0x3, early_serial_base + LCR); /* 8n1 */
outb(0, early_serial_base + IER); /* no interrupt */
outb(0, early_serial_base + FCR); /* no fifo */
outb(0x3, early_serial_base + LCR); /* 8n1 */
outb(0x3, early_serial_base + MCR); /* DTR + RTS */
s = strsep(&opt, ",");
......@@ -155,33 +159,55 @@ void early_printk(const char *fmt, ...)
va_end(ap);
}
static int keep_early;
int __init setup_early_printk(char *opt)
{
char *space;
char buf[256];
if (early_console_initialized)
return;
early_console_initialized = 1;
return -1;
strncpy(buf,opt,256);
buf[255] = 0;
space = strchr(buf, ' ');
if (space)
*space = 0;
if (strstr(buf,"keep"))
keep_early = 1;
if (!strncmp(opt, "serial", 6)) {
early_serial_init(opt+7);
if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
early_console = &early_serial_console;
} else if (!strncmp(opt, "vga", 3))
} else if (!strncmp(buf, "vga", 3)) {
early_console = &early_vga_console;
else
} else {
early_console = NULL;
return -1;
}
early_console_initialized = 1;
register_console(early_console);
return 0;
}
void __init disable_early_printk(void)
{
if (early_console_initialized) {
if (!early_console_initialized || !early_console)
return;
if (!keep_early) {
printk("disabling early console...\n");
unregister_console(early_console);
early_console_initialized = 0;
} else {
printk("keeping early console.\n");
}
}
/* syntax: earlyprintk=vga
earlyprintk=serial[,ttySn[,baudrate]]
Append ,keep to not disable it when the real console takes over.
Only vga or serial at a time, not both.
Currently only ttyS0 and ttyS1 are supported.
Interaction with the standard serial driver is not very good.
......
......@@ -40,6 +40,7 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/errno.h>
.code64
......@@ -270,8 +271,8 @@ int_very_careful:
/* Check for syscall exit trace */
bt $TIF_SYSCALL_TRACE,%edx
jnc int_signal
movq %rsp,%rdi # &ptregs -> arg1
pushq %rdi
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace
popq %rdi
btr $TIF_SYSCALL_TRACE,%edi
......@@ -563,16 +564,17 @@ ENTRY(kernel_thread)
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq $CLONE_VM, %rdi
orq kernel_thread_flags(%rip), %rdi
movq $-1, %rsi
movq %rsp, %rdx
# clone now
call do_fork_FIXME_NOW_RETURNS_TASK_STRUCT
# save retval on the stack so it's popped before `ret`
movq %rax, RAX(%rsp)
call do_fork
xorl %edi,%edi
cmpq $-1000,%rax
cmovb %rdi,%rax
movq %rax,RAX(%rsp)
/*
* It isn't worth to check for reschedule here,
......
......@@ -38,7 +38,11 @@ startup_32:
movl %ebx,%ebp /* Save trampoline flag */
/* First check if extended functions are implemented */
/* If the CPU doesn't support CPUID this will double fault.
* Unfortunately it is hard to check for CPUID without a stack.
*/
/* Check if extended functions are implemented */
movl $0x80000000, %eax
cpuid
cmpl $0x80000000, %eax
......@@ -157,6 +161,17 @@ reach_long64:
*/
lgdt pGDT64
/*
* Set up a dummy PDA. This is just for some early bootup code
* that does in_interrupt()
*/
movl $MSR_GS_BASE,%ecx
movq $empty_zero_page,%rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
movl %eax,%ds
movl %eax,%ss
......
......@@ -70,16 +70,18 @@ static void __init setup_boot_cpu_data(void)
boot_cpu_data.x86_mask = eax & 0xf;
}
extern void start_kernel(void), pda_init(int);
extern void start_kernel(void), pda_init(int), setup_early_printk(char *);
void __init x86_64_start_kernel(char * real_mode_data)
{
char *s;
clear_bss();
pda_init(0);
copy_bootdata(real_mode_data);
s = strstr(saved_command_line, "earlyprintk=");
if (s != NULL)
setup_early_printk(s+12);
setup_boot_cpu_data();
start_kernel();
}
......@@ -24,8 +24,6 @@
#include <asm/ptrace.h>
#include <asm/uaccess.h>
static struct i387_fxsave_struct init_fpu_env;
/*
* Called at bootup to set up the initial FPU state that is later cloned
* into all processes.
......@@ -77,6 +75,9 @@ int save_i387(struct _fpstate *buf)
bad_user_i387_struct();
}
if ((unsigned long)buf % 16)
printk("save_i387: bad fpstate %p\n",buf);
if (!tsk->used_math)
return 0;
tsk->used_math = 0; /* trigger finit */
......
......@@ -12,6 +12,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/device.h>
#include <asm/atomic.h>
#include <asm/system.h>
......@@ -319,6 +320,18 @@ void mask_and_ack_8259A(unsigned int irq)
}
}
static struct device device_i8259A = {
name: "i8259A",
bus_id: "0020",
};
static int __init init_8259A_devicefs(void)
{
return register_sys_device(&device_i8259A);
}
__initcall(init_8259A_devicefs);
void __init init_8259A(int auto_eoi)
{
unsigned long flags;
......
......@@ -163,7 +163,7 @@ int show_interrupts(struct seq_file *p, void *v)
}
seq_printf(p, "NMI: ");
for (j = 0; j < smp_num_cpus; j++)
seq_printf(p, "%10u ", nmi_count(cpu_logical_map(j)));
seq_printf(p, "%10u ", cpu_pda[cpu_logical_map(j)].__nmi_count);
seq_putc(p, '\n');
#if CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
......
......@@ -8,11 +8,6 @@
* This handles calls from both 32bit and 64bit mode.
*/
/*
* FIXME:
* Need to add locking for LAR in load_gs_index.
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/string.h>
......@@ -20,42 +15,164 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
void load_gs_index(unsigned gs)
{
int access;
struct task_struct *me = current;
if (me->mm)
read_lock(&me->mm->context.ldtlock);
asm volatile("pushf\n\t"
"cli\n\t"
"swapgs\n\t"
"lar %1,%0\n\t"
"jnz 1f\n\t"
"movl %1,%%eax\n\t"
"movl %%eax,%%gs\n\t"
"jmp 2f\n\t"
"1: movl %2,%%gs\n\t"
"2: swapgs\n\t"
"popf" : "=g" (access) : "g" (gs), "r" (0) : "rax");
if (me->mm)
read_unlock(&me->mm->context.ldtlock);
}
#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
static void flush_ldt(void *mm)
{
if (current->mm)
load_LDT(&current->mm->context);
}
#endif
static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
{
void *oldldt;
void *newldt;
int oldsize;
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
mincount = (mincount+511)&(~511);
if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
else
newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
if (!newldt)
return -ENOMEM;
if (oldsize)
memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
oldldt = pc->ldt;
memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
wmb();
pc->ldt = newldt;
pc->size = mincount;
if (reload) {
load_LDT(pc);
#ifdef CONFIG_SMP
if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
smp_call_function(flush_ldt, 0, 1, 1);
#endif
}
wmb();
if (oldsize) {
if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
kfree(oldldt);
}
return 0;
}
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
if (err < 0) {
printk(KERN_WARNING "ldt allocation failed\n");
new->size = 0;
return err;
}
memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
return 0;
}
/*
* read_ldt() is not really atomic - this is not a problem since
* synchronization of reads and writes done to the LDT has to be
* assured by user-space anyway. Writes are atomic, to protect
* the security checks done on new descriptors.
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
struct mm_struct * old_mm;
int retval = 0;
init_MUTEX(&mm->context.sem);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
down(&old_mm->context.sem);
retval = copy_ldt(&mm->context, &old_mm->context);
up(&old_mm->context.sem);
}
rwlock_init(&mm->context.ldtlock);
return retval;
}
/*
* No need to lock the MM as we are the last user
*/
void release_segments(struct mm_struct *mm)
{
if (mm->context.size) {
clear_LDT();
if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
kfree(mm->context.ldt);
mm->context.size = 0;
}
}
static int read_ldt(void * ptr, unsigned long bytecount)
{
int err;
unsigned long size;
struct mm_struct * mm = current->mm;
err = 0;
if (!mm->context.segments)
goto out;
size = LDT_ENTRIES*LDT_ENTRY_SIZE;
if (!mm->context.size)
return 0;
if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
down(&mm->context.sem);
size = mm->context.size*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
err = size;
if (copy_to_user(ptr, mm->context.segments, size))
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
out:
up(&mm->context.sem);
if (err < 0)
return err;
if (size != bytecount) {
/* zero-fill the rest */
clear_user(ptr+size, bytecount-size);
}
return bytecount;
}
static int read_default_ldt(void * ptr, unsigned long bytecount)
{
/* Arbitary number */
/* x86-64 default LDT is all zeros */
if (bytecount > 128)
bytecount = 128;
if (clear_user(ptr, bytecount))
......@@ -94,24 +211,14 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
me->thread.gs = 0;
me->thread.fs = 0;
/*
* the GDT index of the LDT is allocated dynamically, and is
* limited by MAX_LDT_DESCRIPTORS.
*/
down_write(&mm->mmap_sem);
if (!mm->context.segments) {
void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
error = -ENOMEM;
if (!segments)
down(&mm->context.sem);
if (ldt_info.entry_number >= mm->context.size) {
error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
if (error < 0)
goto out_unlock;
memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
wmb();
mm->context.segments = segments;
mm->context.cpuvalid = 1UL << smp_processor_id();
load_LDT(mm);
}
lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.segments);
lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
......@@ -146,12 +253,14 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
/* Install the new entry ... */
install:
write_lock(&mm->context.ldtlock);
*lp = entry_1;
*(lp+1) = entry_2;
write_unlock(&mm->context.ldtlock);
error = 0;
out_unlock:
up_write(&mm->mmap_sem);
up(&mm->context.sem);
out:
return error;
}
......
......@@ -95,42 +95,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
/*
* Processor encoding in an MP configuration block
*/
static char __init *mpc_family(int family,int model)
{
static char n[32];
static char *model_defs[]=
{
"80486DX","80486DX",
"80486SX","80486DX/2 or 80487",
"80486SL","80486SX/2",
"Unknown","80486DX/2-WB",
"80486DX/4","80486DX/4-WB"
};
switch (family) {
case 0x04:
if (model < 10)
return model_defs[model];
break;
case 0x05:
return("Pentium(tm)");
case 0x06:
return("Pentium(tm) Pro");
case 0x0F:
if (model == 0x0F)
return("Special controller");
}
sprintf(n,"Unknown CPU [%d:%d]",family, model);
return n;
}
static void __init MP_processor_info (struct mpc_config_processor *m)
{
int ver;
......@@ -138,10 +102,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (!(m->mpc_cpuflag & CPU_ENABLED))
return;
printk("Processor #%d %s APIC version %d\n",
printk("Processor #%d %d:%d APIC version %d\n",
m->mpc_apicid,
mpc_family( (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
(m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
(m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
(m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
m->mpc_apicver);
if (m->mpc_featureflag&(1<<0))
......
......@@ -41,6 +41,7 @@
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/cpufeature.h>
/* Note: "err" is handled in a funny way below. Otherwise one version
of gcc or another breaks. */
......@@ -57,7 +58,7 @@ static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
" jmp 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .align 8\n"
" .quad 1b,3b\n"
".previous"
: "=&bDS" (err)
......@@ -236,7 +237,7 @@ static int msr_open(struct inode *inode, struct file *file)
if ( !(cpu_online_map & (1UL << cpu)) )
return -ENXIO; /* No such CPU */
if ( !test_bit(X86_FEATURE_MSR, &c->x86_capability) )
if ( !cpu_has(c, X86_FEATURE_MSR) )
return -EIO; /* MSR not supported */
return 0;
......
......@@ -139,7 +139,7 @@ static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
__cli();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (test_bit(X86_FEATURE_PGE, &boot_cpu_data.x86_capability)) {
if (cpu_has_pge) {
ctxt->cr4val = read_cr4();
write_cr4(ctxt->cr4val & ~(1UL << 7));
}
......@@ -170,7 +170,7 @@ static void set_mtrr_done (struct set_mtrr_context *ctxt)
write_cr0(read_cr0() & 0xbfffffff);
/* Restore value of CR4 */
if (test_bit(X86_FEATURE_PGE, &boot_cpu_data.x86_capability))
if (cpu_has_pge)
write_cr4 (ctxt->cr4val);
/* Re-enable interrupts locally (if enabled previously) */
......@@ -983,7 +983,7 @@ static ssize_t mtrr_write (struct file *file, const char *buf,
char *ptr;
char line[LINE_SIZE];
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
/* Can't seek (pwrite) on this device */
......@@ -1071,7 +1071,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
return -ENOIOCTLCMD;
case MTRRIOC_ADD_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1083,7 +1083,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_SET_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1093,7 +1093,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_DEL_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1103,7 +1103,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_KILL_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1134,7 +1134,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_ADD_PAGE_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1146,7 +1146,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_SET_PAGE_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1156,7 +1156,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_DEL_PAGE_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1166,7 +1166,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
break;
case MTRRIOC_KILL_PAGE_ENTRY:
if (!capable (CAP_SYS_ADMIN))
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
......@@ -1277,7 +1277,7 @@ static void __init mtrr_setup (void)
{
printk ("mtrr: v%s)\n", MTRR_VERSION);
if (test_bit (X86_FEATURE_MTRR, &boot_cpu_data.x86_capability)) {
if (cpu_has_mtrr) {
/* Query the width (in bits) of the physical
addressable memory on the Hammer family. */
if ((cpuid_eax (0x80000000) >= 0x80000008)) {
......
......@@ -50,15 +50,19 @@ int __init check_nmi_watchdog (void)
printk(KERN_INFO "testing NMI watchdog ... ");
for (j = 0; j < NR_CPUS; ++j)
counts[j] = cpu_pda[cpu_logical_map(j)].__nmi_count;
for (j = 0; j < NR_CPUS; ++j) {
cpu = cpu_logical_map(j);
counts[cpu] = cpu_pda[cpu].__nmi_count;
}
sti();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
for (j = 0; j < smp_num_cpus; j++) {
cpu = cpu_logical_map(j);
if (nmi_count(cpu) - counts[j] <= 5) {
printk("CPU#%d: NMI appears to be stuck!\n", cpu);
if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d)!\n",
cpu,
cpu_pda[cpu].__nmi_count);
return -1;
}
}
......
......@@ -57,6 +57,8 @@
asmlinkage extern void ret_from_fork(void);
unsigned long kernel_thread_flags = CLONE_VM;
int hlt_counter;
/*
......@@ -318,44 +320,11 @@ void show_regs(struct pt_regs * regs)
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}
/*
* No need to lock the MM as we are the last user
*/
void release_segments(struct mm_struct *mm)
{
void * ldt = mm->context.segments;
/*
* free the LDT
*/
if (ldt) {
mm->context.segments = NULL;
clear_LDT();
vfree(ldt);
}
}
void load_gs_index(unsigned gs)
{
int access;
/* should load gs in syscall exit after swapgs instead */
/* XXX need to add LDT locking for SMP to protect against parallel changes */
asm volatile("pushf\n\t"
"cli\n\t"
"swapgs\n\t"
"lar %1,%0\n\t"
"jnz 1f\n\t"
"movl %1,%%eax\n\t"
"movl %%eax,%%gs\n\t"
"jmp 2f\n\t"
"1: movl %2,%%gs\n\t"
"2: swapgs\n\t"
"popf" : "=g" (access) : "g" (gs), "r" (0) : "rax");
}
#define __STR(x) #x
#define __STR2(x) __STR(x)
extern void load_gs_index(unsigned);
/*
* Free current thread data structures etc..
*/
......@@ -379,43 +348,16 @@ void flush_thread(void)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
void * ldt = dead_task->mm->context.segments;
// temporary debugging check
if (ldt) {
printk("WARNING: dead process %8s still has LDT? <%p>\n",
dead_task->comm, ldt);
if (dead_task->mm->context.size) {
printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
dead_task->mm->context.size);
BUG();
}
}
}
/*
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
{
struct mm_struct * old_mm;
void *old_ldt, *ldt;
ldt = NULL;
old_mm = current->mm;
if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
/*
* Completely new LDT, we initialize it from the parent:
*/
ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
if (!ldt)
printk(KERN_WARNING "ldt allocation failed\n");
else
memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
}
new_mm->context.segments = ldt;
new_mm->context.cpuvalid = 0UL;
return;
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
unsigned long unused,
struct task_struct * p, struct pt_regs * regs)
......
......@@ -28,7 +28,6 @@
#include <linux/delay.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/apm_bios.h>
#ifdef CONFIG_BLK_DEV_RAM
#include <linux/blk.h>
#endif
......@@ -66,6 +65,8 @@ unsigned long mmu_cr4_features;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
unsigned long saved_video_mode;
/*
* Setup options
*/
......@@ -546,6 +547,8 @@ static inline void parse_mem_cmdline (char ** cmdline_p)
unsigned long start_pfn, end_pfn;
extern void exception_table_check(void);
void __init setup_arch(char **cmdline_p)
{
unsigned long bootmap_size, low_mem_size;
......@@ -555,6 +558,7 @@ void __init setup_arch(char **cmdline_p)
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
aux_device_present = AUX_DEVICE_INFO;
saved_video_mode = SAVED_VIDEO_MODE;
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
......@@ -783,6 +787,8 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
exception_table_check();
}
#ifndef CONFIG_X86_TSC
......@@ -1093,7 +1099,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
else
seq_printf(m, "stepping\t: unknown\n");
if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
if (cpu_has(c,X86_FEATURE_TSC)) {
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
cpu_khz / 1000, (cpu_khz % 1000));
}
......
......@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
......@@ -31,42 +32,64 @@ extern void ia32_cstar_target(void);
extern struct task_struct init_task;
extern unsigned char __per_cpu_start[], __per_cpu_end[];
struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table };
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
void __init setup_per_cpu_areas(void)
{
unsigned long size, i;
unsigned char *ptr;
/* Copy section for each CPU (we discard the original) */
size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
if (!size)
return;
ptr = alloc_bootmem(size * NR_CPUS);
for (i = 0; i < NR_CPUS; i++, ptr += size) {
cpu_pda[cpu_logical_map(i)].cpudata_offset = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, size);
}
}
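
A hedged sketch of how a per-cpu variable is presumably reached once cpudata_offset is filled in as above; the real accessor macros live in the include/asm-x86_64 headers (not part of this arch diff), and read_pda() is assumed here by analogy with the add_pda() used elsewhere in this patch:

/* Illustration only, not code from this patch: each CPU's copy of the
 * per-cpu section sits at __per_cpu_start + cpudata_offset (stored in
 * that CPU's PDA), so a variable is reached by adding that offset to
 * its link-time address. */
#define this_cpu_ptr_sketch(var) \
	((__typeof__(&(var)))((char *)&(var) + read_pda(cpudata_offset)))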
void pda_init(int cpu)
{
pml4_t *level4;
struct x8664_pda *pda = &cpu_pda[cpu];
if (cpu == 0) {
/* others are initialized in smpboot.c */
cpu_pda[cpu].pcurrent = &init_task;
cpu_pda[cpu].irqstackptr = boot_cpu_stack;
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
level4 = init_level4_pgt;
} else {
cpu_pda[cpu].irqstackptr = (char *)
pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!cpu_pda[cpu].irqstackptr)
if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d\n", cpu);
level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
}
if (!level4)
panic("Cannot allocate top level page for cpu %d", cpu);
cpu_pda[cpu].level4_pgt = (unsigned long *)level4;
pda->level4_pgt = (unsigned long *)level4;
if (level4 != init_level4_pgt)
memcpy(level4, &init_level4_pgt, PAGE_SIZE);
set_pml4(level4 + 510, mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt)));
asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
cpu_pda[cpu].irqstackptr += IRQSTACKSIZE-64;
cpu_pda[cpu].cpunumber = cpu;
cpu_pda[cpu].irqcount = -1;
cpu_pda[cpu].kernelstack =
pda->irqstackptr += IRQSTACKSIZE-64;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
cpu_pda[cpu].me = &cpu_pda[cpu];
pda->me = pda;
pda->cpudata_offset = 0;
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
......@@ -75,7 +98,6 @@ void pda_init(int cpu)
#define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */
char boot_exception_stacks[N_EXCEPTION_STACKS*EXCEPTION_STKSZ];
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
......@@ -93,6 +115,7 @@ void __init cpu_init (void)
struct tss_struct * t = &init_tss[nr];
unsigned long v;
char *estacks;
struct task_struct *me;
/* CPU 0 is initialised in head64.c */
if (nr != 0) {
......@@ -103,6 +126,8 @@ void __init cpu_init (void)
} else
estacks = boot_exception_stacks;
me = current;
if (test_and_set_bit(nr, &cpu_initialized))
panic("CPU#%d already initialized!\n", nr);
......@@ -150,14 +175,14 @@ void __init cpu_init (void)
}
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
if(current->mm)
me->active_mm = &init_mm;
if (me->mm)
BUG();
enter_lazy_tlb(&init_mm, current, nr);
enter_lazy_tlb(&init_mm, me, nr);
set_tss_desc(nr, t);
load_TR(nr);
load_LDT(&init_mm);
load_LDT(&init_mm.context);
/*
* Clear all 6 debug registers:
......
......@@ -22,10 +22,8 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/compiler.h>
#include <linux/binfmts.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
......@@ -86,7 +84,7 @@ struct rt_sigframe
char *pretcode;
struct ucontext uc;
struct siginfo info;
struct _fpstate fpstate __attribute__((aligned(8)));
struct _fpstate fpstate;
};
static int
......@@ -126,6 +124,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, unsigned long *p
{
struct _fpstate * buf;
err |= __get_user(buf, &sc->fpstate);
if (buf) {
if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
......@@ -147,10 +146,12 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
stack_t st;
long eax;
if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
if (verify_area(VERIFY_READ, frame, sizeof(*frame))) {
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
}
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) {
goto badframe;
}
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
......@@ -158,15 +159,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax)) {
goto badframe;
}
#if DEBUG_SIG
printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
#endif
if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) {
goto badframe;
}
/* It is more difficult to avoid calling this function than to
call it and ignore errors. */
do_sigaltstack(&st, NULL, regs.rsp);
......@@ -174,10 +177,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
return eax;
badframe:
#if DEBUG_SIG
printk("%d bad frame %p\n",current->pid,frame);
#endif
force_sig(SIGSEGV, current);
signal_fault(&regs,frame,"sigreturn");
return 0;
}
......@@ -233,8 +233,8 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
/*
* Determine which stack to use..
*/
static inline void *
get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
static inline struct rt_sigframe *
get_sigframe(struct k_sigaction *ka, struct pt_regs * regs)
{
unsigned long rsp;
......@@ -247,15 +247,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
rsp = current->sas_ss_sp + current->sas_ss_size;
}
{
extern void bad_sigframe(void);
/* beginning of sigframe is 8 bytes misaligned, but fpstate
must end up on a 16byte boundary */
if ((offsetof(struct rt_sigframe, fpstate) & 16) != 0)
bad_sigframe();
}
rsp = (rsp - sizeof(struct _fpstate)) & ~(15UL);
rsp -= offsetof(struct rt_sigframe, fpstate);
return (void *)((rsp - frame_size) & ~(15UL)) - 8;
return (struct rt_sigframe *) rsp;
}
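
Worked through as an illustration: fxsave/fxrstor need the 512-byte _fpstate area to be 16-byte aligned. The new code places that area first: (rsp - sizeof(struct _fpstate)) & ~15UL yields a 16-byte-aligned address that becomes &frame->fpstate, and the frame start is then derived by subtracting offsetof(struct rt_sigframe, fpstate). The fpstate member therefore always lands on a 16-byte boundary, which is what the old code (the -8 fudge plus the link-time bad_sigframe check) was trying to guarantee.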
static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
......@@ -264,16 +259,17 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe *frame;
int err = 0;
frame = get_sigframe(ka, regs, sizeof(*frame));
frame = get_sigframe(ka, regs);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
if (err) {
goto give_sigsegv;
}
}
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
......@@ -285,9 +281,10 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
if (err) {
goto give_sigsegv;
}
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
......@@ -299,8 +296,10 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
}
if (err)
if (err) {
printk("fault 3\n");
goto give_sigsegv;
}
#if DEBUG_SIG
printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
......@@ -337,7 +336,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
give_sigsegv:
if (sig == SIGSEGV)
ka->sa.sa_handler = SIG_DFL;
force_sig(SIGSEGV, current);
signal_fault(regs,frame,"signal setup");
}
/*
......@@ -459,3 +458,15 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs,oldset);
}
extern int exception_trace;
void signal_fault(struct pt_regs *regs, void *frame, char *where)
{
struct task_struct *me = current;
if (exception_trace)
printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax);
force_sig(SIGSEGV, me);
}
......@@ -150,6 +150,7 @@ static void inline leave_mm (unsigned long cpu)
if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
BUG();
clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
__flush_tlb();
}
/*
......@@ -200,10 +201,12 @@ static void inline leave_mm (unsigned long cpu)
asmlinkage void smp_invalidate_interrupt (void)
{
unsigned long cpu = smp_processor_id();
unsigned long cpu;
cpu = get_cpu();
if (!test_bit(cpu, &flush_cpumask))
return;
goto out;
/*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
......@@ -224,6 +227,9 @@ asmlinkage void smp_invalidate_interrupt (void)
}
ack_APIC_irq();
clear_bit(cpu, &flush_cpumask);
out:
put_cpu();
}
static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
......@@ -273,16 +279,23 @@ static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
unsigned long cpu_mask;
preempt_disable();
cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
local_flush_tlb();
if (cpu_mask)
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
preempt_enable();
}
void flush_tlb_mm (struct mm_struct * mm)
{
unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
unsigned long cpu_mask;
preempt_disable();
cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
if (current->active_mm == mm) {
if (current->mm)
......@@ -292,12 +305,17 @@ void flush_tlb_mm (struct mm_struct * mm)
}
if (cpu_mask)
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
preempt_enable();
}
void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
unsigned long cpu_mask;
preempt_disable();
cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
if (current->active_mm == mm) {
if(current->mm)
......@@ -308,6 +326,8 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
if (cpu_mask)
flush_tlb_others(cpu_mask, mm, va);
preempt_enable();
}
static inline void do_flush_tlb_all_local(void)
......
......@@ -50,9 +50,6 @@
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
/* Setup configured maximum number of CPUs to activate */
static int max_cpus = -1;
......@@ -151,17 +148,6 @@ void __init smp_store_cpu_info(int id)
*c = boot_cpu_data;
identify_cpu(c);
/*
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
c->x86 == 5 &&
c->x86_mask >= 1 && c->x86_mask <= 4 &&
c->x86_model <= 3)
/*
* Remember we have B step Pentia with bugs
*/
smp_b_stepping = 1;
}
/*
......@@ -772,7 +758,7 @@ unsigned long cache_decay_ticks;
static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
unsigned long bandwidth = 350; /* MB/s */
unsigned long bandwidth = 1000; /* MB/s */
/*
* Rough estimation for SMP scheduling, this is the number of
* cycles it takes for a fully memory-limited process to flush
......@@ -883,8 +869,7 @@ void __init smp_boot_cpus(void)
/*
* If we couldn't find a local APIC, then get out of here now!
*/
if (APIC_INTEGRATED(apic_version[boot_cpu_id]) &&
!test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_id);
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
......@@ -965,7 +950,7 @@ void __init smp_boot_cpus(void)
Dprintk("Before bogomips.\n");
if (!cpucount) {
printk(KERN_ERR "Error: only one processor found.\n");
printk(KERN_INFO "Only one processor found.\n");
} else {
unsigned long bogosum = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
......@@ -979,8 +964,6 @@ void __init smp_boot_cpus(void)
}
smp_num_cpus = cpucount + 1;
if (smp_b_stepping)
printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
Dprintk("Boot done.\n");
/*
......
......@@ -14,6 +14,7 @@
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/personality.h>
#include <asm/uaccess.h>
#include <asm/ipc.h>
......@@ -22,7 +23,7 @@
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way Unix traditionally does this, though.
*/
asmlinkage long sys_pipe(unsigned long * fildes)
asmlinkage long sys_pipe(int *fildes)
{
int fd[2];
int error;
......@@ -93,18 +94,15 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
}
}
/*
* Old cruft
*/
asmlinkage long sys_uname(struct old_utsname * name)
asmlinkage long sys_uname(struct new_utsname * name)
{
int err;
if (!name)
return -EFAULT;
down_read(&uts_sem);
err=copy_to_user(name, &system_utsname, sizeof (*name));
err = copy_to_user(name, &system_utsname, sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
if (current->personality == PER_LINUX32)
err |= copy_to_user(&name->machine, "i386", 5);
return err ? -EFAULT : 0;
}
asmlinkage long sys_pause(void)
......
......@@ -41,6 +41,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/device.h>
#include <asm/io.h>
#include <asm/smp.h>
......@@ -57,12 +58,8 @@
#include <asm/fixmap.h>
/*
* for x86_do_profile()
*/
#include <linux/irq.h>
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
/* Number of usecs that the last interrupt was delayed */
......@@ -79,9 +76,9 @@ unsigned int __fast_gettimeoffset_quotient __section_fast_gettimeoffset_quotient
extern rwlock_t xtime_lock;
struct timeval __xtime __section_xtime;
volatile unsigned long __jiffies __section_jiffies;
unsigned long __wall_jiffies __section_wall_jiffies;
struct timezone __sys_tz __section_sys_tz;
volatile unsigned long __jiffies __section_jiffies;
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
......@@ -527,3 +524,15 @@ void __init time_init(void)
setup_irq(0, &irq0);
}
static struct device device_i8253 = {
name: "i8253",
bus_id: "0040",
};
static int time_init_driverfs(void)
{
return register_sys_device(&device_i8253);
}
__initcall(time_init_driverfs);
......@@ -77,7 +77,7 @@ asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void call_debug(void);
extern char iret_address[];
extern int exception_trace;
struct notifier_block *die_chain;
......@@ -172,6 +172,8 @@ void show_trace(unsigned long *stack)
i = 1;
if (stack >= irqstack && stack < irqstack_end) {
unsigned long *tstack;
printk("<IRQ> ");
while (stack < irqstack_end) {
addr = *stack++;
/*
......@@ -197,13 +199,11 @@ void show_trace(unsigned long *stack)
tstack = (unsigned long *)(current_thread_info()+1);
if (stack < tstack || (char*)stack > (char*)tstack+THREAD_SIZE)
printk("\n" KERN_DEBUG
"no stack at the end of irqstack; stack:%lx, curstack %lx\n",
"no stack at the end of irqstack; stack:%p, curstack %p\n",
stack, tstack);
#endif
}
while (((long) stack & (THREAD_SIZE-1)) != 0) {
addr = *stack++;
if (kernel_text_address(addr)) {
......@@ -263,7 +263,7 @@ void show_stack(unsigned long * rsp)
void show_registers(struct pt_regs *regs)
{
int i;
int in_kernel = 1;
int in_kernel = (regs->cs & 3) == 0;
unsigned long rsp;
#ifdef CONFIG_SMP
/* For SMP should get the APIC id here, just to protect against corrupted GS */
......@@ -273,11 +273,8 @@ void show_registers(struct pt_regs *regs)
#endif
struct task_struct *cur = cpu_pda[cpu].pcurrent;
rsp = (unsigned long) (&regs->rsp);
if (regs->rsp < TASK_SIZE) {
in_kernel = 0;
rsp = regs->rsp;
}
printk("CPU %d ", cpu);
show_regs(regs);
printk("Process %s (pid: %d, stackpage=%08lx)\n",
......@@ -383,7 +380,7 @@ static void do_trap(int trapnr, int signr, char *str,
if ((regs->cs & 3) != 0) {
struct task_struct *tsk = current;
if (trapnr != 3)
if (exception_trace && trapnr != 3)
printk("%s[%d] trap %s at rip:%lx rsp:%lx err:%lx\n",
tsk->comm, tsk->pid, str, regs->rip, regs->rsp, error_code);
......@@ -456,9 +453,14 @@ extern void dump_pagetable(unsigned long);
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
if ((regs->cs & 3)!=0) {
current->thread.error_code = error_code;
current->thread.trap_no = 13;
force_sig(SIGSEGV, current);
struct task_struct *tsk = current;
if (exception_trace)
printk("%s[%d] #gp at rip:%lx rsp:%lx err:%lx\n",
tsk->comm, tsk->pid, regs->rip, regs->rsp, error_code);
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 13;
force_sig(SIGSEGV, tsk);
return;
}
......@@ -509,8 +511,7 @@ asmlinkage void do_nmi(struct pt_regs * regs)
{
unsigned char reason = inb(0x61);
++nmi_count(smp_processor_id());
add_pda(__nmi_count,1);
if (!(reason & 0xc0)) {
#if CONFIG_X86_LOCAL_APIC
/*
......
......@@ -47,14 +47,14 @@
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
//#define NO_VSYSCALL 1
#define NO_VSYSCALL 1
#ifdef NO_VSYSCALL
#include <asm/unistd.h>
static int errno __section_vxtime_sequence;
__syscall2(static inline int,int,gettimeofday,struct timeval *,tv,struct timezone *,tz)
static inline _syscall2(int,gettimeofday,struct timeval *,tv,struct timezone *,tz)
#else
static inline void timeval_normalize(struct timeval * tv)
......@@ -148,28 +148,11 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz
static time_t __vsyscall(1) vtime(time_t * t)
{
#ifdef NO_VSYSCALL
struct timeval tv;
gettimeofday(&tv,NULL);
if (t) *t = tv.tv_sec;
return tv.tv_sec;
#else
long sequence;
time_t __time;
do {
sequence = __vxtime_sequence[1];
rmb();
__time = __xtime.tv_sec;
rmb();
} while (sequence != __vxtime_sequence[0]);
vgettimeofday(&tv,NULL);
if (t)
*t = __time;
return __time;
#endif
*t = tv.tv_sec;
return tv.tv_sec;
}
static long __vsyscall(2) venosys_0(void)
......
......@@ -88,8 +88,9 @@ EXPORT_SYMBOL(strncpy_from_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(clear_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__generic_copy_from_user);
EXPORT_SYMBOL(__generic_copy_to_user);
EXPORT_SYMBOL(copy_user_generic);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(strnlen_user);
EXPORT_SYMBOL(pci_alloc_consistent);
......
#
# Makefile for x86_64-specific library files..
# Makefile for x86_64-specific library files.
#
USE_STANDARD_AS_RULE := true
EXTRA_CFLAGS_csum-partial.o := -funroll-loops
L_TARGET = lib.a
obj-y = generic-checksum.o old-checksum.o delay.o \
obj-y = csum-partial.o csum-copy.o csum-wrappers.o delay.o \
usercopy.o getuser.o putuser.o \
checksum_copy.o thunk.o mmx.o
thunk.o io.o clear_page.o copy_page.o
obj-y += memcpy.o
obj-y += memmove.o
#obj-y += memset.o
obj-y += copy_user.o
export-objs := io.o csum-wrappers.o csum-partial.o
obj-$(CONFIG_IO_DEBUG) += iodebug.o
obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
......
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IP/TCP/UDP checksumming routines
*
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Tom May, <ftom@netcom.com>
* Pentium Pro/II routines:
* Alexander Kjeldaas <astor@guardian.no>
* Finn Arne Gangstad <finnag@guardian.no>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
* Andi Kleen initial raw port to x86-64
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <asm/errno.h>
/* Version for PentiumII/PPro ported to x86-64. Still very raw and
does not exploit 64bit. */
#define SRC(y...) \
9999: y; \
.section __ex_table, "a"; \
.quad 9999b, 6001f ; \
.previous
#define DST(y...) \
9999: y; \
.section __ex_table, "a"; \
.quad 9999b, 6002f ; \
.previous
#define ROUND1(x) \
SRC(movl x(%rsi), %ebx ) ; \
addl %ebx, %eax ; \
DST(movl %ebx, x(%rdi) ) ;
#define ROUND(x) \
SRC(movl x(%rsi), %ebx ) ; \
adcl %ebx, %eax ; \
DST(movl %ebx, x(%rdi) ) ;
#define ARGBASE 0
/*
asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
int *src_err_ptr, int *dst_err_ptr);
rdi .. src
rsi .. dst (copy in r12)
rdx .. len (copy in r10)
rcx .. sum
r8 .. src_err_ptr
r9 .. dst_err_ptr
OPTIMIZEME: this routine should take advantage of checksumming 64bits at a time
*/
.globl csum_partial_copy_generic
csum_partial_copy_generic:
pushq %r10
pushq %r12
pushq %rbx
pushq %rbp
xchgq %rsi, %rdi
movq %rdx, %r10
movq %rsi, %r12
movq %rcx, %rax
movq %rdx, %rcx # And now it looks like PII case
movl %ecx, %ebx
movl %esi, %edx
shrl $6, %ecx
andl $0x3c, %ebx
negq %rbx
subq %rbx, %rsi
subq %rbx, %rdi
lea 3f(%rbx,%rbx), %rbx
testq %rsi, %rsi
jmp *%rbx
1: addq $64,%rsi
addq $64,%rdi
ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
3: adcl $0,%eax
addl $64,%edx
dec %ecx
jge 1b
4: movq %r10,%rdx
andl $3, %edx
jz 7f
cmpl $2, %edx
jb 5f
SRC( movw (%rsi), %dx )
leaq 2(%rsi), %rsi
DST( movw %dx, (%rdi) )
leaq 2(%rdi), %rdi
je 6f
shll $16,%edx
5:
SRC( movb (%rsi), %dl )
DST( movb %dl, (%rdi) )
6: addl %edx, %eax
adcl $0, %eax
7:
.section .fixup, "ax"
6001:
movl $-EFAULT, (%r8)
# zero the complete destination (computing the rest is too much work)
movq %r12,%rdi # dst
movq %r10,%rcx # len
xorl %eax,%eax
rep; stosb
jmp 7b
6002: movl $-EFAULT,(%r9)
jmp 7b
.previous
popq %rbp
popq %rbx
popq %r12
popq %r10
ret
#undef ROUND
#undef ROUND1
/*
* Copyright 2002 Andi Kleen, SuSE Labs.
*/
#include <linux/linkage.h>
/*
* Zero a page.
* rdi page
*/
ENTRY(clear_page)
xorl %eax,%eax
movl $4096/128,%ecx
movl $128,%edx
loop:
#define PUT(x) movnti %rax,x*8(%rdi)
PUT(0)
PUT(1)
PUT(2)
PUT(3)
PUT(4)
PUT(5)
PUT(6)
PUT(7)
PUT(8)
PUT(9)
PUT(10)
PUT(11)
PUT(12)
PUT(13)
PUT(14)
PUT(15)
addq %rdx,%rdi
decl %ecx
jnz loop
sfence
ret
/*
* Copyright 2002 Andi Kleen, SuSE Labs.
*/
#include <linux/linkage.h>
#include <linux/config.h>
#ifdef CONFIG_PREEMPT
#warning "check your fpu context saving!"
#endif
/*
* Copy a page.
*
* rdi destination page
* rsi source page
*
* src/dst must be aligned to 16 bytes.
*
* Warning: in case of super lazy FP save this needs to be preempt_stop
*/
ENTRY(copy_page)
prefetchnta (%rsi)
prefetchnta 64(%rsi)
movq %rsp,%rax
subq $16*4,%rsp
andq $~15,%rsp
movdqa %xmm0,(%rsp)
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
movl $(4096/128)-2,%ecx
movl $128,%edx
loop:
prefetchnta (%rsi)
prefetchnta 64(%rsi)
loop_no_prefetch:
movdqa (%rsi),%xmm0
movdqa 1*16(%rsi),%xmm1
movdqa 2*16(%rsi),%xmm2
movdqa 3*16(%rsi),%xmm3
movntdq %xmm0,(%rdi)
movntdq %xmm1,16(%rdi)
movntdq %xmm2,2*16(%rdi)
movntdq %xmm3,3*16(%rdi)
movdqa 4*16(%rsi),%xmm0
movdqa 5*16(%rsi),%xmm1
movdqa 6*16(%rsi),%xmm2
movdqa 7*16(%rsi),%xmm3
movntdq %xmm0,4*16(%rdi)
movntdq %xmm1,5*16(%rdi)
movntdq %xmm2,6*16(%rdi)
movntdq %xmm3,7*16(%rdi)
addq %rdx,%rdi
addq %rdx,%rsi
decl %ecx
jns loop
cmpl $-1,%ecx
je loop_no_prefetch
sfence
movdqa (%rsp),%xmm0
movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2
movdqa 48(%rsp),%xmm3
movq %rax,%rsp
ret
/* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
#define FIX_ALIGNMENT 1
#include <asm/thread_info.h>
#include <asm/offset.h>
/* Standard copy_to_user with segment limit checking */
.globl copy_to_user
.p2align
copy_to_user:
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
jc bad_to_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_to_user
jmp copy_user_generic
/* Standard copy_from_user with segment limit checking */
.globl copy_from_user
.p2align
copy_from_user:
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_from_user
/* FALL THROUGH to copy_user_generic */
.section .fixup,"ax"
/* must zero dest */
bad_from_user:
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
.previous
/*
* copy_user_generic - memory copy with exception handling.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
 * eax uncopied bytes or 0 if successful.
*/
.globl copy_user_generic
copy_user_generic:
/* Put the first cacheline into cache. This should handle
the small movements in ioctls etc., but not penalize the bigger
filesystem data copies too much. */
pushq %rbx
prefetcht0 (%rsi)
xorl %eax,%eax /*zero for the exception handler */
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jnz bad_alignment
after_bad_alignment:
#endif
movq %rdx,%rcx
movl $64,%ebx
shrq $6,%rdx
decq %rdx
js handle_tail
jz loop_no_prefetch
loop:
prefetchnta 64(%rsi)
loop_no_prefetch:
s1: movq (%rsi),%r11
s2: movq 1*8(%rsi),%r8
s3: movq 2*8(%rsi),%r9
s4: movq 3*8(%rsi),%r10
d1: movnti %r11,(%rdi)
d2: movnti %r8,1*8(%rdi)
d3: movnti %r9,2*8(%rdi)
d4: movnti %r10,3*8(%rdi)
s5: movq 4*8(%rsi),%r11
s6: movq 5*8(%rsi),%r8
s7: movq 6*8(%rsi),%r9
s8: movq 7*8(%rsi),%r10
d5: movnti %r11,4*8(%rdi)
d6: movnti %r8,5*8(%rdi)
d7: movnti %r9,6*8(%rdi)
d8: movnti %r10,7*8(%rdi)
addq %rbx,%rsi
addq %rbx,%rdi
decq %rdx
jz loop_no_prefetch
jns loop
handle_tail:
movl %ecx,%edx
andl $63,%ecx
shrl $3,%ecx
jz handle_7
movl $8,%ebx
loop_8:
s9: movq (%rsi),%r8
d9: movnti %r8,(%rdi)
addq %rbx,%rdi
addq %rbx,%rsi
loop loop_8
handle_7:
movl %edx,%ecx
andl $7,%ecx
jz ende
loop_1:
s10: movb (%rsi),%bl
d10: movb %bl,(%rdi)
incq %rdi
incq %rsi
loop loop_1
ende:
sfence
popq %rbx
ret
#ifdef FIX_ALIGNMENT
/* align destination */
bad_alignment:
movl $8,%r9d
subl %ecx,%r9d
movl %r9d,%ecx
subq %r9,%rdx
jz small_align
js small_align
align_1:
s11: movb (%rsi),%bl
d11: movb %bl,(%rdi)
incq %rsi
incq %rdi
loop align_1
jmp after_bad_alignment
small_align:
addq %r9,%rdx
jmp handle_7
#endif
/* table sorted by exception address */
.section __ex_table,"a"
.align 8
.quad s1,s1e
.quad s2,s2e
.quad s3,s3e
.quad s4,s4e
.quad d1,s1e
.quad d2,s2e
.quad d3,s3e
.quad d4,s4e
.quad s5,s5e
.quad s6,s6e
.quad s7,s7e
.quad s8,s8e
.quad d5,s5e
.quad d6,s6e
.quad d7,s7e
.quad d8,s8e
.quad s9,e_quad
.quad d9,e_quad
.quad s10,e_byte
.quad d10,e_byte
#ifdef FIX_ALIGNMENT
.quad s11,e_byte
.quad d11,e_byte
#endif
.quad e5,e_zero
.previous
/* compute 64-offset for main loop. 8 bytes accuracy with error on the
pessimistic side. this is gross. it would be better to fix the
interface. */
/* eax: zero, ebx: 64 */
s1e: addl $8,%eax
s2e: addl $8,%eax
s3e: addl $8,%eax
s4e: addl $8,%eax
s5e: addl $8,%eax
s6e: addl $8,%eax
s7e: addl $8,%eax
s8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */
shlq $6,%rdx /* loop counter * 64 (stride length) */
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
jmp zero_rest
/* exception on quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
e_quad:
shll $3,%ecx
andl $7,%edx
addl %ecx,%edx
/* edx: bytes to zero, rdi: dest, eax:zero */
zero_rest:
movq %rdx,%rcx
e_byte:
xorl %eax,%eax
e5: rep
stosb
/* when there is another exception while zeroing the rest just return */
e_zero:
movq %rdx,%rax
jmp ende
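copy_to_user and copy_from_user above return the number of bytes left uncopied in %eax (0 on success), and copy_from_user zeroes the destination on a fault. A minimal sketch of how a caller typically consumes that convention; the struct and function names below are hypothetical and not part of this patch:

#include <asm/uaccess.h>
#include <asm/errno.h>

struct foo_info {			/* hypothetical example structure */
	int version;
	int flags;
};

static int foo_get_info(struct foo_info *ubuf)
{
	struct foo_info info = { 1, 0 };

	/* copy_to_user() returns the number of bytes NOT copied;
	   nonzero means part of the user range faulted */
	if (copy_to_user(ubuf, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}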
/*
* Copyright 2002 Andi Kleen
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
// #define FIX_ALIGNMENT 1
/*
* Checksum copy with exception handling.
* On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
* destination is zeroed.
*
* Input
* rdi source
* rsi destination
* edx len (32bit)
* ecx sum (32bit)
* r8 src_err_ptr (int)
* r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
*
* Wrappers need to take care of valid exception sum and zeroing.
*/
.macro source
10:
.section __ex_table,"a"
.align 8
.quad 10b,bad_source
.previous
.endm
.macro dest
20:
.section __ex_table,"a"
.align 8
.quad 20b,bad_dest
.previous
.endm
.globl csum_partial_copy_generic
.p2align
csum_partial_copy_generic:
prefetchnta (%rdi)
pushq %rbx
pushq %r12
pushq %r14
pushq %r15
movq %r8,%r14
movq %r9,%r15
movl %ecx,%eax
movl %edx,%ecx
#ifdef FIX_ALIGNMENT
/* align source to 8 bytes */
movl %edi,%r8d
andl $7,%r8d
jnz bad_alignment
after_bad_alignment:
#endif
movl $64,%r10d
xorl %r9d,%r9d
movq %rcx,%r12
shrq $6,%r12
/* loopcounter is maintained as one less to test efficiently for the
previous to last iteration. This is needed to stop the prefetching. */
decq %r12
js handle_tail /* < 64 */
jz loop_no_prefetch /* = 64 + X */
/* main loop. clear in 64 byte blocks */
/* tries hard not to prefetch over the boundary */
/* r10: 64, r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
/* r11: temp3, rdx: temp4, r12 loopcnt */
.p2align
loop:
/* Could prefetch more than one loop, but then it would be even
trickier to avoid prefetching over the boundary. The hardware prefetch
should take care of this anyway. The reason for this prefetch is
just the non temporal hint to avoid cache pollution. Hopefully this
will be handled properly by the hardware. */
prefetchnta 64(%rdi)
loop_no_prefetch:
source
movq (%rdi),%rbx
source
movq 8(%rdi),%r8
source
movq 16(%rdi),%r11
source
movq 24(%rdi),%rdx
dest
movnti %rbx,(%rsi)
dest
movnti %r8,8(%rsi)
dest
movnti %r11,16(%rsi)
dest
movnti %rdx,24(%rsi)
addq %rbx,%rax
adcq %r8,%rax
adcq %r11,%rax
adcq %rdx,%rax
source
movq 32(%rdi),%rbx
source
movq 40(%rdi),%r8
source
movq 48(%rdi),%r11
source
movq 56(%rdi),%rdx
dest
movnti %rbx,32(%rsi)
dest
movnti %r8,40(%rsi)
dest
movnti %r11,48(%rsi)
dest
movnti %rdx,56(%rsi)
adcq %rbx,%rax
adcq %r8,%rax
adcq %r11,%rax
adcq %rdx,%rax
adcq %r9,%rax /* add in carry */
addq %r10,%rdi
addq %r10,%rsi
decq %r12
jz loop_no_prefetch /* previous to last iteration? */
jns loop
/* do last up to 56 bytes */
handle_tail:
/* ecx: count */
movl %ecx,%r10d
andl $63,%ecx
shrl $3,%ecx
jz fold
clc
movl $8,%edx
loop_8:
source
movq (%rdi),%rbx
adcq %rbx,%rax
dest
movnti %rbx,(%rsi)
leaq (%rsi,%rdx),%rsi /* preserve carry */
leaq (%rdi,%rdx),%rdi
loop loop_8
adcq %r9,%rax /* add in carry */
fold:
movl %eax,%ebx
shrq $32,%rax
addq %rbx,%rax
/* do last up to 6 bytes */
handle_7:
movl %r10d,%ecx
andl $7,%ecx
shrl $1,%ecx
jz handle_1
movl $2,%edx
xorl %ebx,%ebx
clc
loop_1:
source
movw (%rdi),%bx
adcq %rbx,%rax
dest
movw %bx,(%rsi)
addq %rdx,%rdi
addq %rdx,%rsi
loop loop_1
adcw %r9w,%ax /* add in carry */
/* handle last odd byte */
handle_1:
testl $1,%r10d
jz ende
xorl %ebx,%ebx
source
movb (%rdi),%bl
dest
movb %bl,(%rsi)
addw %bx,%ax
adcw %r9w,%ax /* carry */
ende:
sfence
popq %r15
popq %r14
popq %r12
popq %rbx
ret
#ifdef FIX_ALIGNMENT
/* align source to 8 bytes. */
/* r8d: unalignedness, ecx len */
bad_alignment:
testl $1,%edi
jnz odd_source
/* compute distance to next aligned position */
movl $8,%r8d
xchgl %r8d,%ecx
subl %r8d,%ecx
/* handle unaligned part */
shrl $1,%ecx
xorl %ebx,%ebx
movl $2,%r10d
align_loop:
source
movw (%rdi),%bx
addq %rbx,%rax /* carry cannot happen */
dest
movw %bx,(%rsi)
addq %r10,%rdi
addq %r10,%rsi
loop align_loop
jmp after_bad_alignment
/* weird case. need to swap the sum at the end because the spec requires
16 bit words of the sum to be always paired.
handle it recursively because it should be rather rare. */
odd_source:
/* copy odd byte */
xorl %ebx,%ebx
source
movb (%rdi),%bl
addl %ebx,%eax /* add to old checksum */
adcl $0,%ecx
dest
movb %al,(%rsi)
/* fix arguments */
movl %eax,%ecx
incq %rsi
incq %rdi
decq %rdx
call csum_partial_copy_generic
bswap %eax /* this should work, but check */
jmp ende
#endif
/* Exception handlers. Very simple, zeroing is done in the wrappers */
bad_source:
movl $-EFAULT,(%r14)
jmp ende
bad_dest:
movl $-EFAULT,(%r15)
jmp ende
/*
* arch/x86_64/lib/checksum.c
* arch/x86_64/lib/csum-partial.c
*
* This file contains network checksum routines that are better done
* in an architecture-specific manner due to speed..
* in an architecture-specific manner due to speed.
*/
#include <linux/string.h>
#include <asm/byteorder.h>
#include <linux/compiler.h>
#include <linux/module.h>
/* Better way for this sought */
static inline unsigned short from64to16(unsigned long x)
{
/* add up 32-bit words for 33 bits */
......@@ -22,13 +23,13 @@ static inline unsigned short from64to16(unsigned long x)
}
/*
* Do a 64-bit checksum on an arbitrary memory area..
* Do a 64-bit checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
* This isn't a great routine, but it's not _horrible_ either. The
* inner loop could be unrolled a bit further, and there are better
* ways to do the carry, but this is reasonable.
* This isn't a great routine, but it's not _horrible_ either.
* We rely on the compiler to unroll.
*/
static inline unsigned long do_csum(const unsigned char * buff, int len)
static inline unsigned do_csum(const unsigned char * buff, int len)
{
int odd, count;
unsigned long result = 0;
......@@ -36,7 +37,7 @@ static inline unsigned long do_csum(const unsigned char * buff, int len)
if (len <= 0)
goto out;
odd = 1 & (unsigned long) buff;
if (odd) {
if (unlikely(odd)) {
result = *buff << 8;
len--;
buff++;
......@@ -59,16 +60,15 @@ static inline unsigned long do_csum(const unsigned char * buff, int len)
}
count >>= 1; /* nr of 64-bit words.. */
if (count) {
unsigned long carry = 0;
unsigned long zero = 0;
do {
unsigned long w = *(unsigned long *) buff;
asm(" addq %1,%0\n"
" adcq %2,%0\n"
: "=r" (result)
: "m" (*buff), "r" (zero), "0" (result));
count--;
buff += 8;
result += carry;
result += w;
carry = (w > result);
} while (count);
result += carry;
result = (result & 0xffffffff) + (result >> 32);
}
if (len & 4) {
......@@ -84,8 +84,8 @@ static inline unsigned long do_csum(const unsigned char * buff, int len)
if (len & 1)
result += *buff;
result = from64to16(result);
if (odd)
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
if (unlikely(odd))
return ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
return result;
}
......@@ -100,25 +100,27 @@ static inline unsigned long do_csum(const unsigned char * buff, int len)
* this function must be called with even lengths, except
* for the last fragment, which may be odd
*
* it's best to have buff aligned on a 32-bit boundary
* it's best to have buff aligned on a 64-bit boundary
*/
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
{
unsigned long result = do_csum(buff, len);
unsigned result = do_csum(buff, len);
/* add in old sum, and carry.. */
result += sum;
/* 32+c bits -> 32 bits */
result = (result & 0xffffffff) + (result >> 32);
asm("addl %1,%0\n\t"
"adcl $0,%0" : "=r" (result) : "r" (sum), "0" (result));
return result;
}
//EXPORT_SYMBOL(csum_partial);
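Because csum_partial returns an unfolded 32-bit sum and takes the previous sum as its third argument, it can be chained over several fragments and folded once at the end with csum_fold() from <asm/checksum.h>. A minimal sketch; the fragment buffers are hypothetical, and only the last fragment may have an odd length, as noted above:

#include <asm/checksum.h>

static unsigned short checksum_two_fragments(const unsigned char *frag1, int len1,
					     const unsigned char *frag2, int len2)
{
	unsigned int sum;

	sum = csum_partial(frag1, len1, 0);	/* len1 must be even */
	sum = csum_partial(frag2, len2, sum);	/* last fragment may be odd */
	return csum_fold(sum);			/* fold to the final 16-bit checksum */
}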
/*
* this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c
*/
unsigned short ip_compute_csum(unsigned char * buff, int len)
{
return ~from64to16(do_csum(buff,len));
return ~csum_partial(buff,len,0);
}
EXPORT_SYMBOL(ip_compute_csum);
/* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v.2
*
* Wrappers of assembly checksum functions for x86-64.
*/
#include <asm/checksum.h>
#include <linux/module.h>
/* Better way for this sought */
static inline unsigned from64to32(unsigned long x)
{
/* add up 32-bit words for 33 bits */
x = (x & 0xffffffff) + (x >> 32);
/* add up 16-bit and 17-bit words for 17+c bits */
x = (x & 0xffff) + (x >> 16);
/* add up 16-bit and 2-bit for 16+c bit */
x = (x & 0xffff) + (x >> 16);
return x;
}
/**
* csum_partial_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
 * @errp: set to -EFAULT for a bad source address.
*
 * Returns a 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
unsigned int
csum_partial_copy_from_user(const char *src, char *dst,
int len, unsigned int isum, int *errp)
{
*errp = 0;
if (likely(access_ok(VERIFY_READ,src, len))) {
unsigned long sum;
sum = csum_partial_copy_generic(src,dst,len,isum,errp,NULL);
if (likely(*errp == 0))
return from64to32(sum);
}
*errp = -EFAULT;
memset(dst,0,len);
return 0;
}
EXPORT_SYMBOL(csum_partial_copy_from_user);
/**
* csum_partial_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
 * @errp: set to -EFAULT for a bad destination address.
*
 * Returns a 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
unsigned int
csum_partial_copy_to_user(const char *src, char *dst,
int len, unsigned int isum, int *errp)
{
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
*errp = -EFAULT;
return 0;
}
*errp = 0;
return from64to32(csum_partial_copy_generic(src,dst,len,isum,NULL,errp));
}
EXPORT_SYMBOL(csum_partial_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
* @src: source address
* @dst: destination address
* @len: number of bytes to be copied.
 * @sum: initial sum that is added into the result (32bit unfolded)
*
 * Returns a 32bit unfolded checksum of the buffer.
*/
unsigned int
csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum)
{
return from64to32(csum_partial_copy_generic(src,dst,len,sum,NULL,NULL));
}
//EXPORT_SYMBOL(csum_partial_copy_nocheck);
unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
__u32 len, unsigned short proto, unsigned int sum)
{
__u64 rest, sum64;
rest = (__u64)htonl(len) + (__u64)htons(proto) + (__u64)sum;
asm(" addq (%[saddr]),%[sum]\n"
" adcq 8(%[saddr]),%[sum]\n"
" adcq (%[daddr]),%[sum]\n"
" adcq 8(%[daddr]),%[sum]\n"
" adcq $0,%[sum]\n"
: [sum] "=r" (sum64)
: "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
return csum_fold(from64to32(sum64));
}
EXPORT_SYMBOL(csum_ipv6_magic);
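The copy-and-checksum wrappers above return an unfolded 32-bit sum, report faults through *errp, and zero the destination when the user source faults. A minimal caller sketch, assuming a hypothetical kernel buffer; only the error and folding conventions come from the code above:

#include <asm/checksum.h>
#include <asm/errno.h>

/* Copy len bytes from user space into kbuf and return the folded checksum,
   or a negative errno on fault. kbuf/ubuf/len are hypothetical caller state. */
static int copy_and_checksum(const char *ubuf, char *kbuf, int len)
{
	int err = 0;
	unsigned int sum;

	sum = csum_partial_copy_from_user(ubuf, kbuf, len, 0, &err);
	if (err)
		return err;		/* -EFAULT; kbuf was zeroed by the wrapper */
	return csum_fold(sum);		/* fold the 32-bit unfolded sum */
}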
#include <linux/string.h>
#include <asm/io.h>
#include <linux/module.h>
void *memcpy_toio(void *dst,void*src,unsigned len)
{
return __inline_memcpy(__io_virt(dst),src,len);
}
void *memcpy_fromio(void *dst,void*src,unsigned len)
{
return __inline_memcpy(dst,__io_virt(src),len);
}
EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
/* Copyright 2002 Andi Kleen */
/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
.globl __memcpy
.globl memcpy
.p2align
__memcpy:
memcpy:
pushq %rbx
	prefetcht0 (%rsi)	/* for the rest we hope the hw prefetcher kicks in */
movq %rdi,%rax
movl %edi,%ecx
andl $7,%ecx
jnz bad_alignment
after_bad_alignment:
movq %rdx,%rcx
movl $64,%ebx
shrq $6,%rcx
jz handle_tail
loop_64:
/* no prefetch because we assume the hw prefetcher does it already
and we have no specific temporal hint to give. XXX or give a nta
hint for the source? */
movq (%rsi),%r11
movq 8(%rsi),%r8
movq 2*8(%rsi),%r9
movq 3*8(%rsi),%r10
movnti %r11,(%rdi)
movnti %r8,1*8(%rdi)
movnti %r9,2*8(%rdi)
movnti %r10,3*8(%rdi)
movq 4*8(%rsi),%r11
movq 5*8(%rsi),%r8
movq 6*8(%rsi),%r9
movq 7*8(%rsi),%r10
movnti %r11,4*8(%rdi)
movnti %r8,5*8(%rdi)
movnti %r9,6*8(%rdi)
movnti %r10,7*8(%rdi)
addq %rbx,%rsi
addq %rbx,%rdi
loop loop_64
handle_tail:
movl %edx,%ecx
andl $63,%ecx
shrl $3,%ecx
jz handle_7
movl $8,%ebx
loop_8:
movq (%rsi),%r8
movnti %r8,(%rdi)
addq %rbx,%rdi
addq %rbx,%rsi
loop loop_8
handle_7:
movl %edx,%ecx
andl $7,%ecx
jz ende
loop_1:
movb (%rsi),%r8b
movb %r8b,(%rdi)
incq %rdi
incq %rsi
loop loop_1
ende:
sfence
popq %rbx
ret
/* align destination */
/* This is simpleminded. For bigger blocks it may make sense to align
src and dst to their aligned subset and handle the rest separately */
bad_alignment:
movl $8,%r9d
subl %ecx,%r9d
movl %r9d,%ecx
subq %r9,%rdx
js small_alignment
jz small_alignment
align_1:
movb (%rsi),%r8b
movb %r8b,(%rdi)
incq %rdi
incq %rsi
loop align_1
jmp after_bad_alignment
small_alignment:
addq %r9,%rdx
jmp handle_7
/* Normally compiler builtins are used, but sometimes the compiler calls out
of line code. Based on asm-i386/string.h.
*/
#define _STRING_C
#include <linux/string.h>
#undef memmove
void *memmove(void * dest,const void *src,size_t count)
{
if (dest < src) {
__inline_memcpy(dest,src,count);
} else {
/* Could be more clever and move longs */
unsigned long d0, d1, d2;
__asm__ __volatile__(
"std\n\t"
"rep\n\t"
"movsb\n\t"
"cld"
: "=&c" (d0), "=&S" (d1), "=&D" (d2)
:"0" (count),
"1" (count-1+(const char *)src),
"2" (count-1+(char *)dest)
:"memory");
}
return dest;
}
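For the overlapping dest > src case memmove copies backwards with std/rep movsb, which is what makes in-place shifts like the following safe; the buffer and helper below are hypothetical:

#include <linux/types.h>
#include <linux/string.h>

/* Shift n bytes one position to the right inside the same buffer.
   dest > src, so the backwards path above is taken. */
static void shift_right_one(char *buf, size_t n)
{
	memmove(buf + 1, buf, n);
}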
/* Copyright 2002 Andi Kleen, SuSE Labs */
// #define FIX_ALIGNMENT 1
/*
* ISO C memset - set a memory block to a byte value.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.globl ____memset
.p2align
____memset:
	movq %rdi,%r10		/* save destination for return value */
movq %rdx,%r11 /* save count */
/* expand byte value */
movzbl %sil,%ecx /* zero extend char value */
movabs $0x0101010101010101,%rax /* expansion pattern */
mul %rcx /* expand with rax, clobbers rdx */
#ifdef FIX_ALIGNMENT
/* align dst */
movl %edi,%r9d
andl $7,%r9d /* test unaligned bits */
jnz bad_alignment
after_bad_alignment:
#endif
movq %r11,%rcx /* restore count */
shrq $6,%rcx /* divide by 64 */
jz handle_tail /* block smaller than 64 bytes? */
movl $64,%r8d /* CSE loop block size */
loop_64:
movnti %rax,0*8(%rdi)
movnti %rax,1*8(%rdi)
movnti %rax,2*8(%rdi)
movnti %rax,3*8(%rdi)
movnti %rax,4*8(%rdi)
movnti %rax,5*8(%rdi)
movnti %rax,6*8(%rdi)
movnti %rax,7*8(%rdi) /* clear 64 byte blocks */
addq %r8,%rdi /* increase pointer by 64 bytes */
loop loop_64 /* decrement rcx and if not zero loop */
/* Handle tail in loops. The loops should be faster than hard
to predict jump tables. */
handle_tail:
movl %r11d,%ecx
andl $63,%ecx
shrl $3,%ecx
jz handle_7
loop_8:
movnti %rax,(%rdi) /* long words */
addq $8,%rdi
loop loop_8
handle_7:
movl %r11d,%ecx
andl $7,%ecx
jz ende
loop_1:
movb %al,(%rdi) /* bytes */
incq %rdi
loop loop_1
ende:
movq %r10,%rax
ret
#ifdef FIX_ALIGNMENT
bad_alignment:
andq $-8,%r11 /* shorter than 8 bytes */
jz handle_7 /* if yes handle it in the tail code */
movnti %rax,(%rdi) /* unaligned store of 8 bytes */
movq $8,%r8
subq %r9,%r8 /* compute alignment (8-misalignment) */
addq %r8,%rdi /* fix destination */
subq %r8,%r11 /* fix count */
jmp after_bad_alignment
#endif
#include <linux/types.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/compiler.h>
#include <asm/i387.h>
#include <asm/hardirq.h>
#include <asm/page.h>
/*
* MMX 3DNow! library helper functions
*
* To do:
* We can use MMX just for prefetch in IRQ's. This may be a win.
* (reported so on K6-III)
* We should use a better code neutral filler for the short jump
* leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
 * We also want to clobber the filler register so we don't get any
* register forwarding stalls on the filler.
*
* Add *user handling. Checksums are not a win with MMX on any CPU
* tested so far for any MMX solution figured.
*
* 22/09/2000 - Arjan van de Ven
 * Improved for non-engineering-sample Athlons
*
* 2002 Andi Kleen. Some cleanups and changes for x86-64.
* Not really tuned yet. Using the Athlon version for now.
 * This currently uses MMX for 8 byte stores, but on hammer we could
 * use integer 8 byte stores too and avoid the FPU save overhead.
 * The disadvantage is that the integer loads/stores have a strong ordering
 * model and may be slower.
*
* $Id$
*/
#ifdef MMX_MEMCPY_THRESH
void *_mmx_memcpy(void *to, const void *from, size_t len)
{
void *p;
int i;
p = to;
if (unlikely(in_interrupt()))
goto standard;
/* XXX: check if this is still memory bound with unaligned to/from.
if not align them here to 8bytes. */
i = len >> 6; /* len/64 */
kernel_fpu_begin();
__asm__ __volatile__ (
" prefetch (%0)\n" /* This set is 28 bytes */
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
" prefetch 192(%0)\n"
" prefetch 256(%0)\n"
"\n"
: : "r" (from) );
for(; i>5; i--)
{
__asm__ __volatile__ (
" prefetch 320(%0)\n"
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
for(; i>0; i--)
{
__asm__ __volatile__ (
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
len &= 63;
kernel_fpu_end();
/*
* Now do the tail of the block
*/
standard:
__inline_memcpy(to, from, len);
return p;
}
#endif
static inline void fast_clear_page(void *page)
{
int i;
kernel_fpu_begin();
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
);
for(i=0;i<4096/64;i++)
{
__asm__ __volatile__ (
" movntq %%mm0, (%0)\n"
" movntq %%mm0, 8(%0)\n"
" movntq %%mm0, 16(%0)\n"
" movntq %%mm0, 24(%0)\n"
" movntq %%mm0, 32(%0)\n"
" movntq %%mm0, 40(%0)\n"
" movntq %%mm0, 48(%0)\n"
" movntq %%mm0, 56(%0)\n"
: : "r" (page) : "memory");
page+=64;
}
/* since movntq is weakly-ordered, a "sfence" is needed to become
* ordered again.
*/
__asm__ __volatile__ (
" sfence \n" : :
);
kernel_fpu_end();
}
static inline void fast_copy_page(void *to, void *from)
{
int i;
kernel_fpu_begin();
/* maybe the prefetch stuff can go before the expensive fnsave...
* but that is for later. -AV
*/
__asm__ __volatile__ (
" prefetch (%0)\n"
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
" prefetch 192(%0)\n"
" prefetch 256(%0)\n"
: : "r" (from) );
for(i=0; i<(4096-320)/64; i++)
{
__asm__ __volatile__ (
" prefetch 320(%0)\n"
" movq (%0), %%mm0\n"
" movntq %%mm0, (%1)\n"
" movq 8(%0), %%mm1\n"
" movntq %%mm1, 8(%1)\n"
" movq 16(%0), %%mm2\n"
" movntq %%mm2, 16(%1)\n"
" movq 24(%0), %%mm3\n"
" movntq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm4\n"
" movntq %%mm4, 32(%1)\n"
" movq 40(%0), %%mm5\n"
" movntq %%mm5, 40(%1)\n"
" movq 48(%0), %%mm6\n"
" movntq %%mm6, 48(%1)\n"
" movq 56(%0), %%mm7\n"
" movntq %%mm7, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
for(i=(4096-320)/64; i<4096/64; i++)
{
__asm__ __volatile__ (
"2: movq (%0), %%mm0\n"
" movntq %%mm0, (%1)\n"
" movq 8(%0), %%mm1\n"
" movntq %%mm1, 8(%1)\n"
" movq 16(%0), %%mm2\n"
" movntq %%mm2, 16(%1)\n"
" movq 24(%0), %%mm3\n"
" movntq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm4\n"
" movntq %%mm4, 32(%1)\n"
" movq 40(%0), %%mm5\n"
" movntq %%mm5, 40(%1)\n"
" movq 48(%0), %%mm6\n"
" movntq %%mm6, 48(%1)\n"
" movq 56(%0), %%mm7\n"
" movntq %%mm7, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
/* since movntq is weakly-ordered, a "sfence" is needed to become
* ordered again.
*/
__asm__ __volatile__ (
" sfence \n" : :
);
kernel_fpu_end();
}
void mmx_clear_page(void * page)
{
#if 1
__builtin_memset(page,0,PAGE_SIZE);
#else
/* AK: these in_interrupt checks should not be needed. */
if(unlikely(in_interrupt()))
__builtin_memset(page,0,PAGE_SIZE);
else
fast_clear_page(page);
#endif
}
void mmx_copy_page(void *to, void *from)
{
#if 1
__builtin_memcpy(to,from,PAGE_SIZE);
#else
/* AK: these in_interrupt checks should not be needed. */
if(unlikely(in_interrupt()))
__builtin_memcpy(to,from,PAGE_SIZE);
else
fast_copy_page(to, from);
#endif
}
#include <asm/calling.h>
/*
* Save registers for the slow path of semaphores here to avoid
* disturbance of register allocation in fast paths with function calls.
* Written 2001 by Andi Kleen.
*/
.macro rwsem_thunk name,func
.globl \name
\name:
SAVE_ARGS
movq %rax,%rdi
call \func
jmp restore
.endm
rwsem_thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
rwsem_thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
rwsem_thunk rwsem_wake_thunk,rwsem_wake
/* This does not really belong here, but the macros are so
convenient. */
rwsem_thunk do_softirq_thunk,do_softirq
restore:
RESTORE_ARGS
ret
/*
* User address space access functions.
* The non inlined parts of asm-i386/uaccess.h are here.
*
* Copyright 1997 Andi Kleen <ak@muc.de>
* Copyright 1997 Linus Torvalds
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/mmx.h>
unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
{
prefetch(from);
if (access_ok(VERIFY_WRITE, to, n))
__copy_user(to,from,n);
return n;
}
unsigned long
__generic_copy_from_user(void *to, const void *from, unsigned long n)
{
prefetchw(to);
if (access_ok(VERIFY_READ, from, n))
__copy_user_zeroing(to,from,n);
else
memset(to, 0, n);
return n;
}
/*
* Copy a null terminated string from userspace.
*/
#define __do_strncpy_from_user(dst,src,count,res) \
do { \
long __d0, __d1, __d2; \
__asm__ __volatile__( \
" testq %1,%1\n" \
" jz 2f\n" \
"0: lodsb\n" \
" stosb\n" \
" testb %%al,%%al\n" \
" jz 1f\n" \
" decq %1\n" \
" jnz 0b\n" \
"1: subq %1,%0\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movq %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .quad 0b,3b\n" \
".previous" \
: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
: "memory"); \
} while (0)
long
__strncpy_from_user(char *dst, const char *src, long count)
long __strncpy_from_user(char *dst, const char *src, long count)
{
long res;
__do_strncpy_from_user(dst, src, count, res);
long __d0, __d1, __d2;
asm volatile( \
" testq %1,%1\n"
" jz 2f\n"
"0: lodsb\n"
" stosb\n"
" testb %%al,%%al\n"
" loopnz 0b\n"
"1: subq %1,%0\n"
"2:\n"
".section .fixup,\"ax\"\n"
"3: movq %5,%0\n"
" jmp 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 8\n"
" .quad 0b,3b\n"
".previous"
: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),
"=&D" (__d2)
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst)
: "memory");
return res;
}
long
strncpy_from_user(char *dst, const char *src, long count)
long strncpy_from_user(char *dst, const char *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
__do_strncpy_from_user(dst, src, count, res);
return res;
return __strncpy_from_user(dst, src, count);
return -EFAULT;
}
/*
* Zero Userspace
*/
#define __do_clear_user(addr,size) \
do { \
long __d0; \
__asm__ __volatile__( \
"cld\n" \
"0: rep; stosl\n" \
" movq %2,%0\n" \
"1: rep; stosb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .quad 0b,3b\n" \
" .quad 1b,2b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
unsigned long
clear_user(void *to, unsigned long n)
unsigned long __clear_user(void *addr, unsigned long size)
{
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
long __d0;
/* no memory constraint because it doesn't change any memory gcc knows
about */
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
"0: movnti %[zero],(%[dst])\n"
" addq %[eight],%[dst]\n"
" loop 0b\n"
"4: movq %[size1],%%rcx\n"
" testl %%ecx,%%ecx\n"
" jz 2f\n"
"1: movb %b[zero],(%[dst])\n"
" incq %[dst]\n"
" loop 1b\n"
"2: sfence\n"
".section .fixup,\"ax\"\n"
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 8\n"
" .quad 0b,3b\n"
" .quad 1b,2b\n"
".previous"
: [size8] "=c"(size), [dst] "=&D" (__d0)
	: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]" (addr),
[zero] "r" (0UL), [eight] "r" (8UL));
return size;
}
unsigned long
__clear_user(void *to, unsigned long n)
unsigned long clear_user(void *to, unsigned long n)
{
__do_clear_user(to, n);
if (access_ok(VERIFY_WRITE, to, n))
return __clear_user(to, n);
return n;
}
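Like the copy functions, clear_user and __clear_user return the number of bytes that could not be zeroed (0 on success). A minimal caller sketch with a hypothetical user pointer and size:

#include <asm/uaccess.h>
#include <asm/errno.h>

static int zero_user_buffer(void *ubuf, unsigned long size)
{
	/* nonzero return means part of the range could not be cleared */
	if (clear_user(ubuf, size))
		return -EFAULT;
	return 0;
}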
......
/*
* linux/arch/i386/mm/extable.c
* linux/arch/x86_64/mm/extable.c
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <asm/uaccess.h>
#include <linux/init.h>
extern const struct exception_table_entry __start___ex_table[];
extern const struct exception_table_entry __stop___ex_table[];
static inline unsigned long
void __init exception_table_check(void)
{
const struct exception_table_entry *e;
unsigned long prev;
prev = 0;
for (e = __start___ex_table; e < __stop___ex_table; e++) {
if (e->insn < prev) {
panic("unordered exception table at %016lx:%016lx and %016lx:%016lx\n",
prev, e[-1].fixup,
e->insn, e->fixup);
}
prev = e->insn;
}
}
static unsigned long
search_one_table(const struct exception_table_entry *first,
const struct exception_table_entry *last,
unsigned long value)
......@@ -41,8 +59,7 @@ search_exception_table(unsigned long addr)
#ifndef CONFIG_MODULES
/* There is only the kernel to search. */
ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
return ret;
return search_one_table(__start___ex_table, __stop___ex_table-1, addr);
#else
/* The kernel is the last "module" -- no need to treat it special. */
struct module *mp;
......
......@@ -85,7 +85,7 @@ void dump_pagetable(unsigned long address)
}
int page_fault_trace;
int exception_trace;
int exception_trace = 1;
/*
* This routine handles page faults. It determines the address,
......@@ -311,11 +311,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (!pte_present(*pte))
goto bad_area_nosemaphore;
/* Strictly a flush_tlb_all because vmalloc is global,
but this only applies to new global pages so it should
not be needed. vmalloc will likely touch multiple ptes,
so do a full flush instead of a partial one. */
__flush_tlb();
__flush_tlb_all();
return;
}
}
......@@ -153,6 +153,7 @@ static struct temp_map {
} temp_mappings[] __initdata = {
{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
{ &temp_boot_pmds[1], (void *)(44UL * 1024 * 1024) },
{}
};
......
......@@ -9,6 +9,7 @@
*/
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
......@@ -165,5 +166,5 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
void iounmap(void *addr)
{
if (addr > high_memory)
return vfree((void *) (PAGE_MASK & (unsigned long) addr));
vfree((void *) (PAGE_MASK & (unsigned long) addr));
}
O_TARGET := pci.o
obj-y := x86-64.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
obj-y += fixup.o
ifdef CONFIG_ACPI_PCI
obj-y += acpi.o
endif
obj-y += legacy.o
obj-y += irq.o common.o
include $(TOPDIR)/Rules.make
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include "pci.h"
static int __init pci_acpi_init(void)
{
if (pcibios_scanned)
return 0;
if (!(pci_probe & PCI_NO_ACPI_ROUTING)) {
if (!acpi_pci_irq_init()) {
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi'\n");
pcibios_scanned++;
pcibios_enable_irq = acpi_pci_irq_enable;
} else
printk(KERN_WARNING "PCI: Invalid ACPI-PCI IRQ routing table\n");
}
return 0;
}
subsys_initcall(pci_acpi_init);
See arch/i386/pci/changelog for early changelog.
/*
* Low-Level PCI Support for PC
*
* (c) 1999--2000 Martin Mares <mj@ucw.cz>
Note: on x86-64 there is no PCI BIOS so there is no way to sort in the
same order as 32bit Linux. This could cause grief for dualbooting because
devices may wander. May want to use ACPI for sorting eventually.
*/
#include <linux/sched.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <asm/segment.h>
#include <asm/io.h>
#include <asm/smp.h>
#include "pci.h"
unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
int pcibios_last_bus = 0xfe; /* XXX */
struct pci_bus *pci_root_bus = NULL;
struct pci_ops *pci_root_ops = NULL;
int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
/*
* legacy, numa, and acpi all want to call pcibios_scan_root
* from their initcalls. This flag prevents that.
*/
int pcibios_scanned;
/*
* This interrupt-safe spinlock protects all accesses to PCI
* configuration space.
*/
spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
/*
* Several buggy motherboards address only 16 devices and mirror
 * them to the next 16 IDs. We try to detect this `feature' on all
* primary buses (those containing host bridges as they are
* expected to be unique) and remove the ghost devices.
*/
static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
{
struct list_head *ln, *mn;
struct pci_dev *d, *e;
int mirror = PCI_DEVFN(16,0);
int seen_host_bridge = 0;
int i;
DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
d = pci_dev_b(ln);
if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
seen_host_bridge++;
for (mn=ln->next; mn != &b->devices; mn=mn->next) {
e = pci_dev_b(mn);
if (e->devfn != d->devfn + mirror ||
e->vendor != d->vendor ||
e->device != d->device ||
e->class != d->class)
continue;
for(i=0; i<PCI_NUM_RESOURCES; i++)
if (e->resource[i].start != d->resource[i].start ||
e->resource[i].end != d->resource[i].end ||
e->resource[i].flags != d->resource[i].flags)
continue;
break;
}
if (mn == &b->devices)
return;
}
if (!seen_host_bridge)
return;
printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
ln = &b->devices;
while (ln->next != &b->devices) {
d = pci_dev_b(ln->next);
if (d->devfn >= mirror) {
list_del(&d->global_list);
list_del(&d->bus_list);
kfree(d);
} else
ln = ln->next;
}
}
/*
* Called after each bus is probed, but before its children
* are examined.
*/
void __devinit pcibios_fixup_bus(struct pci_bus *b)
{
pcibios_fixup_ghosts(b);
pci_read_bridge_bases(b);
}
struct pci_bus * __devinit pcibios_scan_root(int busnum)
{
struct list_head *list;
struct pci_bus *bus;
list_for_each(list, &pci_root_buses) {
bus = pci_bus_b(list);
if (bus->number == busnum) {
/* Already scanned */
return bus;
}
}
printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
return pci_scan_bus(busnum, pci_root_ops, NULL);
}
static int __init pcibios_init(void)
{
if (!pci_root_ops) {
printk("PCI: System does not support PCI\n");
return 0;
}
pcibios_resource_survey();
/* may eventually need to do ACPI sort here. */
return 0;
}
subsys_initcall(pcibios_init);
char * __devinit pcibios_setup(char *str)
{
if (!strcmp(str, "off")) {
pci_probe = 0;
return NULL;
}
#ifdef CONFIG_PCI_DIRECT
else if (!strcmp(str, "conf1")) {
pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
return NULL;
}
else if (!strcmp(str, "conf2")) {
pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
return NULL;
}
#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
return NULL;
}
#endif
else if (!strcmp(str, "rom")) {
pci_probe |= PCI_ASSIGN_ROMS;
return NULL;
} else if (!strcmp(str, "assign-busses")) {
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
return NULL;
} else if (!strcmp(str, "usepirqmask")) {
pci_probe |= PCI_USE_PIRQ_MASK;
return NULL;
} else if (!strncmp(str, "irqmask=", 8)) {
pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
return NULL;
} else if (!strncmp(str, "lastbus=", 8)) {
pcibios_last_bus = simple_strtol(str+8, NULL, 0);
return NULL;
}
return str;
}
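pcibios_setup() handles the individual tokens of the pci= boot option and hands unrecognized strings back to the generic code. For example, the option suggested by the printk in pci_acpi_init above would be given on the kernel command line as:

pci=noacpi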
unsigned int pcibios_assign_all_busses(void)
{
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
}
int pcibios_enable_device(struct pci_dev *dev)
{
int err;
if ((err = pcibios_enable_resources(dev)) < 0)
return err;
return pcibios_enable_irq(dev);
}
/*
* Exceptions for specific devices. Usually work-arounds for fatal design flaws.
*
Short list on x86-64........so far.
*/
#include <linux/pci.h>
#include <linux/init.h>
#include "pci.h"
static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
{
/*
* NCR 53C810 returns class code 0 (at least on some systems).
* Fix class to be PCI_CLASS_STORAGE_SCSI
*/
if (!d->class) {
printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
d->class = PCI_CLASS_STORAGE_SCSI << 8;
}
}
static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
{
int i;
/*
* PCI IDE controllers use non-standard I/O port decoding, respect it.
*/
if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
return;
DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
for(i=0; i<4; i++) {
struct resource *r = &d->resource[i];
if ((r->start & ~0x80) == 0x374) {
r->start |= 2;
r->end = r->start;
}
}
}
static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
{
int i;
/*
* There exist PCI IDE controllers which have utter garbage
* in first four base registers. Ignore that.
*/
DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
for(i=0; i<4; i++)
d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
}
struct pci_fixup pcibios_fixups[] = {
{ PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
{ 0 }
};
......@@ -3,8 +3,8 @@
*/
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
jiffies = jiffies_64;
ENTRY(_start)
jiffies_64 = jiffies;
SECTIONS
{
. = 0xffffffff80100000;
......
......@@ -41,15 +41,9 @@ static __inline__ void apic_wait_icr_idle(void)
while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
}
#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_READ_AROUND_WRITE 0
# define apic_read_around(x)
# define apic_write_around(x,y) apic_write((x),(y))
#else
# define FORCE_READ_AROUND_WRITE 1
# define apic_read_around(x) apic_read(x)
# define apic_write_around(x,y) apic_write_atomic((x),(y))
#endif
#define FORCE_READ_AROUND_WRITE 0
#define apic_read_around(x)
#define apic_write_around(x,y) apic_write((x),(y))
static inline void ack_APIC_irq(void)
{
......