Commit 62395efd authored by Ingo Molnar

Merge branch 'x86/asm' into tracing/syscalls

We need the wider TIF work-mask checks in entry_32.S.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents ccd50dfd 88200bc2
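The reason for this merge in one sketch: the 32-bit syscall entry/exit paths tested the TIF work masks with 16-bit testw/testb instructions, which silently ignore any TIF bit above bit 15, and the tracing/syscalls work adds such a high bit. A minimal userspace model of the difference (the bit numbers here are illustrative assumptions; the real ones live in arch/x86/include/asm/thread_info.h):

#include <stdint.h>
#include <stdio.h>

#define TIF_SYSCALL_TRACE	0	/* a classic low work bit */
#define TIF_SYSCALL_FTRACE	27	/* assumed: a new work bit above bit 15 */

#define _TIF_SYSCALL_TRACE	(1u << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_FTRACE	(1u << TIF_SYSCALL_FTRACE)
#define _TIF_WORK_SYSCALL_ENTRY	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_FTRACE)

int main(void)
{
	uint32_t flags = _TIF_SYSCALL_FTRACE;	/* only the high bit is set */

	/* what "testw $mask, %cx" sees: the low 16 bits only */
	int seen_by_testw = (uint16_t)flags & (uint16_t)_TIF_WORK_SYSCALL_ENTRY;
	/* what "testl $mask, %ecx" sees: all 32 bits */
	int seen_by_testl = flags & _TIF_WORK_SYSCALL_ENTRY;

	printf("testw: %d  testl: %d\n", !!seen_by_testw, !!seen_by_testl);
	return 0;	/* prints "testw: 0  testl: 1" */
}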
@@ -933,6 +933,12 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
+config X86_CPU_DEBUG
+	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
+	---help---
+	  If you select this option, this will provide various x86 CPUs
+	  information through debugfs.
+
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1433,7 +1439,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
......
@@ -6,26 +6,23 @@
 # for more details.
 #
 # Copyright (C) 1994 by Linus Torvalds
-# Changed by many, many contributors over the years.
 #
 
 # ROOT_DEV specifies the default root-device when making the image.
 # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
 # the default of FLOPPY is used by 'build'.
 
 ROOT_DEV	:= CURRENT
 
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
 SVGA_MODE	:= -DSVGA_MODE=NORMAL_VGA
 
-# If you want the RAM disk device, define this to be the size in blocks.
-#RAMDISK	:= -DRAMDISK=512
-
-targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
+targets		:= vmlinux.bin setup.bin setup.elf bzImage
 
 subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -71,17 +68,13 @@ KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 KBUILD_CFLAGS	+= $(call cc-option,-m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
-$(obj)/zImage:  asflags-y := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
-$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
-$(obj)/bzImage: BUILDFLAGS := -b
+$(obj)/bzImage: asflags-y  := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
-cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+	    $(ROOT_DEV) > $@
 
-$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
-			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
......
@@ -24,12 +24,8 @@
 #include "boot.h"
 #include "offsets.h"
 
-SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
-SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
-SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
-					/* to be loaded */
-ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
+SYSSEG		= 0x1000		/* historical load address >> 4 */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
@@ -97,12 +93,12 @@ bugger_off_msg:
 
 	.section ".header", "a"
 	.globl	hdr
 hdr:
-setup_sects:	.byte	SETUPSECTS
+setup_sects:	.byte	0		/* Filled in by build.c */
 root_flags:	.word	ROOT_RDONLY
-syssize:	.long	SYSSIZE
-ram_size:	.word	RAMDISK
+syssize:	.long	0		/* Filled in by build.c */
+ram_size:	.word	0		/* Obsolete */
 vid_mode:	.word	SVGA_MODE
-root_dev:	.word	ROOT_DEV
+root_dev:	.word	0		/* Filled in by build.c */
 boot_flag:	.word	0xAA55
 
 # offset 512, entry point
@@ -123,14 +119,15 @@ _start:
 	# or else old loadlin-1.5 will fail)
 	.globl	realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
-start_sys_seg:	.word	SYSSEG
+start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
+					# in case something decided to "use" it
 	.word	kernel_version-512	# pointing to kernel version string
 					# above section of header is compatible
 					# with loadlin-1.5 (header v1.5). Don't
 					# change it.
 
-type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
-					#      Bootlin, SYSLX, bootsect...)
+type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
+					# bootloaders know to change this.
 					# See Documentation/i386/boot.txt for
 					# assigned ids
@@ -142,11 +139,7 @@ CAN_USE_HEAP	= 0x80			# If set, the loader also has set
 					# space behind setup.S can be used for
 					# heap purposes.
 					# Only the loader knows what is free
-#ifndef __BIG_KERNEL__
-		.byte	0
-#else
 		.byte	LOADED_HIGH
-#endif
 setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -157,11 +150,7 @@ setup_move_size: .word  0x8000		# size to move, when setup is not
 
 code32_start:				# here loaders can put a different
 					# start address for 32-bit code.
-#ifndef __BIG_KERNEL__
-		.long	0x1000		# 0x1000 = default for zImage
-#else
 		.long	0x100000	# 0x100000 = default for big kernel
-#endif
 
 ramdisk_image:	.long	0		# address of loaded ramdisk image
 					# Here the loader puts the 32-bit
......
@@ -32,47 +32,6 @@ static void realmode_switch_hook(void)
 	}
 }
 
-/*
- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
- * A bzImage kernel is loaded and runs at 0x100000.
- */
-static void move_kernel_around(void)
-{
-	/* Note: rely on the compile-time option here rather than
-	   the LOADED_HIGH flag.  The Qemu kernel loader unconditionally
-	   sets the loadflags to zero. */
-#ifndef __BIG_KERNEL__
-	u16 dst_seg, src_seg;
-	u32 syssize;
-
-	dst_seg =  0x1000 >> 4;
-	src_seg = 0x10000 >> 4;
-	syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
-
-	while (syssize) {
-		int paras  = (syssize >= 0x1000) ? 0x1000 : syssize;
-		int dwords = paras << 2;
-
-		asm volatile("pushw %%es ; "
-			     "pushw %%ds ; "
-			     "movw %1,%%es ; "
-			     "movw %2,%%ds ; "
-			     "xorw %%di,%%di ; "
-			     "xorw %%si,%%si ; "
-			     "rep;movsl ; "
-			     "popw %%ds ; "
-			     "popw %%es"
-			     : "+c" (dwords)
-			     : "r" (dst_seg), "r" (src_seg)
-			     : "esi", "edi");
-
-		syssize -= paras;
-		dst_seg += paras;
-		src_seg += paras;
-	}
-#endif
-}
-
 /*
  * Disable all interrupts at the legacy PIC.
  */
@@ -147,9 +106,6 @@ void go_to_protected_mode(void)
 	/* Hook before leaving real mode, also disables interrupts */
 	realmode_switch_hook();
 
-	/* Move the kernel/setup to their final resting places */
-	move_kernel_around();
-
 	/* Enable the A20 gate */
 	if (enable_a20()) {
 		puts("A20 gate not responding, unable to boot...\n");
......
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build [-b] setup system [rootdev] [> image]");
+	die("Usage: build setup system [rootdev] [> image]");
 }
 
 int main(int argc, char ** argv)
@@ -145,11 +145,6 @@ int main(int argc, char ** argv)
 	void *kernel;
 	u32 crc = 0xffffffffUL;
 
-	if (argc > 2 && !strcmp(argv[1], "-b"))
-	  {
-	    is_big_kernel = 1;
-	    argc--, argv++;
-	  }
 	if ((argc < 3) || (argc > 4))
 		usage();
 	if (argc > 3) {
@@ -216,8 +211,6 @@ int main(int argc, char ** argv)
 		die("Unable to mmap '%s': %m", argv[2]);
 	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
 	sys_size = (sz + 15 + 4) / 16;
-	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
-		die("System is too big. Try using bzImage or modules.");
 
 	/* Patch the setup code with the appropriate size parameters */
 	buf[0x1f1] = setup_sectors-1;
......
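For reference, the sys_size line above is plain round-up arithmetic: append 4 bytes of CRC, then round up to whole 16-byte paragraphs. A small userspace sketch of the same computation (the function name is made up for illustration):

#include <stdio.h>

/* Illustrative stand-in for build.c's computation: reserve 4 bytes for
 * the trailing CRC, then round up to whole 16-byte paragraphs. */
static unsigned int sys_size_paragraphs(unsigned int sz)
{
	return (sz + 15 + 4) / 16;
}

int main(void)
{
	/* 100-byte image + 4-byte CRC = 104 bytes -> 7 paragraphs (112 bytes) */
	printf("%u\n", sys_size_paragraphs(100));
	return 0;
}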
 #ifndef _ASM_X86_BOOT_H
 #define _ASM_X86_BOOT_H
 
-/* Don't touch these, unless you really know what you're doing. */
-#define DEF_SYSSEG	0x1000
-#define DEF_SYSSIZE	0x7F00
-
 /* Internal svga startup constants */
 #define NORMAL_VGA	0xffff		/* 80x25 mode */
 #define EXTENDED_VGA	0xfffe		/* 80x50 mode */
......
#ifndef _ASM_X86_CPU_DEBUG_H
#define _ASM_X86_CPU_DEBUG_H
/*
* CPU x86 architecture debug
*
* Copyright(C) 2009 Jaswinder Singh Rajput
*/
/* Register flags */
enum cpu_debug_bit {
/* Model Specific Registers (MSRs) */
CPU_MC_BIT, /* Machine Check */
CPU_MONITOR_BIT, /* Monitor */
CPU_TIME_BIT, /* Time */
CPU_PMC_BIT, /* Performance Monitor */
CPU_PLATFORM_BIT, /* Platform */
CPU_APIC_BIT, /* APIC */
CPU_POWERON_BIT, /* Power-on */
CPU_CONTROL_BIT, /* Control */
CPU_FEATURES_BIT, /* Features control */
CPU_LBRANCH_BIT, /* Last Branch */
CPU_BIOS_BIT, /* BIOS */
CPU_FREQ_BIT, /* Frequency */
CPU_MTTR_BIT, /* MTRR */
CPU_PERF_BIT, /* Performance */
CPU_CACHE_BIT, /* Cache */
CPU_SYSENTER_BIT, /* Sysenter */
CPU_THERM_BIT, /* Thermal */
CPU_MISC_BIT, /* Miscellaneous */
CPU_DEBUG_BIT, /* Debug */
CPU_PAT_BIT, /* PAT */
CPU_VMX_BIT, /* VMX */
CPU_CALL_BIT, /* System Call */
CPU_BASE_BIT, /* BASE Address */
CPU_SMM_BIT, /* System mgmt mode */
CPU_SVM_BIT, /* Secure Virtual Machine */
CPU_OSVM_BIT, /* OS-Visible Workaround */
/* Standard Registers */
CPU_TSS_BIT, /* Task Stack Segment */
CPU_CR_BIT, /* Control Registers */
CPU_DT_BIT, /* Descriptor Table */
/* End of Registers flags */
CPU_REG_ALL_BIT, /* Select all Registers */
};
#define CPU_REG_ALL (~0) /* Select all Registers */
#define CPU_MC (1 << CPU_MC_BIT)
#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
#define CPU_TIME (1 << CPU_TIME_BIT)
#define CPU_PMC (1 << CPU_PMC_BIT)
#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
#define CPU_APIC (1 << CPU_APIC_BIT)
#define CPU_POWERON (1 << CPU_POWERON_BIT)
#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
#define CPU_BIOS (1 << CPU_BIOS_BIT)
#define CPU_FREQ (1 << CPU_FREQ_BIT)
#define CPU_MTRR (1 << CPU_MTTR_BIT)
#define CPU_PERF (1 << CPU_PERF_BIT)
#define CPU_CACHE (1 << CPU_CACHE_BIT)
#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
#define CPU_THERM (1 << CPU_THERM_BIT)
#define CPU_MISC (1 << CPU_MISC_BIT)
#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
#define CPU_PAT (1 << CPU_PAT_BIT)
#define CPU_VMX (1 << CPU_VMX_BIT)
#define CPU_CALL (1 << CPU_CALL_BIT)
#define CPU_BASE (1 << CPU_BASE_BIT)
#define CPU_SMM (1 << CPU_SMM_BIT)
#define CPU_SVM (1 << CPU_SVM_BIT)
#define CPU_OSVM (1 << CPU_OSVM_BIT)
#define CPU_TSS (1 << CPU_TSS_BIT)
#define CPU_CR (1 << CPU_CR_BIT)
#define CPU_DT (1 << CPU_DT_BIT)
/* Register file flags */
enum cpu_file_bit {
CPU_INDEX_BIT, /* index */
CPU_VALUE_BIT, /* value */
};
#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
/*
* DisplayFamily_DisplayModel Processor Families/Processor Number Series
* -------------------------- ------------------------------------------
* 05_01, 05_02, 05_04 Pentium, Pentium with MMX
*
* 06_01 Pentium Pro
* 06_03, 06_05 Pentium II Xeon, Pentium II
* 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentium III
*
* 06_09, 06_0D Pentium M
*
* 06_0E Core Duo, Core Solo
*
* 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series,
* Core 2 Quad, Core 2 Extreme, Core 2 Duo,
* Pentium dual-core
* 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650
*
* 06_1C Atom
*
* 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4
* 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D
*
* 0F_06 Xeon 7100, 5000 Series, Xeon MP,
* Pentium 4, Pentium D
*/
/* Register processors bits */
enum cpu_processor_bit {
CPU_NONE,
/* Intel */
CPU_INTEL_PENTIUM_BIT,
CPU_INTEL_P6_BIT,
CPU_INTEL_PENTIUM_M_BIT,
CPU_INTEL_CORE_BIT,
CPU_INTEL_CORE2_BIT,
CPU_INTEL_ATOM_BIT,
CPU_INTEL_XEON_P4_BIT,
CPU_INTEL_XEON_MP_BIT,
};
#define CPU_ALL (~0) /* Select all CPUs */
#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT)
#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT)
#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT)
#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT)
#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT)
#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT)
#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT)
#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT)
#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2)
#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM)
#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM)
#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON)
#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON)
#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON)
#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON)
#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT)
#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX)
#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE)
#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT)
#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE)
/* Select all Intel CPUs */
#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
#define MAX_CPU_FILES 512
struct cpu_private {
unsigned cpu;
unsigned type;
unsigned reg;
unsigned file;
};
struct cpu_debug_base {
char *name; /* Register name */
unsigned flag; /* Register flag */
};
struct cpu_cpuX_base {
struct dentry *dentry; /* Register dentry */
int init; /* Register index file */
};
struct cpu_file_base {
char *name; /* Register file name */
unsigned flag; /* Register file flag */
};
struct cpu_debug_range {
unsigned min; /* Register range min */
unsigned max; /* Register range max */
unsigned flag; /* Supported flags */
unsigned model; /* Supported models */
};
#endif /* _ASM_X86_CPU_DEBUG_H */
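The header's pattern is uniform: each register group gets an enum bit, each simple mask is 1 << bit, and the compound masks are ORs of the simple ones. A tiny standalone sketch of how such masks are meant to be tested (names invented for illustration, not taken from this header):

#include <stdio.h>

enum dbg_bit { DBG_MC_BIT, DBG_MTRR_BIT };
#define DBG_MC		(1 << DBG_MC_BIT)
#define DBG_MTRR	(1 << DBG_MTRR_BIT)
#define DBG_ALL		(~0)

/* a register entry is shown when its flag intersects the selection */
static int selected(unsigned flag, unsigned selection)
{
	return (flag & selection) != 0;
}

int main(void)
{
	printf("%d %d\n", selected(DBG_MTRR, DBG_MC),	/* 0 */
			  selected(DBG_MTRR, DBG_ALL));	/* 1 */
	return 0;
}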
@@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr)
 #define store_gdt(dtr) native_store_gdt(dtr)
 #define store_idt(dtr) native_store_idt(dtr)
 #define store_tr(tr) (tr = native_store_tr())
-#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
 
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
 #define set_ldt native_set_ldt
@@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
 }
 #endif	/* CONFIG_PARAVIRT */
 
+#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
+
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
 					  const gate_desc *gate)
 {
......
@@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
 
 #ifndef CONFIG_PARAVIRT
......
@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define PA_TABLE_PAGE		1
-# define PAGES_NR		2
+# define VA_CONTROL_PAGE	1
+# define PA_TABLE_PAGE		2
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #endif
 
-#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
-#endif
 
 #ifndef __ASSEMBLY__
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned int has_pae,
 		unsigned int preserve_context);
 #else
-NORET_TYPE void
+unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
+		unsigned long start_address,
+		unsigned int preserve_context);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
......
 #ifndef _ASM_X86_LINKAGE_H
 #define _ASM_X86_LINKAGE_H
 
+#include <linux/stringify.h>
+
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
 
@@ -53,14 +55,9 @@
 	.globl name; \
 	name:
 
-#ifdef CONFIG_X86_64
-#define __ALIGN .p2align 4,,15
-#define __ALIGN_STR ".p2align 4,,15"
-#endif
-
-#ifdef CONFIG_X86_ALIGNMENT_16
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
-#endif
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
+#define __ALIGN		.p2align 4, 0x90
+#define __ALIGN_STR	__stringify(__ALIGN)
+#endif
 
 #endif /* __ASSEMBLY__ */
......
@@ -14,6 +14,8 @@ obj-y			+= vmware.o hypervisor.o
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
 
+obj-$(CONFIG_X86_CPU_DEBUG)		+= cpu_debug.o
+
 obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
......
@@ -1078,8 +1078,7 @@ void __cpuinit cpu_init(void)
 
 	atomic_inc(&init_mm.mm_count);
 	me->active_mm = &init_mm;
-	if (me->mm)
-		BUG();
+	BUG_ON(me->mm);
 	enter_lazy_tlb(&init_mm, me);
 
 	load_sp0(t, &current->thread);
@@ -1145,8 +1144,7 @@ void __cpuinit cpu_init(void)
 	 */
 	atomic_inc(&init_mm.mm_count);
 	curr->active_mm = &init_mm;
-	if (curr->mm)
-		BUG();
+	BUG_ON(curr->mm);
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
......
@@ -639,7 +639,7 @@ static void mce_init_timer(void)
 	if (!next_interval)
 		return;
 	setup_timer(t, mcheck_timer, smp_processor_id());
-	t->expires = round_jiffies_relative(jiffies + next_interval);
+	t->expires = round_jiffies(jiffies + next_interval);
 	add_timer(t);
 }
 
@@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		t->expires = round_jiffies_relative(jiffies + next_interval);
+		t->expires = round_jiffies(jiffies + next_interval);
 		add_timer_on(t, cpu);
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		break;
......
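Why the switch from round_jiffies_relative() to round_jiffies() above is a fix rather than churn: t->expires holds an absolute jiffies value, and the two helpers differ precisely in whether they treat their argument as absolute or as a delay. A userspace model of the semantics (simplified: plain round-up to a whole second, ignoring the per-CPU skew the real helpers add):

#include <stdio.h>

#define HZ 1000	/* assumed tick rate for this sketch */

/* model of round_jiffies(): absolute value in, aligned absolute value out */
static unsigned long model_round_jiffies(unsigned long abs_j)
{
	return ((abs_j + HZ - 1) / HZ) * HZ;
}

/* model of round_jiffies_relative(): delay in, delay out */
static unsigned long model_round_jiffies_relative(unsigned long now,
						  unsigned long delta_j)
{
	return model_round_jiffies(now + delta_j) - now;
}

int main(void)
{
	unsigned long now = 12345, interval = 5 * HZ;

	/* correct: expires is absolute, so round the absolute value */
	unsigned long good = model_round_jiffies(now + interval);

	/* old code: an absolute value fed to the relative variant; `now`
	 * is added and subtracted once more, so the stored expiry is no
	 * longer aligned to a second boundary - defeating the rounding */
	unsigned long bad = model_round_jiffies_relative(now, now + interval);

	printf("aligned: %lu  misrounded: %lu\n", good, bad); /* 18000 vs 17655 */
	return 0;
}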
@@ -442,8 +442,7 @@ sysenter_past_esp:
 
 	GET_THREAD_INFO(%ebp)
 
-	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz sysenter_audit
 sysenter_do_call:
 	cmpl $(nr_syscalls), %eax
@@ -454,7 +453,7 @@ sysenter_do_call:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $_TIF_ALLWORK_MASK, %cx
+	testl $_TIF_ALLWORK_MASK, %ecx
 	jne sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
@@ -468,7 +467,7 @@ sysenter_exit:
 
 #ifdef CONFIG_AUDITSYSCALL
 sysenter_audit:
-	testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	addl $4,%esp
 	CFI_ADJUST_CFA_OFFSET -4
@@ -485,7 +484,7 @@ sysenter_audit:
 	jmp sysenter_do_call
 
 sysexit_audit:
-	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 	jne syscall_exit_work
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
@@ -498,7 +497,7 @@ sysexit_audit:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 	jne syscall_exit_work
 	movl PT_EAX(%esp),%eax	/* reload syscall return value */
 	jmp sysenter_exit
@@ -523,8 +522,7 @@ ENTRY(system_call)
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 					# system call tracing in operation / emulation
-	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -538,7 +536,7 @@ syscall_exit:
 					# between sampling and the iret
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
-	testw $_TIF_ALLWORK_MASK, %cx	# current->work
+	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
 	jne syscall_exit_work
 
 restore_all:
@@ -673,7 +671,7 @@ END(syscall_trace_entry)
 	# perform syscall exit tracing
 	ALIGN
 syscall_exit_work:
-	testb $_TIF_WORK_SYSCALL_EXIT, %cl
+	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
 	jz work_pending
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
......
@@ -368,6 +368,7 @@ ENTRY(save_rest)
 END(save_rest)
 
 /* save complete stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -396,6 +397,7 @@ ENTRY(save_paranoid)
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
+	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -416,7 +418,6 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	CFI_REMEMBER_STATE
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)			# from kernel_thread?
@@ -428,7 +429,6 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
-	CFI_RESTORE_STATE
 	CFI_ENDPROC
 END(ret_from_fork)
......
@@ -14,12 +14,12 @@
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
 #include <asm/apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
 		"\tmovl %%eax,%%fs\n"
 		"\tmovl %%eax,%%gs\n"
 		"\tmovl %%eax,%%ss\n"
-		::: "eax", "memory");
+		: : : "eax", "memory");
 #undef STR
 #undef __STR
 }
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
-		/* We need to put APICs in legacy mode so that we can
+		/*
+		 * We need to put APICs in legacy mode so that we can
 		 * get timer interrupts in second kernel. kexec/kdump
 		 * paths already have calls to disable_IO_APIC() in
 		 * one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
 		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
 						<< PAGE_SHIFT);
 
-	/* The segment registers are funny things, they have both a
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory.  At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
......
@@ -12,11 +12,47 @@
 #include <linux/reboot.h>
 #include <linux/numa.h>
 #include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
+
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+				unsigned long addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	struct page *page;
+	int result = -ENOMEM;
+
+	addr &= PMD_MASK;
+	pgd += pgd_index(addr);
+	if (!pgd_present(*pgd)) {
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pud = (pud_t *)page_address(page);
+		memset(pud, 0, PAGE_SIZE);
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud)) {
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pmd = (pmd_t *)page_address(page);
+		memset(pmd, 0, PAGE_SIZE);
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+	result = 0;
+out:
+	return result;
+}
 
 static void init_level2_page(pmd_t *level2p, unsigned long addr)
 {
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
 		}
 		level3p = (pud_t *)page_address(page);
 		result = init_level3_page(image, level3p, addr, last_addr);
-		if (result) {
+		if (result)
 			goto out;
-		}
 		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
 		addr += PGDIR_SIZE;
 	}
@@ -154,6 +189,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	int result;
 
 	level4p = (pgd_t *)__va(start_pgtable);
 	result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+	if (result)
+		return result;
+	/*
+	 * image->start may be outside 0 ~ max_pfn, for example when
+	 * jump back to original kernel from kexeced kernel
+	 */
+	result = init_one_level2_page(image, level4p, image->start);
 	if (result)
 		return result;
 	return init_transition_pgtable(image, level4p);
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in second kernel. kexec/kdump
+		 * paths already have calls to disable_IO_APIC() in
+		 * one form or other. kexec jump path also need
+		 * one.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 	  (unsigned long)__pa(page_address(image->control_code_page));
 
-	/* The segment registers are funny things, they have both a
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory.  At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
......
@@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void)
 	if (!force_hpet_address)
 		return;
 
-	if (rcba_base == NULL)
-		BUG();
+	BUG_ON(rcba_base == NULL);
 
 	/* read the Function Disable register, dword mode only */
 	val = readl(rcba_base + 0x3404);
......
@@ -17,7 +17,8 @@
 
 #define PTR(x) (x << 2)
 
-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
  * ~ control_page + PAGE_SIZE are used as data storage and stack for
  * jumping back
  */
@@ -76,8 +77,10 @@ relocate_kernel:
 	movl	%eax, CP_PA_SWAP_PAGE(%edi)
 	movl	%ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
 
-	/* get physical address of control page now */
-	/* this is impossible after page table switch */
+	/*
+	 * get physical address of control page now
+	 * this is impossible after page table switch
+	 */
 	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
 
 	/* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
 	/* store the start address on the stack */
 	pushl   %edx
 
-	/* Set cr0 to a known state:
+	/*
+	 * Set cr0 to a known state:
 	 * - Paging disabled
 	 * - Alignment check disabled
 	 * - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
 	/* clear cr4 if applicable */
 	testl	%ecx, %ecx
 	jz	1f
-	/* Set cr4 to a known state:
+	/*
+	 * Set cr4 to a known state:
 	 * Setting everything to zero seems safe.
 	 */
 	xorl	%eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
 	call	swap_pages
 	addl	$8, %esp
 
-	/* To be certain of avoiding problems with self-modifying code
+	/*
+	 * To be certain of avoiding problems with self-modifying code
 	 * I need to execute a serializing instruction here.
 	 * So I flush the TLB, it's handy, and not processor dependent.
 	 */
 	xorl	%eax, %eax
 	movl	%eax, %cr3
 
-	/* set all of the registers to known values */
-	/* leave %esp alone */
+	/*
+	 * set all of the registers to known values
+	 * leave %esp alone
	 */
 
 	testl	%esi, %esi
 	jnz 1f
......
@@ -19,29 +19,77 @@
 
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
 	.globl relocate_kernel
 relocate_kernel:
-	/* %rdi indirection_page
+	/*
+	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq %rbx
+	pushq %rbp
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq $0
 	popfq
 
-	/* get physical address of control page now */
-	/* this is impossible after page table switch */
+	/*
+	 * get physical address of control page now
+	 * this is impossible after page table switch
+	 */
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
 	/* store the start address on the stack */
 	pushq   %rdx
 
-	/* Set cr0 to a known state:
+	/*
+	 * Set cr0 to a known state:
 	 * - Paging enabled
 	 * - Alignment check disabled
 	 * - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
 	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movq	%rax, %cr0
 
-	/* Set cr4 to a known state:
+	/*
+	 * Set cr4 to a known state:
 	 * - physical address extension enabled
 	 */
 	movq	$X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	testq	%r11, %r11
+	jnz 1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq	%rcx, %rcx
+	xorq	%rdx, %rdx
+	xorq	%rsi, %rsi
+	xorq	%rdi, %rdi
+	xorq	%rbp, %rbp
+	xorq	%r8, %r8
+	xorq	%r9, %r9
+	xorq	%r10, %r9
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax
+
+	popf
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
 	movq	%rcx,   %rsi  /* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512, %rcx
 	rep ; movsq
-	jmp	0b
-3:
 
-	/* To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512, %rcx
+	rep ; movsq
 
-	/* set all of the registers to known values */
-	/* leave %rsp alone */
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512, %rcx
+	rep ; movsq
 
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq	%rcx, %rcx
-	xorq	%rdx, %rdx
-	xorq	%rsi, %rsi
-	xorq	%rdi, %rdi
-	xorq	%rbp, %rbp
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r9
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
 static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 {
 	int realirq;
-	irq_desc_t *desc;
+	struct irq_desc *desc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
......
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
+#include <asm/dwarf2.h>
 
 /*
  * memcpy - Copy a memory block.
  *
  * Input:
  *	rdi destination
  *	rsi source
  *	rdx count
  *
  * Output:
  * rax original destination
  */
 
+/*
+ * memcpy_c() - fast string ops (REP MOVSQ) based variant.
+ *
+ * Calls to this get patched into the kernel image via the
+ * alternative instructions framework:
+ */
 	ALIGN
 memcpy_c:
 	CFI_STARTPROC
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	shrl $3, %ecx
+	andl $7, %edx
 	rep movsq
-	movl %edx,%ecx
+	movl %edx, %ecx
 	rep movsb
 	ret
 	CFI_ENDPROC
@@ -33,99 +41,110 @@ ENDPROC(memcpy_c)
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
-	pushq %rbx
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rbx, 0
 
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $6,%ecx
+	/*
+	 * Put the number of full 64-byte blocks into %ecx.
+	 * Tail portion is handled at the end:
+	 */
+	movq %rdi, %rax
+	movl %edx, %ecx
+	shrl $6, %ecx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
+	/*
+	 * We decrement the loop index here - and the zero-flag is
+	 * checked at the end of the loop (instructions inbetween do
+	 * not change the zero flag):
+	 */
 	decl %ecx
 
-	movq (%rsi),%r11
-	movq 8(%rsi),%r8
+	/*
+	 * Move in blocks of 4x16 bytes:
+	 */
+	movq 0*8(%rsi), %r11
+	movq 1*8(%rsi), %r8
+	movq %r11, 0*8(%rdi)
+	movq %r8, 1*8(%rdi)
 
-	movq %r11,(%rdi)
-	movq %r8,1*8(%rdi)
+	movq 2*8(%rsi), %r9
+	movq 3*8(%rsi), %r10
+	movq %r9, 2*8(%rdi)
+	movq %r10, 3*8(%rdi)
 
-	movq 2*8(%rsi),%r9
-	movq 3*8(%rsi),%r10
+	movq 4*8(%rsi), %r11
+	movq 5*8(%rsi), %r8
+	movq %r11, 4*8(%rdi)
+	movq %r8, 5*8(%rdi)
 
-	movq %r9,2*8(%rdi)
-	movq %r10,3*8(%rdi)
+	movq 6*8(%rsi), %r9
+	movq 7*8(%rsi), %r10
+	movq %r9, 6*8(%rdi)
+	movq %r10, 7*8(%rdi)
 
-	movq 4*8(%rsi),%r11
-	movq 5*8(%rsi),%r8
-
-	movq %r11,4*8(%rdi)
-	movq %r8,5*8(%rdi)
-
-	movq 6*8(%rsi),%r9
-	movq 7*8(%rsi),%r10
-
-	movq %r9,6*8(%rdi)
-	movq %r10,7*8(%rdi)
-
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
+	leaq 64(%rsi), %rsi
+	leaq 64(%rdi), %rdi
 
 	jnz .Lloop_64
 .Lhandle_tail:
-	movl %edx,%ecx
-	andl $63,%ecx
-	shrl $3,%ecx
+	movl %edx, %ecx
+	andl $63, %ecx
+	shrl $3, %ecx
 	jz .Lhandle_7
 
 	.p2align 4
 .Lloop_8:
 	decl %ecx
-	movq (%rsi),%r8
-	movq %r8,(%rdi)
-	leaq 8(%rdi),%rdi
-	leaq 8(%rsi),%rsi
+	movq (%rsi), %r8
+	movq %r8, (%rdi)
+	leaq 8(%rdi), %rdi
+	leaq 8(%rsi), %rsi
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %edx,%ecx
-	andl $7,%ecx
-	jz .Lende
+	movl %edx, %ecx
+	andl $7, %ecx
+	jz .Lend
 
 	.p2align 4
 .Lloop_1:
-	movb (%rsi),%r8b
-	movb %r8b,(%rdi)
+	movb (%rsi), %r8b
+	movb %r8b, (%rdi)
 	incq %rdi
 	incq %rsi
 	decl %ecx
 	jnz .Lloop_1
 
-.Lende:
-	popq %rbx
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_RESTORE rbx
+.Lend:
 	ret
-.Lfinal:
 	CFI_ENDPROC
 ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
-/* Some CPUs run faster using the string copy instructions.
-   It is also a lot simpler. Use this when possible */
+/*
+ * Some CPUs run faster using the string copy instructions.
+ * It is also a lot simpler. Use this when possible:
+ */
 
-	.section .altinstr_replacement,"ax"
+	.section .altinstr_replacement, "ax"
 1:	.byte 0xeb				/* jmp <disp8> */
 	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
 2:
 	.previous
-	.section .altinstructions,"a"
+
+	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
 	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	/* Replace only beginning, memcpy is used to apply alternatives, so it
-	 * is silly to overwrite itself with nops - reboot is only outcome... */
+
+	/*
+	 * Replace only beginning, memcpy is used to apply alternatives,
+	 * so it is silly to overwrite itself with nops - reboot is the
	 * only outcome...
+	 */
 	.byte 2b - 1b
 	.byte 2b - 1b
 	.previous
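For orientation, the .altinstructions entry emitted above encodes one patch record. A sketch loosely mirroring struct alt_instr from that era's <asm/alternative.h> (the field names here are illustrative, not the kernel's):

struct alt_record {
	void *orig;		/* .quad memcpy: instruction to patch   */
	void *replacement;	/* .quad 1b: the jmp to memcpy_c        */
	unsigned char cpuid;	/* .byte X86_FEATURE_REP_GOOD           */
	unsigned char orig_len;	/* .byte 2b - 1b: bytes to patch        */
	unsigned char repl_len;	/* .byte 2b - 1b: replacement length    */
};

At boot, apply_alternatives() walks these records and, when the CPU has REP_GOOD, copies the 2-byte jmp over the start of memcpy, which is why only the beginning is replaced rather than NOP-ing out the whole body.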
@@ -121,23 +121,30 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	pagefault_enable();
 }
 
-/* This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
 	pagefault_disable();
 
-	idx = type + KM_TYPE_NR*smp_processor_id();
+	debug_kmap_atomic_prot(type);
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
 	arch_flush_lazy_mmu_mode();
 
 	return (void*) vaddr;
 }
 
+/* This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+	return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
+}
 EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
 
 struct page *kmap_atomic_to_page(void *ptr)
......
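A hypothetical caller sketch for the new helper, to show the intended use: map a raw pfn with a caller-chosen protection (exactly what iomap_atomic_prot_pfn() in the next file now delegates to), touch it, unmap. KM_USER0 and PAGE_KERNEL are illustrative arguments only:

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_to_pfn(unsigned long pfn, const void *src, size_t len)
{
	void *vaddr = kmap_atomic_prot_pfn(pfn, KM_USER0, PAGE_KERNEL);

	memcpy(vaddr, src, len);	/* access through the temporary mapping */
	kunmap_atomic(vaddr, KM_USER0);	/* must pair with the same km_type */
}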
@@ -18,6 +18,7 @@
 
 #include <asm/iomap.h>
 #include <asm/pat.h>
+#include <asm/highmem.h>
 #include <linux/module.h>
 
 int is_io_mapping_possible(resource_size_t base, unsigned long size)
@@ -36,11 +37,6 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible);
 void *
 iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 {
-	enum fixed_addresses idx;
-	unsigned long vaddr;
-
-	pagefault_disable();
-
 	/*
 	 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
 	 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +46,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 	if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
 		prot = PAGE_KERNEL_UC_MINUS;
 
-	idx = type + KM_TYPE_NR*smp_processor_id();
-	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
-	arch_flush_lazy_mmu_mode();
-
-	return (void*) vaddr;
+	return kmap_atomic_prot_pfn(pfn, type, prot);
 }
 EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
......
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_warning("kmmio: spurious debug trap on CPU %d.\n",
+		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
 			   smp_processor_id());
 		goto out;
 	}
......