Commit e2a6cbc0 authored by Linus Torvalds

Add "sysenter" support on x86, and a "vsyscall" page.

Instead of executing an "int 0x80" instruction for system calls,
user space can do a "call 0xfffff000", which will do the right
thing regardless of what kind of system call support the CPU
has.
parent 4749dedc
...@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o ...@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_EDD) += edd.o
obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += module.o
obj-y += sysenter.o
EXTRA_AFLAGS := -traditional EXTRA_AFLAGS := -traditional
......
...@@ -94,7 +94,7 @@ VM_MASK = 0x00020000 ...@@ -94,7 +94,7 @@ VM_MASK = 0x00020000
movl %edx, %ds; \ movl %edx, %ds; \
movl %edx, %es; movl %edx, %es;
#define RESTORE_ALL \ #define RESTORE_REGS \
popl %ebx; \ popl %ebx; \
popl %ecx; \ popl %ecx; \
popl %edx; \ popl %edx; \
...@@ -104,14 +104,25 @@ VM_MASK = 0x00020000 ...@@ -104,14 +104,25 @@ VM_MASK = 0x00020000
popl %eax; \ popl %eax; \
1: popl %ds; \ 1: popl %ds; \
2: popl %es; \ 2: popl %es; \
addl $4, %esp; \
3: iret; \
.section .fixup,"ax"; \ .section .fixup,"ax"; \
4: movl $0,(%esp); \ 3: movl $0,(%esp); \
jmp 1b; \ jmp 1b; \
5: movl $0,(%esp); \ 4: movl $0,(%esp); \
jmp 2b; \ jmp 2b; \
6: pushl %ss; \ .previous; \
.section __ex_table,"a";\
.align 4; \
.long 1b,3b; \
.long 2b,4b; \
.previous
#define RESTORE_ALL \
RESTORE_REGS \
addl $4, %esp; \
1: iret; \
.section .fixup,"ax"; \
2: pushl %ss; \
popl %ds; \ popl %ds; \
pushl %ss; \ pushl %ss; \
popl %es; \ popl %es; \
...@@ -120,11 +131,11 @@ VM_MASK = 0x00020000 ...@@ -120,11 +131,11 @@ VM_MASK = 0x00020000
.previous; \ .previous; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.long 1b,4b; \ .long 1b,2b; \
.long 2b,5b; \
.long 3b,6b; \
.previous .previous
ENTRY(lcall7) ENTRY(lcall7)
pushfl # We get a different stack layout with call pushfl # We get a different stack layout with call
# gates, which has to be cleaned up later.. # gates, which has to be cleaned up later..
...@@ -220,6 +231,40 @@ need_resched: ...@@ -220,6 +231,40 @@ need_resched:
jmp need_resched jmp need_resched
#endif #endif
/* Points to after the "sysenter" instruction in the vsyscall page */
/*
 * The vsyscall page sits at 0xfffff000 and its stub executes three
 * one-byte pushes, a two-byte "movl %esp,%ebp" and the two-byte
 * "sysenter", so the instruction following sysenter is at offset 7.
 */
#define SYSENTER_RETURN 0xfffff007
# sysenter call handler stub
/*
 * Kernel entry point for system calls made with "sysenter".
 *
 * Reached by a jmp from the per-CPU trampoline that enable_sep_cpu()
 * writes; that trampoline loads %esp from the CPU's TSS esp0 field
 * before jumping here.
 *
 * In:  %eax = system call number
 *      %ebp = user stack pointer (the vsyscall stub copies %esp into
 *             %ebp just before executing sysenter)
 * Out: returns to user space at SYSENTER_RETURN via sysexit, with the
 *      system call's return value in %eax.  User %ecx and %edx are
 *      overwritten on the way out; the vsyscall stub saves and restores
 *      them around the sysenter instruction.
 */
ALIGN
ENTRY(sysenter_entry)
sti
/*
 * Hand-build a user return frame, pushed in the order an iret would
 * consume it: ss, esp, eflags, cs, eip.  The user esp comes from %ebp
 * and the return eip is the fixed SYSENTER_RETURN address.
 */
pushl $(__USER_DS)
pushl %ebp
pushfl
pushl $(__USER_CS)
pushl $SYSENTER_RETURN
/*
 * NOTE(review): presumably this fills the same "original eax" slot that
 * the int $0x80 path pushes before SAVE_ALL -- SAVE_ALL and system_call
 * are not visible in this hunk, confirm there.
 */
pushl %eax
SAVE_ALL
GET_THREAD_INFO(%ebx)
/* Unsigned compare: any number >= NR_syscalls is rejected. */
cmpl $(NR_syscalls), %eax
jae syscall_badsys
/* Tasks with _TIF_SYSCALL_TRACE set take the tracing entry path. */
testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebx)
jnz syscall_trace_entry
/* Dispatch through sys_call_table: 4-byte entries indexed by %eax. */
call *sys_call_table(,%eax,4)
/*
 * Store the return value at EAX(%esp) -- presumably the saved-%eax slot
 * that RESTORE_REGS pops back into %eax; the EAX offset is defined
 * outside this hunk.
 */
movl %eax,EAX(%esp)
/*
 * Interrupts are disabled from the flag check until just before sysexit.
 * Any bit of _TIF_ALLWORK_MASK set in the thread flags diverts to
 * syscall_exit_work instead of the fast exit below.
 */
cli
movl TI_FLAGS(%ebx), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
RESTORE_REGS
/*
 * Assuming RESTORE_REGS pops exactly what SAVE_ALL pushed, (%esp) is now
 * the %eax pushed above, so 4(%esp) is SYSENTER_RETURN and 16(%esp) is
 * the saved %ebp, i.e. the user stack pointer.  sysexit resumes user
 * mode at the address in %edx with the stack pointer taken from %ecx.
 * The rest of the frame is not popped; the trampoline reloads %esp from
 * the TSS on the next sysenter.
 */
movl 4(%esp),%edx
movl 16(%esp),%ecx
sti
sysexit
# system call handler stub # system call handler stub
ALIGN ALIGN
ENTRY(system_call) ENTRY(system_call)
......
...@@ -414,8 +414,8 @@ ENTRY(cpu_gdt_table) ...@@ -414,8 +414,8 @@ ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* 0x0b reserved */ .quad 0x0000000000000000 /* 0x0b reserved */
.quad 0x0000000000000000 /* 0x13 reserved */ .quad 0x0000000000000000 /* 0x13 reserved */
.quad 0x0000000000000000 /* 0x1b reserved */ .quad 0x0000000000000000 /* 0x1b reserved */
.quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ .quad 0x0000000000000000 /* 0x20 unused */
.quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x28 unused */
.quad 0x0000000000000000 /* 0x33 TLS entry 1 */ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
.quad 0x0000000000000000 /* 0x3b TLS entry 2 */ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
.quad 0x0000000000000000 /* 0x43 TLS entry 3 */ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
...@@ -425,22 +425,25 @@ ENTRY(cpu_gdt_table) ...@@ -425,22 +425,25 @@ ENTRY(cpu_gdt_table)
.quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* 0x70 TSS descriptor */ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
.quad 0x0000000000000000 /* 0x78 LDT descriptor */ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* 0x80 TSS descriptor */
.quad 0x0000000000000000 /* 0x88 LDT descriptor */
/* Segments used for calling PnP BIOS */ /* Segments used for calling PnP BIOS */
.quad 0x00c09a0000000000 /* 0x80 32-bit code */ .quad 0x00c09a0000000000 /* 0x90 32-bit code */
.quad 0x00809a0000000000 /* 0x88 16-bit code */ .quad 0x00809a0000000000 /* 0x98 16-bit code */
.quad 0x0080920000000000 /* 0x90 16-bit data */
.quad 0x0080920000000000 /* 0x98 16-bit data */
.quad 0x0080920000000000 /* 0xa0 16-bit data */ .quad 0x0080920000000000 /* 0xa0 16-bit data */
.quad 0x0080920000000000 /* 0xa8 16-bit data */
.quad 0x0080920000000000 /* 0xb0 16-bit data */
/* /*
* The APM segments have byte granularity and their bases * The APM segments have byte granularity and their bases
* and limits are set at run time. * and limits are set at run time.
*/ */
.quad 0x00409a0000000000 /* 0xa8 APM CS code */ .quad 0x00409a0000000000 /* 0xb8 APM CS code */
.quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */ .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0xb8 APM DS data */ .quad 0x0040920000000000 /* 0xc8 APM DS data */
#if CONFIG_SMP #if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
......
/*
* linux/arch/i386/kernel/sysenter.c
*
* (C) Copyright 2002 Linus Torvalds
*
* This file contains the needed initializations to support sysenter.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
extern asmlinkage void sysenter_entry(void);
/*
 * enable_sep_cpu - point the current CPU's SYSENTER MSRs at a trampoline.
 * @info: unused; present so the function matches the callback signature
 *        expected by smp_call_function().
 *
 * Allocates one page and writes an 11-byte trampoline at its start:
 *
 *	page+0:  8b 25 <abs32>   movl <&tss->esp0>,%esp
 *	page+6:  e9 <rel32>      jmp  sysenter_entry
 *
 * The SYSENTER MSRs are then set so that "sysenter" enters at the start
 * of the page with %esp at the end of the same page; the first trampoline
 * instruction replaces that %esp with the value stored in this CPU's
 * TSS esp0 field.
 *
 * The callback cannot report failure, and by the time it runs the
 * vsyscall page already contains the sysenter stub, so a failed page
 * allocation is fatal.
 */
static void __init enable_sep_cpu(void *info)
{
	unsigned long page = __get_free_page(GFP_ATOMIC);
	unsigned long *esp0_ptr;
	unsigned long rel32;
	int cpu;

	/*
	 * The trampoline is stored straight into the page, so catch a failed
	 * allocation before the first write instead of scribbling on
	 * address 0 and programming the MSRs with a bogus entry point.
	 */
	if (!page)
		panic("SEP: unable to allocate sysenter trampoline page\n");

	cpu = get_cpu();
	esp0_ptr = &(init_tss + cpu)->esp0;

	/* rel32 is relative to the end of the 5-byte jmp that starts at page+6 */
	rel32 = (unsigned long) sysenter_entry - (page+11);

	*(short *) (page+0) = 0x258b;	/* movl xxxxx,%esp */
	*(long **) (page+2) = esp0_ptr;
	*(char *) (page+6) = 0xe9;	/* jmp rl32 */
	*(long *) (page+7) = rel32;

	wrmsr(0x174, __KERNEL_CS, 0);		/* SYSENTER_CS_MSR */
	wrmsr(0x175, page+PAGE_SIZE, 0);	/* SYSENTER_ESP_MSR */
	wrmsr(0x176, page, 0);			/* SYSENTER_EIP_MSR */

	printk("Enabling SEP on CPU %d\n", cpu);
	put_cpu();
}
/*
 * sysenter_setup - create the vsyscall page and enable sysenter if present.
 *
 * Allocates a zeroed page, maps it read-only at the FIX_VSYSCALL fixmap
 * slot and fills it with a system call stub:
 *
 *  - always first with "int $0x80; ret";
 *  - if the boot CPU reports X86_FEATURE_SEP, that stub is overwritten
 *    with the sysenter version and the SYSENTER MSRs are programmed on
 *    this CPU and, through smp_call_function(), on the other CPUs.
 *
 * The sysenter stub saves %ebp, %ecx and %edx, copies %esp into %ebp,
 * executes sysenter, and restores the three registers before returning.
 * The "pop %edx" is at offset 7, which is the address SYSENTER_RETURN in
 * the entry code refers to.
 *
 * Returns 0 on success, or -ENOMEM if the vsyscall page cannot be
 * allocated (nothing is mapped in that case).
 */
static int __init sysenter_setup(void)
{
	/* Opcode bytes exceed 0x7f, so keep them in unsigned char. */
	static const unsigned char int80[] = {
		0xcd, 0x80,		/* int $0x80 */
		0xc3			/* ret */
	};
	static const unsigned char sysent[] = {
		0x55,			/* push %ebp */
		0x51,			/* push %ecx */
		0x52,			/* push %edx */
		0x89, 0xe5,		/* movl %esp,%ebp */
		0x0f, 0x34,		/* sysenter */
		0x5a,			/* pop %edx */
		0x59,			/* pop %ecx */
		0x5d,			/* pop %ebp */
		0xc3			/* ret */
	};
	unsigned long page = get_zeroed_page(GFP_ATOMIC);

	/*
	 * Without this check a failed allocation would map __pa(0) into the
	 * fixmap and copy the stub through a NULL pointer.
	 */
	if (!page) {
		printk("SEP: unable to allocate vsyscall page\n");
		return -ENOMEM;
	}

	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);

	/* Default stub: works on every CPU. */
	memcpy((void *) page, int80, sizeof(int80));
	if (!boot_cpu_has(X86_FEATURE_SEP))
		return 0;

	/* SEP available: switch the stub to sysenter and arm every CPU. */
	memcpy((void *) page, sysent, sizeof(sysent));
	enable_sep_cpu(NULL);
	smp_call_function(enable_sep_cpu, NULL, 1, 1);
	return 0;
}
__initcall(sysenter_setup);
...@@ -72,7 +72,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) ...@@ -72,7 +72,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
static pte_t * __init one_page_table_init(pmd_t *pmd) static pte_t * __init one_page_table_init(pmd_t *pmd)
{ {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
if (page_table != pte_offset_kernel(pmd, 0)) if (page_table != pte_offset_kernel(pmd, 0))
BUG(); BUG();
......
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
* task switches. * task switches.
*/ */
enum fixed_addresses { enum fixed_addresses {
FIX_VSYSCALL,
FIX_HOLE,
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif #endif
...@@ -96,10 +98,9 @@ extern void __set_fixmap (enum fixed_addresses idx, ...@@ -96,10 +98,9 @@ extern void __set_fixmap (enum fixed_addresses idx,
* used by vmalloc.c. * used by vmalloc.c.
* *
* Leave one empty page between vmalloc'ed areas and * Leave one empty page between vmalloc'ed areas and
* the start of the fixmap, and leave one page empty * the start of the fixmap.
* at the top of mem..
*/ */
#define FIXADDR_TOP (0xffffe000UL) #define FIXADDR_TOP (0xfffff000UL)
#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* 2 - reserved * 2 - reserved
* 3 - reserved * 3 - reserved
* *
* 4 - default user CS <==== new cacheline * 4 - unused <==== new cacheline
* 5 - default user DS * 5 - unused
* *
* ------- start of TLS (Thread-Local Storage) segments: * ------- start of TLS (Thread-Local Storage) segments:
* *
...@@ -25,16 +25,18 @@ ...@@ -25,16 +25,18 @@
* *
* 12 - kernel code segment <==== new cacheline * 12 - kernel code segment <==== new cacheline
* 13 - kernel data segment * 13 - kernel data segment
* 14 - TSS * 14 - default user CS
* 15 - LDT * 15 - default user DS
* 16 - PNPBIOS support (16->32 gate) * 16 - TSS
* 17 - PNPBIOS support * 17 - LDT
* 18 - PNPBIOS support * 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support * 19 - PNPBIOS support
* 20 - PNPBIOS support * 20 - PNPBIOS support
* 21 - APM BIOS support * 21 - PNPBIOS support
* 22 - APM BIOS support * 22 - PNPBIOS support
* 23 - APM BIOS support * 23 - APM BIOS support
* 24 - APM BIOS support
* 25 - APM BIOS support
*/ */
#define GDT_ENTRY_TLS_ENTRIES 3 #define GDT_ENTRY_TLS_ENTRIES 3
#define GDT_ENTRY_TLS_MIN 6 #define GDT_ENTRY_TLS_MIN 6
...@@ -42,10 +44,10 @@ ...@@ -42,10 +44,10 @@
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
#define GDT_ENTRY_DEFAULT_USER_CS 4 #define GDT_ENTRY_DEFAULT_USER_CS 14
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
#define GDT_ENTRY_DEFAULT_USER_DS 5 #define GDT_ENTRY_DEFAULT_USER_DS 15
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
#define GDT_ENTRY_KERNEL_BASE 12 #define GDT_ENTRY_KERNEL_BASE 12
...@@ -56,14 +58,14 @@ ...@@ -56,14 +58,14 @@
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3) #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4) #define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9) #define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
/* /*
* The GDT has 21 entries but we pad it to cacheline boundary: * The GDT has 23 entries but we pad it to cacheline boundary:
*/ */
#define GDT_ENTRIES 24 #define GDT_ENTRIES 24
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment