Commit 50cbf47d authored by Linus Torvalds's avatar Linus Torvalds Committed by Linus Torvalds

Fix the remaining known problems (NMI and debugging) with

fast system calls. They should now be fully comparable to
traditional system calls.

    Fix SEP stack allocation to look like a real thread stack.
    
    This fixes the "Trap on first sysenter instruction" problem,
    which can happen either as a result of an NMI or a breakpoint
    trap (TF bit set).
    
    Also make the user trampoline save and restore EFLAGS, so that
    the TF bit gets restored after a fast system call (it will have
    been cleared to avoid trap faults in the kernel).
parent 0098b69e
...@@ -233,7 +233,7 @@ need_resched: ...@@ -233,7 +233,7 @@ need_resched:
#endif #endif
/* Points to after the "sysenter" instruction in the vsyscall page */ /* Points to after the "sysenter" instruction in the vsyscall page */
#define SYSENTER_RETURN 0xffffe009 #define SYSENTER_RETURN 0xffffe00a
# sysenter call handler stub # sysenter call handler stub
ALIGN ALIGN
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/thread_info.h> #include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/string.h> #include <linux/string.h>
...@@ -18,24 +19,55 @@ ...@@ -18,24 +19,55 @@
extern asmlinkage void sysenter_entry(void); extern asmlinkage void sysenter_entry(void);
/*
* Create a per-cpu fake "SEP thread" stack, so that we can
* enter the kernel without having to worry about things like
* "current" etc not working (debug traps and NMI's can happen
* before we can switch over to the "real" thread).
*
* Return the resulting fake stack pointer.
*/
struct fake_sep_struct {
struct thread_info thread;
struct task_struct task;
unsigned char trampoline[32] __attribute__((aligned(1024)));
unsigned char stack[0];
} __attribute__((aligned(8192)));
static struct fake_sep_struct *alloc_sep_thread(int cpu)
{
struct fake_sep_struct *entry;
entry = (struct fake_sep_struct *) __get_free_pages(GFP_ATOMIC, 1);
if (!entry)
return NULL;
memset(entry, 0, PAGE_SIZE<<1);
entry->thread.task = &entry->task;
entry->task.thread_info = &entry->thread;
entry->thread.preempt_count = 1;
entry->thread.cpu = cpu;
return entry;
}
static void __init enable_sep_cpu(void *info) static void __init enable_sep_cpu(void *info)
{ {
unsigned long page = __get_free_page(GFP_ATOMIC);
int cpu = get_cpu(); int cpu = get_cpu();
struct fake_sep_struct *sep = alloc_sep_thread(cpu);
unsigned long *esp0_ptr = &(init_tss + cpu)->esp0; unsigned long *esp0_ptr = &(init_tss + cpu)->esp0;
unsigned long rel32; unsigned long rel32;
rel32 = (unsigned long) sysenter_entry - (page+11); rel32 = (unsigned long) sysenter_entry - (unsigned long) (sep->trampoline+11);
*(short *) (page+0) = 0x258b; /* movl xxxxx,%esp */ *(short *) (sep->trampoline+0) = 0x258b; /* movl xxxxx,%esp */
*(long **) (page+2) = esp0_ptr; *(long **) (sep->trampoline+2) = esp0_ptr;
*(char *) (page+6) = 0xe9; /* jmp rl32 */ *(char *) (sep->trampoline+6) = 0xe9; /* jmp rl32 */
*(long *) (page+7) = rel32; *(long *) (sep->trampoline+7) = rel32;
wrmsr(0x174, __KERNEL_CS, 0); /* SYSENTER_CS_MSR */ wrmsr(0x174, __KERNEL_CS, 0); /* SYSENTER_CS_MSR */
wrmsr(0x175, page+PAGE_SIZE, 0); /* SYSENTER_ESP_MSR */ wrmsr(0x175, PAGE_SIZE*2 + (unsigned long) sep, 0); /* SYSENTER_ESP_MSR */
wrmsr(0x176, page, 0); /* SYSENTER_EIP_MSR */ wrmsr(0x176, (unsigned long) &sep->trampoline, 0); /* SYSENTER_EIP_MSR */
printk("Enabling SEP on CPU %d\n", cpu); printk("Enabling SEP on CPU %d\n", cpu);
put_cpu(); put_cpu();
...@@ -48,6 +80,7 @@ static int __init sysenter_setup(void) ...@@ -48,6 +80,7 @@ static int __init sysenter_setup(void)
0xc3 /* ret */ 0xc3 /* ret */
}; };
static const char sysent[] = { static const char sysent[] = {
0x9c, /* pushf */
0x51, /* push %ecx */ 0x51, /* push %ecx */
0x52, /* push %edx */ 0x52, /* push %edx */
0x55, /* push %ebp */ 0x55, /* push %ebp */
...@@ -59,6 +92,7 @@ static int __init sysenter_setup(void) ...@@ -59,6 +92,7 @@ static int __init sysenter_setup(void)
0x5d, /* pop %ebp */ 0x5d, /* pop %ebp */
0x5a, /* pop %edx */ 0x5a, /* pop %edx */
0x59, /* pop %ecx */ 0x59, /* pop %ecx */
0x9d, /* popf - restore TF */
0xc3 /* ret */ 0xc3 /* ret */
}; };
unsigned long page = get_zeroed_page(GFP_ATOMIC); unsigned long page = get_zeroed_page(GFP_ATOMIC);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment