Commit e3db4852 authored by Linus Torvalds

Use a fixed per-cpu SYSENTER_MSR_ESP value by having the sysenter
entry routine load the real ESP0 off that per-cpu stack. Make this
even faster by putting the sysenter stack in the per-CPU TSS, so
that we can use the tss->esp0 value directly (which we have to
update on task switches anyway).

CAREFUL! This needs very subtle code for debug and NMI exceptions,
to make sure we don't run with the sysenter stack in any real kernel
code!
parent f58a69aa
......@@ -72,6 +72,12 @@ DF_MASK = 0x00000400
NT_MASK = 0x00004000
VM_MASK = 0x00020000
/*
* Offset of the TSS esp0 field relative to the value held in the
* SYSENTER_ESP MSR.
*
* ESP0 is at offset 4 from the start of the TSS. 0x100 is the size of
* the TSS, and SYSENTER_ESP is programmed to point at the end of the
* TSS (tss + sizeof(struct tss_struct), see enable_sep_cpu), so 0x100
* is also the offset of that top-of-stack pointer from the TSS base.
* Hence esp0 lives at (4 - 0x100) relative to %esp on sysenter entry.
*
* NOTE(review): this hard-codes sizeof(struct tss_struct) == 0x100;
* confirm against the structure definition if the TSS layout changes.
*/
TSS_ESP0_OFFSET = (4 - 0x100)
#ifdef CONFIG_PREEMPT
#define preempt_stop cli
#else
......@@ -229,6 +235,8 @@ need_resched:
# sysenter call handler stub
ENTRY(sysenter_entry)
movl TSS_ESP0_OFFSET(%esp),%esp
sysenter_past_esp:
sti
pushl $(__USER_DS)
pushl %ebp
......@@ -458,12 +466,36 @@ device_not_available_emulate:
addl $4, %esp
jmp ret_from_exception
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
* that sets up the real kernel stack. Check here, since we can't
* allow the wrong stack to be used.
*
* "TSS_ESP0_OFFSET+12" is because the NMI/debug handler will have
* already pushed 3 words if it hits on the sysenter instruction:
* eflags, cs and eip.
*
* We just load the right stack, and push the three (known) values
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*
* Must be the very first thing in the handler: it relies on (%esp)
* still being the saved eip of the exception frame.
*
* NOTE(review): pushfl stores the flags as they are inside the
* handler, not the eflags word that was saved on the sysenter
* stack - confirm this is the intended "known" value.
*/
#define CHECK_SYSENTER_EIP \
cmpl $sysenter_entry,(%esp); /* saved eip == first sysenter insn? */ \
jne 1f; /* no: we are already on a proper stack */ \
movl TSS_ESP0_OFFSET+12(%esp),%esp; /* +12 undoes the 3 pushed words; load tss->esp0 */ \
pushfl; /* rebuild the frame on the real stack: eflags */ \
pushl $__KERNEL_CS; /* ... cs */ \
pushl $sysenter_past_esp; /* ... eip, resuming after the stack switch */ \
1:
/*
* Debug exception entry point. Fixes up the stack first in case the
* trap hit the initial sysenter_entry instruction, then jumps to the
* shared error_code path with do_debug as the handler.
*/
ENTRY(debug)
CHECK_SYSENTER_EIP
pushl $0 /* presumably a dummy error code slot for error_code - confirm */
pushl $do_debug /* handler address, presumably consumed by error_code - confirm */
jmp error_code
ENTRY(nmi)
CHECK_SYSENTER_EIP
pushl %eax
SAVE_ALL
movl %esp, %edx
......
......@@ -41,8 +41,9 @@ void enable_sep_cpu(void *info)
struct tss_struct *tss = init_tss + cpu;
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp0, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
printk("Enabling SEP on CPU %d\n", cpu);
......
......@@ -404,6 +404,8 @@ struct thread_struct {
#define INIT_TSS { \
.esp0 = sizeof(init_stack) + (long)&init_stack, \
.ss0 = __KERNEL_DS, \
.esp1 = sizeof(init_tss[0]) + (long)&init_tss[0], \
.ss1 = __KERNEL_CS, \
.ldt = GDT_ENTRY_LDT, \
.bitmap = INVALID_IO_BITMAP_OFFSET, \
.io_bitmap = { [ 0 ... IO_BITMAP_SIZE ] = ~0 }, \
......@@ -412,13 +414,11 @@ struct thread_struct {
/*
 * load_esp0 - record a new ring-0 stack pointer in the given TSS.
 * @tss:  TSS to update
 * @esp0: new kernel stack pointer to store in tss->esp0
 *
 * MSR_IA32_SYSENTER_ESP is deliberately NOT written here.
 * enable_sep_cpu() programs it once with a fixed value (tss->esp1, the
 * end of the TSS), and sysenter_entry fetches the real stack from
 * tss->esp0 via TSS_ESP0_OFFSET(%esp). Overwriting the MSR with esp0
 * would make that load read from the wrong place.
 */
static inline void load_esp0(struct tss_struct *tss, unsigned long esp0)
{
	tss->esp0 = esp0;
	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
	if (tss->ss1 != __KERNEL_CS) {
		/* Restore the sysenter code segment and keep the ss1 cache in sync. */
		tss->ss1 = __KERNEL_CS;
		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	}
}
static inline void disable_sysenter(struct tss_struct *tss)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment