Commit 5f310f73, authored by Andy Lutomirski, committed by Ingo Molnar

x86/entry/32: Re-implement SYSENTER using the new C path

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/5b99659e8be70f3dd10cd8970a5c90293d9ad9a7.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 150ac78d
...@@ -363,7 +363,7 @@ __visible void do_int80_syscall_32(struct pt_regs *regs) ...@@ -363,7 +363,7 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
syscall_return_slowpath(regs); syscall_return_slowpath(regs);
} }
/* Returns 0 to return using IRET or 1 to return using SYSRETL. */ /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs) __visible long do_fast_syscall_32(struct pt_regs *regs)
{ {
/* /*
...@@ -417,7 +417,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) ...@@ -417,7 +417,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip == landing_pad && regs->ip == landing_pad &&
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
#else #else
return 0; /*
* Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
*
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
* because the ECX fixup above will ensure that this is essentially
* never the case.
*
* We don't allow syscalls at all from VM86 mode, but we still
* need to check VM, because we might be returning from sys_vm86.
*/
return static_cpu_has(X86_FEATURE_SEP) &&
regs->cs == __USER_CS && regs->ss == __USER_DS &&
regs->ip == landing_pad &&
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif #endif
} }
#endif #endif
...@@ -287,76 +287,47 @@ need_resched: ...@@ -287,76 +287,47 @@ need_resched:
END(resume_kernel) END(resume_kernel)
#endif #endif
/*
* SYSENTER_RETURN points to after the SYSENTER instruction
* in the vsyscall page. See vsyscall-sysentry.S, which defines
* the symbol.
*/
# SYSENTER call handler stub # SYSENTER call handler stub
ENTRY(entry_SYSENTER_32) ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp: sysenter_past_esp:
/* pushl $__USER_DS /* pt_regs->ss */
* Interrupts are disabled here, but we can't trace it until pushl %ecx /* pt_regs->cx */
* enough kernel state to call TRACE_IRQS_OFF can be called - but pushfl /* pt_regs->flags (except IF = 0) */
* we immediately enable interrupts at that point anyway. orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
*/ pushl $__USER_CS /* pt_regs->cs */
pushl $__USER_DS pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %ebp pushl %eax /* pt_regs->orig_ax */
pushfl SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
orl $X86_EFLAGS_IF, (%esp)
pushl $__USER_CS
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary: TI_sysenter_return
* is relative to thread_info, which is at the bottom of the
* kernel stack page. 4*4 means the 4 words pushed above;
* TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
* and THREAD_SIZE takes us to the bottom.
*/
pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
pushl %eax
SAVE_ALL
ENABLE_INTERRUPTS(CLBR_NONE)
/* /*
* Load the potential sixth argument from user stack. * User mode is traced as though IRQs are on, and SYSENTER
* Careful about security. * turned them off.
*/ */
cmpl $__PAGE_OFFSET-3, %ebp
jae syscall_fault
ASM_STAC
1: movl (%ebp), %ebp
ASM_CLAC
movl %ebp, PT_EBP(%esp)
_ASM_EXTABLE(1b, syscall_fault)
GET_THREAD_INFO(%ebp)
testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
jnz syscall_trace_entry
sysenter_do_call:
cmpl $(NR_syscalls), %eax
jae sysenter_badsys
call *sys_call_table(, %eax, 4)
sysenter_after_call:
movl %eax, PT_EAX(%esp)
LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx movl %esp, %eax
jnz syscall_exit_work_irqs_off call do_fast_syscall_32
sysenter_exit: testl %eax, %eax
/* if something modifies registers it must also disable sysexit */ jz .Lsyscall_32_done
movl PT_EIP(%esp), %edx
movl PT_OLDESP(%esp), %ecx /* Opportunistic SYSEXIT */
xorl %ebp, %ebp TRACE_IRQS_ON /* User mode traces as IRQs on. */
TRACE_IRQS_ON movl PT_EIP(%esp), %edx /* pt_regs->ip */
movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
popl %ebx /* pt_regs->bx */
addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
popl %esi /* pt_regs->si */
popl %edi /* pt_regs->di */
popl %ebp /* pt_regs->bp */
popl %eax /* pt_regs->ax */
1: mov PT_FS(%esp), %fs 1: mov PT_FS(%esp), %fs
PTGS_TO_GS PTGS_TO_GS
/*
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
ENABLE_INTERRUPTS_SYSEXIT ENABLE_INTERRUPTS_SYSEXIT
.pushsection .fixup, "ax" .pushsection .fixup, "ax"
...@@ -371,7 +342,7 @@ ENDPROC(entry_SYSENTER_32) ...@@ -371,7 +342,7 @@ ENDPROC(entry_SYSENTER_32)
ENTRY(entry_INT80_32) ENTRY(entry_INT80_32)
ASM_CLAC ASM_CLAC
pushl %eax /* pt_regs->orig_ax */ pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest, load -ENOSYS into ax */ SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/* /*
* User mode is traced as though IRQs are on, and the interrupt gate * User mode is traced as though IRQs are on, and the interrupt gate
...@@ -381,6 +352,7 @@ ENTRY(entry_INT80_32) ...@@ -381,6 +352,7 @@ ENTRY(entry_INT80_32)
movl %esp, %eax movl %esp, %eax
call do_int80_syscall_32 call do_int80_syscall_32
.Lsyscall_32_done:
restore_all: restore_all:
TRACE_IRQS_IRET TRACE_IRQS_IRET
...@@ -457,42 +429,6 @@ ldt_ss: ...@@ -457,42 +429,6 @@ ldt_ss:
#endif #endif
ENDPROC(entry_INT80_32) ENDPROC(entry_INT80_32)
# perform syscall exit tracing
ALIGN
syscall_trace_entry:
movl $-ENOSYS, PT_EAX(%esp)
movl %esp, %eax
call syscall_trace_enter
/* What it returned is what we'll actually use. */
cmpl $(NR_syscalls), %eax
jnae syscall_call
jmp syscall_exit
END(syscall_trace_entry)
# perform syscall exit tracing
ALIGN
syscall_exit_work_irqs_off:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
syscall_exit_work:
movl %esp, %eax
call syscall_return_slowpath
jmp restore_all
END(syscall_exit_work)
syscall_fault:
ASM_CLAC
GET_THREAD_INFO(%ebp)
movl $-EFAULT, PT_EAX(%esp)
jmp resume_userspace
END(syscall_fault)
sysenter_badsys:
movl $-ENOSYS, %eax
jmp sysenter_after_call
END(sysenter_badsys)
.macro FIXUP_ESPFIX_STACK .macro FIXUP_ESPFIX_STACK
/* /*
* Switch back for ESPFIX stack to the normal zerobased stack * Switch back for ESPFIX stack to the normal zerobased stack
......
...@@ -34,6 +34,8 @@ __kernel_vsyscall: ...@@ -34,6 +34,8 @@ __kernel_vsyscall:
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \ ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
"syscall", X86_FEATURE_SYSCALL32 "syscall", X86_FEATURE_SYSCALL32
#else
ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
#endif #endif
/* Enter using int $0x80 */ /* Enter using int $0x80 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment