Commit dc4c2a0a authored by Alexander van Heukelum, committed by H. Peter Anvin

i386: fix/simplify espfix stack switching, move it into assembly

The espfix code triggers if we have a protected mode userspace
application with a 16-bit stack. On returning to userspace, with iret,
the CPU doesn't restore the high word of the stack pointer. This is an
"official" bug, and the work-around used in the kernel is to temporarily
switch to a 32-bit stack segment/pointer pair where the high word of the
pointer is equal to the high word of the userspace stackpointer.

The current implementation uses THREAD_SIZE to determine the cut-off,
but there is no good reason not to use the more natural 64kb... However,
implementing this by simply substituting THREAD_SIZE with 65536 in
patch_espfix_desc crashed the test application. patch_espfix_desc tries
to do what is described above, but gets it subtly wrong if the userspace
stack pointer is just below a multiple of THREAD_SIZE: an overflow
occurs to bit 13... With a bit of luck, when the kernelspace
stackpointer is just below a 64kb-boundary, the overflow then ripples
through to bit 16 and userspace will see its stack pointer changed by
65536.

This patch moves all espfix code into entry_32.S. Selecting a 16-bit
cut-off simplifies the code. The game with changing the limit dynamically
is removed too. It complicates matters and I see no value in it. Changing
only the top 16-bit word of ESP is one instruction and it also implies
that only two bytes of the ESPFIX GDT entry need to be changed and this
can be implemented in just a handful of simple-to-understand instructions.
As a side effect, the operation to compute the original ESP from the
ESPFIX ESP and the GDT entry simplifies a bit too, and the remaining
three instructions have been expanded inline in entry_32.S.

impact: can now reliably run userspace with ESP=xxxxfffc on 16-bit
stack segment
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Stas Sergeev <stsp@aknet.ru>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 2e04bc76
...@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ...@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
/* data */ /* data */
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT GDT_STACK_CANARY_INIT
#endif #endif
......
...@@ -588,24 +588,34 @@ ldt_ss: ...@@ -588,24 +588,34 @@ ldt_ss:
jne restore_nocheck jne restore_nocheck
#endif #endif
/* If returning to userspace with 16bit stack, /*
* try to fix the higher word of ESP, as the CPU * Setup and switch to ESPFIX stack
* won't restore it. *
* This is an "official" bug of all the x86-compatible * We're returning to userspace with a 16 bit stack. The CPU will not
* CPUs, which we can try to work around to make * restore the high word of ESP for us on executing iret... This is an
* dosemu and wine happy. */ * "official" bug of all the x86-compatible CPUs, which we can work
movl PT_OLDESP(%esp), %eax * around to make dosemu and wine happy. We do this by preloading the
movl %esp, %edx * high word of ESP with the high word of the userspace ESP while
call patch_espfix_desc * compensating for the offset by changing to the ESPFIX segment with
* a base address that matches for the difference.
*/
mov %esp, %edx /* load kernel esp */
mov PT_OLDESP(%esp), %eax /* load userspace esp */
mov %dx, %ax /* eax: new kernel esp */
sub %eax, %edx /* offset (low word is 0) */
PER_CPU(gdt_page, %ebx)
shr $16, %edx
mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
pushl $__ESPFIX_SS pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
pushl %eax push %eax /* new kernel esp */
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
/* Disable interrupts, but do not irqtrace this section: we /* Disable interrupts, but do not irqtrace this section: we
* will soon execute iret and the tracer was already set to * will soon execute iret and the tracer was already set to
* the irqstate after the iret */ * the irqstate after the iret */
DISABLE_INTERRUPTS(CLBR_EAX) DISABLE_INTERRUPTS(CLBR_EAX)
lss (%esp), %esp lss (%esp), %esp /* switch to espfix segment */
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck jmp restore_nocheck
CFI_ENDPROC CFI_ENDPROC
...@@ -718,15 +728,24 @@ PTREGSCALL(vm86) ...@@ -718,15 +728,24 @@ PTREGSCALL(vm86)
PTREGSCALL(vm86old) PTREGSCALL(vm86old)
.macro FIXUP_ESPFIX_STACK .macro FIXUP_ESPFIX_STACK
/* since we are on a wrong stack, we cant make it a C code :( */ /*
* Switch back from the ESPFIX stack to the normal zero-based stack
*
* We can't call C functions using the ESPFIX stack. This code reads
* the high word of the segment base from the GDT and switches to the
* normal stack and adjusts ESP with the matching offset.
*/
/* fixup the stack */
PER_CPU(gdt_page, %ebx) PER_CPU(gdt_page, %ebx)
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
addl %esp, %eax mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
pushl %eax pushl %eax
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
lss (%esp), %esp lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
.endm .endm
.macro UNWIND_ESPFIX_STACK .macro UNWIND_ESPFIX_STACK
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment