Commit 72674d48 authored by Linus Torvalds

Merge tag 'x86-urgent-2020-07-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A series of fixes for x86:

   - Reset MXCSR in kernel_fpu_begin() to prevent using a stale user
     space value (see the sketch after this message).

   - Prevent writing MSR_TEST_CTRL on CPUs which are not explicitly
     whitelisted for split lock detection. Some CPUs which do not
     support it crash even when the MSR is written to 0, which is the
     default value.

   - Fix the XEN PV fallout of the entry code rework

   - Fix the 32-bit fallout of the entry code rework

   - Add more selftests to ensure that these entry problems don't come
     back.

   - Disable 16-bit segments on XEN PV. They are not supported because
     XEN PV does not implement ESPFIX64"
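
For context on the MXCSR item: MXCSR is live user-mode state, and its FTZ/DAZ
bits silently change SSE arithmetic, so kernel FPU code running with a stale
user value can compute different results than intended. A minimal user-space
sketch (illustration only, not part of this series) that makes the effect
visible:

	/* gcc -O0 mxcsr_demo.c: show that MXCSR changes SSE results. */
	#include <stdio.h>
	#include <xmmintrin.h>	/* _mm_getcsr()/_mm_setcsr(), SSE intrinsics */

	int main(void)
	{
		volatile float tiny = 1e-40f;	/* subnormal in single precision */
		__m128 v;

		v = _mm_mul_ss(_mm_set_ss(tiny), _mm_set_ss(1.0f));
		printf("MXCSR %#x: tiny * 1.0f = %g\n", _mm_getcsr(), _mm_cvtss_f32(v));

		/* Set FTZ (bit 15) and DAZ (bit 6): subnormals now flush to zero. */
		_mm_setcsr(_mm_getcsr() | 0x8040);
		v = _mm_mul_ss(_mm_set_ss(tiny), _mm_set_ss(1.0f));
		printf("MXCSR %#x: tiny * 1.0f = %g\n", _mm_getcsr(), _mm_cvtss_f32(v));
		return 0;
	}

The fpu hunk below loads MXCSR_DEFAULT in kernel_fpu_begin() so kernel FPU
sections no longer inherit whatever the interrupted task left in the register.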

* tag 'x86-urgent-2020-07-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ldt: Disable 16-bit segments on Xen PV
  x86/entry/32: Fix #MC and #DB wiring on x86_32
  x86/entry/xen: Route #DB correctly on Xen PV
  x86/entry, selftests: Further improve user entry sanity checks
  x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER
  selftests/x86: Consolidate and fix get/set_eflags() helpers
  selftests/x86/syscall_nt: Clear weird flags after each test
  selftests/x86/syscall_nt: Add more flag combinations
  x86/entry/64/compat: Fix Xen PV SYSENTER frame setup
  x86/entry: Move SYSENTER's regs->sp and regs->flags fixups into C
  x86/entry: Assert that syscalls are on the right stack
  x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted
  x86/fpu: Reset MXCSR to default in kernel_fpu_begin()
parents f23dbe18 cc801833
@@ -45,6 +45,32 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+/* Check that the stack and regs on entry from user mode are sane. */
+static void check_user_regs(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
+		/*
+		 * Make sure that the entry code gave us a sensible EFLAGS
+		 * register.  Native because we want to check the actual CPU
+		 * state, not the interrupt state as imagined by Xen.
+		 */
+		unsigned long flags = native_save_fl();
+
+		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
+				      X86_EFLAGS_NT));
+
+		/* We think we came from user mode. Make sure pt_regs agrees. */
+		WARN_ON_ONCE(!user_mode(regs));
+
+		/*
+		 * All entries from user mode (except #DF) should be on the
+		 * normal thread stack and should have user pt_regs in the
+		 * correct location.
+		 */
+		WARN_ON_ONCE(!on_thread_stack());
+		WARN_ON_ONCE(regs != task_pt_regs(current));
+	}
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 /**
  * enter_from_user_mode - Establish state when coming from user mode
@@ -127,9 +153,6 @@ static long syscall_trace_enter(struct pt_regs *regs)
 	unsigned long ret = 0;
 	u32 work;
 
-	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
-		BUG_ON(regs != task_pt_regs(current));
-
 	work = READ_ONCE(ti->flags);
 
 	if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
@@ -346,6 +369,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
 	struct thread_info *ti;
 
+	check_user_regs(regs);
+
 	enter_from_user_mode();
 	instrumentation_begin();
@@ -409,6 +434,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs)
 /* Handles int $0x80 */
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
+	check_user_regs(regs);
+
 	enter_from_user_mode();
 	instrumentation_begin();
@@ -460,6 +487,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
 		vdso_image_32.sym_int80_landing_pad;
 	bool success;
 
+	check_user_regs(regs);
+
 	/*
 	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
@@ -510,6 +539,18 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
 		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
 #endif
 }
 
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
+__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
+{
+	/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
+	regs->sp = regs->bp;
+
+	/* SYSENTER clobbers EFLAGS.IF.  Assume it was set in usermode. */
+	regs->flags |= X86_EFLAGS_IF;
+
+	return do_fast_syscall_32(regs);
+}
+
 #endif
 
 SYSCALL_DEFINE0(ni_syscall)
@@ -553,6 +594,7 @@ SYSCALL_DEFINE0(ni_syscall)
 bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
+		check_user_regs(regs);
 		enter_from_user_mode();
 		return false;
 	}
@@ -686,6 +728,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
  */
 void noinstr idtentry_enter_user(struct pt_regs *regs)
 {
+	check_user_regs(regs);
 	enter_from_user_mode();
 }
...
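The do_SYSENTER_32() comments above encode the 32-bit fast-syscall contract:
SYSENTER destroys the user RSP and EFLAGS.IF, so the vDSO stub stashes the
stack pointer in RBP before executing SYSENTER and the kernel reconstructs
sp and flags in C. A hypothetical 32-bit user-space sketch (compile with
-m32; illustration only, not part of the series) that exercises this path
by calling the vDSO entry point directly:

	#include <stdio.h>
	#include <elf.h>	/* AT_SYSINFO */
	#include <sys/auxv.h>	/* getauxval() */

	int main(void)
	{
		unsigned long vsyscall = getauxval(AT_SYSINFO);
		long ret;

		if (!vsyscall)
			return 1;	/* no vDSO fast-syscall entry exposed */

		/*
		 * __kernel_vsyscall takes the syscall number in %eax; the stub
		 * itself saves the user stack pointer in %ebp before SYSENTER,
		 * which is exactly what "regs->sp = regs->bp" recovers above.
		 */
		asm volatile ("call *%1"
			      : "=a" (ret)
			      : "r" (vsyscall), "0" (20L /* __NR_getpid on i386 */)
			      : "memory");

		printf("getpid() via __kernel_vsyscall -> %ld\n", ret);
		return 0;
	}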
@@ -933,9 +933,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
 
 .Lsysenter_past_esp:
 	pushl	$__USER_DS		/* pt_regs->ss */
-	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
+	pushl	$0			/* pt_regs->sp (placeholder) */
 	pushfl				/* pt_regs->flags (except IF = 0) */
-	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
 	pushl	$__USER_CS		/* pt_regs->cs */
 	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
 	pushl	%eax			/* pt_regs->orig_ax */
@@ -965,7 +964,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
 .Lsysenter_flags_fixed:
 
 	movl	%esp, %eax
-	call	do_fast_syscall_32
+	call	do_SYSENTER_32
 	/* XEN PV guests always use IRET path */
 	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
 		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
...
@@ -57,29 +57,30 @@ SYM_CODE_START(entry_SYSENTER_compat)
 
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/*
-	 * User tracing code (ptrace or signal handlers) might assume that
-	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
-	 * syscall.  Just in case the high bits are nonzero, zero-extend
-	 * the syscall number.  (This could almost certainly be deleted
-	 * with no ill effects.)
-	 */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
-	pushq	%rbp			/* pt_regs->sp (stashed in bp) */
+	pushq	$0			/* pt_regs->sp = 0 (placeholder) */
 
 	/*
 	 * Push flags.  This is nasty.  First, interrupts are currently
-	 * off, but we need pt_regs->flags to have IF set.  Second, even
-	 * if TF was set when SYSENTER started, it's clear by now.  We fix
-	 * that later using TIF_SINGLESTEP.
+	 * off, but we need pt_regs->flags to have IF set.  Second, if TF
+	 * was set in usermode, it's still set, and we're singlestepping
+	 * through this code.  do_SYSENTER_32() will fix up IF.
 	 */
 	pushfq				/* pt_regs->flags (except IF = 0) */
-	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
+SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+
+	/*
+	 * User tracing code (ptrace or signal handlers) might assume that
+	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+	 * syscall.  Just in case the high bits are nonzero, zero-extend
+	 * the syscall number.  (This could almost certainly be deleted
+	 * with no ill effects.)
+	 */
+	movl	%eax, %eax
+
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
@@ -135,7 +136,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
 .Lsysenter_flags_fixed:
 
 	movq	%rsp, %rdi
-	call	do_fast_syscall_32
+	call	do_SYSENTER_32
 	/* XEN PV guests always use IRET path */
 	ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
 		    "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
...
@@ -623,6 +623,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
  * MXCSR and XCR definitions:
  */
 
+static inline void ldmxcsr(u32 mxcsr)
+{
+	asm volatile("ldmxcsr %0" :: "m" (mxcsr));
+}
+
 extern unsigned int mxcsr_feature_mask;
 
 #define XCR_XFEATURE_ENABLED_MASK	0x00000000
...
@@ -353,10 +353,6 @@ static __always_inline void __##func(struct pt_regs *regs)
 
 #else /* CONFIG_X86_64 */
 
-/* Maps to a regular IDTENTRY on 32bit for now */
-# define DECLARE_IDTENTRY_IST		DECLARE_IDTENTRY
-# define DEFINE_IDTENTRY_IST		DEFINE_IDTENTRY
-
 /**
  * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant
  * @vector:	Vector number (ignored for C)
@@ -387,28 +383,18 @@ __visible noinstr void func(struct pt_regs *regs, \
 #endif /* !CONFIG_X86_64 */
 
 /* C-Code mapping */
+#define DECLARE_IDTENTRY_NMI		DECLARE_IDTENTRY_RAW
+#define DEFINE_IDTENTRY_NMI		DEFINE_IDTENTRY_RAW
+
+#ifdef CONFIG_X86_64
 #define DECLARE_IDTENTRY_MCE		DECLARE_IDTENTRY_IST
 #define DEFINE_IDTENTRY_MCE		DEFINE_IDTENTRY_IST
 #define DEFINE_IDTENTRY_MCE_USER	DEFINE_IDTENTRY_NOIST
 
-#define DECLARE_IDTENTRY_NMI		DECLARE_IDTENTRY_RAW
-#define DEFINE_IDTENTRY_NMI		DEFINE_IDTENTRY_RAW
-
 #define DECLARE_IDTENTRY_DEBUG		DECLARE_IDTENTRY_IST
 #define DEFINE_IDTENTRY_DEBUG		DEFINE_IDTENTRY_IST
 #define DEFINE_IDTENTRY_DEBUG_USER	DEFINE_IDTENTRY_NOIST
+#endif
 
-/**
- * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points
- * @vector:	Vector number (ignored for C)
- * @func:	Function name of the entry point
- *
- * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM
- * indirection magic.
- */
-#define DECLARE_IDTENTRY_XEN(vector, func)				\
-	asmlinkage void xen_asm_exc_xen##func(void);			\
-	asmlinkage void asm_exc_xen##func(void)
-
 #else /* !__ASSEMBLY__ */
 
@@ -455,9 +441,6 @@ __visible noinstr void func(struct pt_regs *regs, \
 # define DECLARE_IDTENTRY_MCE(vector, func)				\
 	DECLARE_IDTENTRY(vector, func)
 
-# define DECLARE_IDTENTRY_DEBUG(vector, func)				\
-	DECLARE_IDTENTRY(vector, func)
-
 /* No ASM emitted for DF as this goes through a C shim */
 # define DECLARE_IDTENTRY_DF(vector, func)
 
@@ -469,10 +452,6 @@ __visible noinstr void func(struct pt_regs *regs, \
 /* No ASM code emitted for NMI */
 #define DECLARE_IDTENTRY_NMI(vector, func)
 
-/* XEN NMI and DB wrapper */
-#define DECLARE_IDTENTRY_XEN(vector, func)				\
-	idtentry vector asm_exc_xen##func exc_##func has_error_code=0
-
 /*
  * ASM code to emit the common vector entry stubs where each stub is
  * packed into 8 bytes.
@@ -565,16 +544,28 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_BP,	exc_int3);
 DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);
 
 #ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_64
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	exc_machine_check);
+#endif
 #endif
 
 /* NMI */
 DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
-DECLARE_IDTENTRY_XEN(X86_TRAP_NMI,	nmi);
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_NMI,	xenpv_exc_nmi);
+#endif
 
 /* #DB */
+#ifdef CONFIG_X86_64
 DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB,	exc_debug);
-DECLARE_IDTENTRY_XEN(X86_TRAP_DB,	debug);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB,	exc_debug);
+#endif
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB,	xenpv_exc_debug);
+#endif
 
 /* #DF */
 DECLARE_IDTENTRY_DF(X86_TRAP_DF,	exc_double_fault);
...
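For readers following the idtentry.h changes: the RAW variants used above carry
no automatic enter/exit handling, which is why the Xen PV handlers declared
here (and defined in the enlighten_pv.c hunk further down) can, and must, do
their own dispatch. Roughly, simplified from the v5.8 header (a sketch, not
the verbatim source):

	/* Declares the C handler plus its native and Xen asm entry stubs. */
	#define DECLARE_IDTENTRY_RAW(vector, func)			\
		asmlinkage void asm_##func(void);			\
		asmlinkage void xen_asm_##func(void);			\
		__visible void func(struct pt_regs *regs)

	/* Emits a bare noinstr C handler: no idtentry enter/exit glue. */
	#define DEFINE_IDTENTRY_RAW(func)				\
	__visible noinstr void func(struct pt_regs *regs)

This is what lets DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug) give
Xen PV its own xen_asm_xenpv_exc_debug stub while native keeps the IST-based
exc_debug entry.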
@@ -49,6 +49,13 @@ enum split_lock_detect_state {
 static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
 static u64 msr_test_ctrl_cache __ro_after_init;
 
+/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
 /*
  * Processors which have self-snooping capability can handle conflicting
  * memory type across CPUs by snooping its own cache. However, there exists
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)
 
 static void split_lock_init(void)
 {
-	split_lock_verify_msr(sld_state != sld_off);
+	if (cpu_model_supports_sld)
+		split_lock_verify_msr(sld_state != sld_off);
 }
 
 static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	cpu_model_supports_sld = true;
 	split_lock_setup();
 }
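
The pattern above is worth spelling out: every MSR_TEST_CTRL access is now
gated on a model whitelist, and probing still goes through the _safe MSR
accessors as a second line of defense. A condensed sketch of the idea
(hypothetical helper, not the patch itself):

	/* Only touch MSR_TEST_CTRL on models known to implement SLD. */
	static void sld_set(bool on)
	{
		u64 ctrl;

		if (!cpu_model_supports_sld)	/* set from the model table */
			return;

		if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
			return;			/* MSR faulted: leave it alone */

		if (on)
			ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
		else
			ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

		wrmsrl_safe(MSR_TEST_CTRL, ctrl);
	}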
@@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
 
 static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 {
+	WARN_ON_ONCE(user_mode(regs));
+
 	/*
 	 * Only required when from kernel mode. See
 	 * mce_check_crashing_cpu() for details.
@@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
 }
 #else
 /* 32bit unified entry point */
-DEFINE_IDTENTRY_MCE(exc_machine_check)
+DEFINE_IDTENTRY_RAW(exc_machine_check)
 {
 	unsigned long dr7;
...
@@ -101,6 +101,12 @@ void kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(&current->thread.fpu);
 	}
 	__cpu_invalidate_fpregs_state();
+
+	if (boot_cpu_has(X86_FEATURE_XMM))
+		ldmxcsr(MXCSR_DEFAULT);
+
+	if (boot_cpu_has(X86_FEATURE_FPU))
+		asm volatile ("fninit");
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_begin);
...
@@ -29,6 +29,8 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable_areas.h>
 
+#include <xen/xen.h>
+
 /* This is a multiple of PAGE_SIZE. */
 #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
 
@@ -543,6 +545,37 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 	return bytecount;
 }
 
+static bool allow_16bit_segments(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_16BIT))
+		return false;
+
+#ifdef CONFIG_XEN_PV
+	/*
+	 * Xen PV does not implement ESPFIX64, which means that 16-bit
+	 * segments will not work correctly.  Until either Xen PV implements
+	 * ESPFIX64 and can signal this fact to the guest or unless someone
+	 * provides compelling evidence that allowing broken 16-bit segments
+	 * is worthwhile, disallow 16-bit segments under Xen PV.
+	 */
+	if (xen_pv_domain()) {
+		static DEFINE_MUTEX(xen_warning);
+		static bool warned;
+
+		mutex_lock(&xen_warning);
+		if (!warned) {
+			pr_info("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+			warned = true;
+		}
+		mutex_unlock(&xen_warning);
+
+		return false;
+	}
+#endif
+
+	return true;
+}
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
@@ -574,7 +607,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		/* The user wants to clear the entry. */
 		memset(&ldt, 0, sizeof(ldt));
 	} else {
-		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
 			error = -EINVAL;
 			goto out;
 		}
...
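From user space, the new check is observable through modify_ldt(): installing
a segment with seg_32bit == 0 now fails with EINVAL on Xen PV (or whenever
CONFIG_X86_16BIT is off). A hypothetical probe, illustration only:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <asm/ldt.h>		/* struct user_desc */
	#include <sys/syscall.h>

	int main(void)
	{
		struct user_desc desc;

		memset(&desc, 0, sizeof(desc));
		desc.entry_number = 0;
		desc.limit = 0xffff;
		desc.seg_32bit = 0;	/* request a 16-bit segment */

		/* modify_ldt(1, ...) writes one LDT entry. */
		if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
			perror("modify_ldt(16-bit segment)");	/* EINVAL here */
		else
			puts("16-bit LDT segment installed");
		return 0;
	}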
@@ -869,6 +869,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 	instrumentation_begin();
 	trace_hardirqs_off_finish();
 
+	/*
+	 * If something gets miswired and we end up here for a user mode
+	 * #DB, we will malfunction.
+	 */
+	WARN_ON_ONCE(user_mode(regs));
+
 	/*
 	 * Catch SYSENTER with TF set and clear DR_STEP. If this hit a
 	 * watchpoint at the same time then that will still be handled.
@@ -887,6 +893,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 static __always_inline void exc_debug_user(struct pt_regs *regs,
 					   unsigned long dr6)
 {
+	/*
+	 * If something gets miswired and we end up here for a kernel mode
+	 * #DB, we will malfunction.
+	 */
+	WARN_ON_ONCE(!user_mode(regs));
+
 	idtentry_enter_user(regs);
 	instrumentation_begin();
 
@@ -917,7 +929,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
 }
 #else
 /* 32 bit does not have separate entry points. */
-DEFINE_IDTENTRY_DEBUG(exc_debug)
+DEFINE_IDTENTRY_RAW(exc_debug)
 {
 	unsigned long dr6, dr7;
...
@@ -598,6 +598,26 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 }
 
 #ifdef CONFIG_X86_64
+void noist_exc_debug(struct pt_regs *regs);
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
+{
+	/* On Xen PV, NMI doesn't use IST.  The C part is the same as native. */
+	exc_nmi(regs);
+}
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
+{
+	/*
+	 * There's no IST on Xen PV, but we still need to dispatch
+	 * to the correct handler.
+	 */
+	if (user_mode(regs))
+		noist_exc_debug(regs);
+	else
+		exc_debug(regs);
+}
+
 struct trap_array_entry {
 	void (*orig)(void);
 	void (*xen)(void);
@@ -609,18 +629,18 @@ struct trap_array_entry {
 	.xen		= xen_asm_##func,		\
 	.ist_okay	= ist_ok }
 
-#define TRAP_ENTRY_REDIR(func, xenfunc, ist_ok) {	\
+#define TRAP_ENTRY_REDIR(func, ist_ok) {		\
 	.orig		= asm_##func,			\
-	.xen		= xen_asm_##xenfunc,		\
+	.xen		= xen_asm_xenpv_##func,		\
 	.ist_okay	= ist_ok }
 
 static struct trap_array_entry trap_array[] = {
-	TRAP_ENTRY_REDIR(exc_debug, exc_xendebug,	true  ),
+	TRAP_ENTRY_REDIR(exc_debug,			true  ),
 	TRAP_ENTRY(exc_double_fault,			true  ),
 #ifdef CONFIG_X86_MCE
 	TRAP_ENTRY(exc_machine_check,			true  ),
 #endif
-	TRAP_ENTRY_REDIR(exc_nmi, exc_xennmi,		true  ),
+	TRAP_ENTRY_REDIR(exc_nmi,			true  ),
 	TRAP_ENTRY(exc_int3,				false ),
 	TRAP_ENTRY(exc_overflow,			false ),
 #ifdef CONFIG_IA32_EMULATION
...
@@ -29,10 +29,9 @@ _ASM_NOKPROBE(xen_\name)
 .endm
 
 xen_pv_trap asm_exc_divide_error
-xen_pv_trap asm_exc_debug
-xen_pv_trap asm_exc_xendebug
+xen_pv_trap asm_xenpv_exc_debug
 xen_pv_trap asm_exc_int3
-xen_pv_trap asm_exc_xennmi
+xen_pv_trap asm_xenpv_exc_nmi
 xen_pv_trap asm_exc_overflow
 xen_pv_trap asm_exc_bounds
 xen_pv_trap asm_exc_invalid_op
@@ -161,10 +160,22 @@ SYM_FUNC_END(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 SYM_FUNC_START(xen_sysenter_target)
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-	jmp entry_SYSENTER_compat
+	/*
+	 * NB: Xen is polite and clears TF from EFLAGS for us.  This means
+	 * that we don't need to guard against single step exceptions here.
+	 */
+	popq %rcx
+	popq %r11
+
+	/*
+	 * Neither Xen nor the kernel really knows what the old SS and
+	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
+	 * report those values even though Xen will guess its own values.
+	 */
+	movq $__USER32_DS, 4*8(%rsp)
+	movq $__USER32_CS, 1*8(%rsp)
+
+	jmp entry_SYSENTER_compat_after_hwframe
 SYM_FUNC_END(xen_sysenter_target)
 
 #else /* !CONFIG_IA32_EMULATION */
...
@@ -70,10 +70,10 @@ all_64: $(BINARIES_64)
 
 EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
 
-$(BINARIES_32): $(OUTPUT)/%_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
 	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
 
-$(BINARIES_64): $(OUTPUT)/%_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
 	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 # x86_64 users should be encouraged to install 32-bit libraries
...
// SPDX-License-Identifier: GPL-2.0-only
#ifndef __SELFTESTS_X86_HELPERS_H
#define __SELFTESTS_X86_HELPERS_H

#include <asm/processor-flags.h>

static inline unsigned long get_eflags(void)
{
	unsigned long eflags;

	asm volatile (
#ifdef __x86_64__
		"subq $128, %%rsp\n\t"
		"pushfq\n\t"
		"popq %0\n\t"
		"addq $128, %%rsp"
#else
		"pushfl\n\t"
		"popl %0"
#endif
		: "=r" (eflags) :: "memory");

	return eflags;
}

static inline void set_eflags(unsigned long eflags)
{
	asm volatile (
#ifdef __x86_64__
		"subq $128, %%rsp\n\t"
		"pushq %0\n\t"
		"popfq\n\t"
		"addq $128, %%rsp"
#else
		"pushl %0\n\t"
		"popfl"
#endif
		:: "r" (eflags) : "flags", "memory");
}

#endif /* __SELFTESTS_X86_HELPERS_H */
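
Two notes on helpers.h: the "subq $128, %%rsp" dance steps over the x86-64
red zone so that pushfq/pushq cannot clobber live data the compiler may keep
below the stack pointer, and the "memory" clobber stops the compiler from
caching values across the flag change. Hypothetical usage, mirroring the
syscall_nt.c test below:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include "helpers.h"	/* assumes this file sits next to the test */

	int main(void)
	{
		unsigned long flags;

		set_eflags(get_eflags() | X86_EFLAGS_AC);	/* set a "weird" flag */
		syscall(SYS_getpid);
		flags = get_eflags();
		set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);	/* back to a sane state */

		printf("AC survived the syscall: %s\n",
		       (flags & X86_EFLAGS_AC) ? "yes" : "no");
		return 0;
	}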
@@ -31,6 +31,8 @@
 #include <sys/ptrace.h>
 #include <sys/user.h>
 
+#include "helpers.h"
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -67,21 +69,6 @@ static unsigned char altstack_data[SIGSTKSZ];
 # define INT80_CLOBBERS
 #endif
 
-static unsigned long get_eflags(void)
-{
-	unsigned long eflags;
-	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
-	return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
-	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
-		      : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t*)ctx_void;
...
@@ -15,30 +15,11 @@
 #include <setjmp.h>
 #include <errno.h>
 
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
 
 /* Our sigaltstack scratch space. */
 static unsigned char altstack_data[SIGSTKSZ];
 
-static unsigned long get_eflags(void)
-{
-	unsigned long eflags;
-	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
-	return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
-	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
-		      : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
...
@@ -13,29 +13,11 @@
 #include <signal.h>
 #include <err.h>
 #include <sys/syscall.h>
-#include <asm/processor-flags.h>
 
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
 
 static unsigned int nerrs;
 
-static unsigned long get_eflags(void)
-{
-	unsigned long eflags;
-	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
-	return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
-	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
-		      : : "rm" (eflags) : "flags");
-}
-
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -59,6 +41,7 @@ static void do_it(unsigned long extraflags)
 	set_eflags(get_eflags() | extraflags);
 	syscall(SYS_getpid);
 	flags = get_eflags();
+	set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
 	if ((flags & extraflags) == extraflags) {
 		printf("[OK]\tThe syscall worked and flags are still set\n");
 	} else {
@@ -73,6 +56,12 @@ int main(void)
 	printf("[RUN]\tSet NT and issue a syscall\n");
 	do_it(X86_EFLAGS_NT);
 
+	printf("[RUN]\tSet AC and issue a syscall\n");
+	do_it(X86_EFLAGS_AC);
+
+	printf("[RUN]\tSet NT|AC and issue a syscall\n");
+	do_it(X86_EFLAGS_NT | X86_EFLAGS_AC);
+
 	/*
	 * Now try it again with TF set -- TF forces returns via IRET in all
	 * cases except non-ptregs-using 64-bit full fast path syscalls.
@@ -80,8 +69,28 @@ int main(void)
 
 	sethandler(SIGTRAP, sigtrap, 0);
 
+	printf("[RUN]\tSet TF and issue a syscall\n");
+	do_it(X86_EFLAGS_TF);
+
 	printf("[RUN]\tSet NT|TF and issue a syscall\n");
 	do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+	printf("[RUN]\tSet AC|TF and issue a syscall\n");
+	do_it(X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+	printf("[RUN]\tSet NT|AC|TF and issue a syscall\n");
+	do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+	/*
+	 * Now try DF.  This is evil and it's plausible that we will crash
+	 * glibc, but glibc would have to do something rather surprising
+	 * for this to happen.
+	 */
+	printf("[RUN]\tSet DF and issue a syscall\n");
+	do_it(X86_EFLAGS_DF);
+
+	printf("[RUN]\tSet TF|DF and issue a syscall\n");
+	do_it(X86_EFLAGS_TF | X86_EFLAGS_DF);
+
 	return nerrs == 0 ? 0 : 1;
 }
@@ -20,6 +20,8 @@
 #include <setjmp.h>
 #include <sys/uio.h>
 
+#include "helpers.h"
+
 #ifdef __x86_64__
 # define VSYS(x) (x)
 #else
@@ -493,21 +495,8 @@ static int test_process_vm_readv(void)
 }
 
 #ifdef __x86_64__
-#define X86_EFLAGS_TF (1UL << 8)
-
 static volatile sig_atomic_t num_vsyscall_traps;
 
-static unsigned long get_eflags(void)
-{
-	unsigned long eflags;
-	asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
-	return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
-	asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
-}
-
 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t *)ctx_void;
...
@@ -11,6 +11,8 @@
 #include <features.h>
 #include <stdio.h>
 
+#include "helpers.h"
+
 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
 
 int main()
@@ -53,27 +55,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		err(1, "sigaction");
 }
 
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-static unsigned long get_eflags(void)
-{
-	unsigned long eflags;
-	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
-	return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
-	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
-		      : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
 static volatile sig_atomic_t nerrs;
 static unsigned long sysinfo;
 static bool got_sysinfo = false;
...