Commit ac07f5c3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/fpu update from Ingo Molnar:
 "The biggest change is the addition of the non-lazy (eager) FPU saving
  support model and enabling it on CPUs with optimized xsaveopt/xrstor
  FPU state saving instructions.

  There are also various Sparse fixes"

Fix up trivial add-add conflict in arch/x86/kernel/traps.c

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, kvm: fix kvm's usage of kernel_fpu_begin/end()
  x86, fpu: remove cpu_has_xmm check in the fx_finit()
  x86, fpu: make eagerfpu= boot param tri-state
  x86, fpu: enable eagerfpu by default for xsaveopt
  x86, fpu: decouple non-lazy/eager fpu restore from xsave
  x86, fpu: use non-lazy fpu restore for processors supporting xsave
  lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models
  x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage
  x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()
  x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()
  x86, fpu: drop_fpu() before restoring new state from sigframe
  x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels
  x86, fpu: Consolidate inline asm routines for saving/restoring fpu state
  x86, signal: Cleanup ifdefs and is_ia32, is_x32
parents 3b29b03a b1a74bf8
...@@ -1833,6 +1833,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -1833,6 +1833,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
and restore using xsave. The kernel will fallback to and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state. enabling legacy floating-point and sse state.
eagerfpu= [X86]
on enable eager fpu restore
off disable eager fpu restore
auto selects the default scheme, which automatically
enables eagerfpu restore for xsaveopt.
nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
wfi(ARM) instruction doesn't work correctly and not to wfi(ARM) instruction doesn't work correctly and not to
use it. This is also useful when using JTAG debugger. use it. This is also useful when using JTAG debugger.
......
...@@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, ...@@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
get_user_ex(tmp, &sc->fpstate); get_user_ex(tmp, &sc->fpstate);
buf = compat_ptr(tmp); buf = compat_ptr(tmp);
err |= restore_i387_xstate_ia32(buf); err |= restore_xstate_sig(buf, 1);
get_user_ex(*pax, &sc->ax); get_user_ex(*pax, &sc->ax);
} get_user_catch(err); } get_user_catch(err);
...@@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ...@@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
sp = (unsigned long) ka->sa.sa_restorer; sp = (unsigned long) ka->sa.sa_restorer;
if (used_math()) { if (used_math()) {
sp = sp - sig_xstate_ia32_size; unsigned long fx_aligned, math_size;
sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
*fpstate = (struct _fpstate_ia32 __user *) sp; *fpstate = (struct _fpstate_ia32 __user *) sp;
if (save_i387_xstate_ia32(*fpstate) < 0) if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
math_size) < 0)
return (void __user *) -1L; return (void __user *) -1L;
} }
......
...@@ -97,6 +97,7 @@ ...@@ -97,6 +97,7 @@
#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
...@@ -300,12 +301,14 @@ extern const char * const x86_power_flags[32]; ...@@ -300,12 +301,14 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1 # define cpu_has_invlpg 1
......
This diff is collapsed.
...@@ -19,12 +19,37 @@ struct pt_regs; ...@@ -19,12 +19,37 @@ struct pt_regs;
struct user_i387_struct; struct user_i387_struct;
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
extern void fpu_finit(struct fpu *fpu);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern void math_state_restore(void); extern void math_state_restore(void);
extern bool irq_fpu_usable(void); extern bool irq_fpu_usable(void);
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void); /*
* Careful: __kernel_fpu_begin/end() must be called with preempt disabled
* and they don't touch the preempt state on their own.
* If you enable preemption after __kernel_fpu_begin(), the preempt notifier
* should call __kernel_fpu_end() to prevent the kernel/user FPU
* state from getting corrupted. KVM for example uses this model.
*
* All other cases use kernel_fpu_begin/end() which disable preemption
* during kernel FPU usage.
*/
extern void __kernel_fpu_begin(void);
extern void __kernel_fpu_end(void);
/*
 * Begin a kernel-mode FPU section: disable preemption, then call
 * __kernel_fpu_begin(). Pair every call with kernel_fpu_end(), which
 * re-enables preemption.
 */
static inline void kernel_fpu_begin(void)
{
/* Flag callers for which irq_fpu_usable() reports false. */
WARN_ON_ONCE(!irq_fpu_usable());
/* __kernel_fpu_begin() must be called with preemption disabled. */
preempt_disable();
__kernel_fpu_begin();
}
/*
 * End a kernel-mode FPU section opened by kernel_fpu_begin(): call
 * __kernel_fpu_end(), then re-enable preemption.
 */
static inline void kernel_fpu_end(void)
{
/* __kernel_fpu_end() must run while preemption is still disabled. */
__kernel_fpu_end();
preempt_enable();
}
/* /*
* Some instructions like VIA's padlock instructions generate a spurious * Some instructions like VIA's padlock instructions generate a spurious
......
...@@ -31,6 +31,10 @@ typedef struct { ...@@ -31,6 +31,10 @@ typedef struct {
unsigned long sig[_NSIG_WORDS]; unsigned long sig[_NSIG_WORDS];
} sigset_t; } sigset_t;
#ifndef CONFIG_COMPAT
typedef sigset_t compat_sigset_t;
#endif
#else #else
/* Here we must cater to libcs that poke about in kernel headers. */ /* Here we must cater to libcs that poke about in kernel headers. */
......
...@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { ...@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
* Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
*/ */
#define XMMS_SAVE \
do { \
preempt_disable(); \
cr0 = read_cr0(); \
clts(); \
asm volatile( \
"movups %%xmm0,(%0) ;\n\t" \
"movups %%xmm1,0x10(%0) ;\n\t" \
"movups %%xmm2,0x20(%0) ;\n\t" \
"movups %%xmm3,0x30(%0) ;\n\t" \
: \
: "r" (xmm_save) \
: "memory"); \
} while (0)
#define XMMS_RESTORE \
do { \
asm volatile( \
"sfence ;\n\t" \
"movups (%0),%%xmm0 ;\n\t" \
"movups 0x10(%0),%%xmm1 ;\n\t" \
"movups 0x20(%0),%%xmm2 ;\n\t" \
"movups 0x30(%0),%%xmm3 ;\n\t" \
: \
: "r" (xmm_save) \
: "memory"); \
write_cr0(cr0); \
preempt_enable(); \
} while (0)
#define ALIGN16 __attribute__((aligned(16)))
#define OFFS(x) "16*("#x")" #define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")" #define PF_OFFS(x) "256+16*("#x")"
#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
...@@ -587,10 +555,8 @@ static void ...@@ -587,10 +555,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{ {
unsigned long lines = bytes >> 8; unsigned long lines = bytes >> 8;
char xmm_save[16*4] ALIGN16;
int cr0;
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) ...@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: :
: "memory"); : "memory");
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3) unsigned long *p3)
{ {
unsigned long lines = bytes >> 8; unsigned long lines = bytes >> 8;
char xmm_save[16*4] ALIGN16;
int cr0;
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: :
: "memory" ); : "memory" );
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4) unsigned long *p3, unsigned long *p4)
{ {
unsigned long lines = bytes >> 8; unsigned long lines = bytes >> 8;
char xmm_save[16*4] ALIGN16;
int cr0;
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: :
: "memory" ); : "memory" );
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5) unsigned long *p3, unsigned long *p4, unsigned long *p5)
{ {
unsigned long lines = bytes >> 8; unsigned long lines = bytes >> 8;
char xmm_save[16*4] ALIGN16;
int cr0;
XMMS_SAVE; kernel_fpu_begin();
/* Make sure GCC forgets anything it knows about p4 or p5, /* Make sure GCC forgets anything it knows about p4 or p5,
such that it won't pass to the asm volatile below a such that it won't pass to the asm volatile below a
...@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
like assuming they have some legal value. */ like assuming they have some legal value. */
asm("" : "=r" (p4), "=r" (p5)); asm("" : "=r" (p4), "=r" (p5));
XMMS_RESTORE; kernel_fpu_end();
} }
static struct xor_block_template xor_block_pIII_sse = { static struct xor_block_template xor_block_pIII_sse = {
......
...@@ -34,41 +34,7 @@ ...@@ -34,41 +34,7 @@
* no advantages to be gotten from x86-64 here anyways. * no advantages to be gotten from x86-64 here anyways.
*/ */
typedef struct { #include <asm/i387.h>
unsigned long a, b;
} __attribute__((aligned(16))) xmm_store_t;
/* Doesn't use gcc to save the XMM registers, because there is no easy way to
tell it to do a clts before the register saving. */
#define XMMS_SAVE \
do { \
preempt_disable(); \
asm volatile( \
"movq %%cr0,%0 ;\n\t" \
"clts ;\n\t" \
"movups %%xmm0,(%1) ;\n\t" \
"movups %%xmm1,0x10(%1) ;\n\t" \
"movups %%xmm2,0x20(%1) ;\n\t" \
"movups %%xmm3,0x30(%1) ;\n\t" \
: "=&r" (cr0) \
: "r" (xmm_save) \
: "memory"); \
} while (0)
#define XMMS_RESTORE \
do { \
asm volatile( \
"sfence ;\n\t" \
"movups (%1),%%xmm0 ;\n\t" \
"movups 0x10(%1),%%xmm1 ;\n\t" \
"movups 0x20(%1),%%xmm2 ;\n\t" \
"movups 0x30(%1),%%xmm3 ;\n\t" \
"movq %0,%%cr0 ;\n\t" \
: \
: "r" (cr0), "r" (xmm_save) \
: "memory"); \
preempt_enable(); \
} while (0)
#define OFFS(x) "16*("#x")" #define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")" #define PF_OFFS(x) "256+16*("#x")"
...@@ -91,10 +57,8 @@ static void ...@@ -91,10 +57,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{ {
unsigned int lines = bytes >> 8; unsigned int lines = bytes >> 8;
unsigned long cr0;
xmm_store_t xmm_save[4];
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) ...@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: [inc] "r" (256UL) : [inc] "r" (256UL)
: "memory"); : "memory");
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3) unsigned long *p3)
{ {
unsigned int lines = bytes >> 8; unsigned int lines = bytes >> 8;
xmm_store_t xmm_save[4];
unsigned long cr0;
XMMS_SAVE;
kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
#define BLOCK(i) \ #define BLOCK(i) \
...@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
: [inc] "r" (256UL) : [inc] "r" (256UL)
: "memory"); : "memory");
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4) unsigned long *p3, unsigned long *p4)
{ {
unsigned int lines = bytes >> 8; unsigned int lines = bytes >> 8;
xmm_store_t xmm_save[4];
unsigned long cr0;
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL) : [inc] "r" (256UL)
: "memory" ); : "memory" );
XMMS_RESTORE; kernel_fpu_end();
} }
static void static void
...@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5) unsigned long *p3, unsigned long *p4, unsigned long *p5)
{ {
unsigned int lines = bytes >> 8; unsigned int lines = bytes >> 8;
xmm_store_t xmm_save[4];
unsigned long cr0;
XMMS_SAVE; kernel_fpu_begin();
asm volatile( asm volatile(
#undef BLOCK #undef BLOCK
...@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, ...@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL) : [inc] "r" (256UL)
: "memory"); : "memory");
XMMS_RESTORE; kernel_fpu_end();
} }
static struct xor_block_template xor_block_sse = { static struct xor_block_template xor_block_sse = {
......
...@@ -20,32 +20,6 @@ ...@@ -20,32 +20,6 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <asm/i387.h> #include <asm/i387.h>
#define ALIGN32 __aligned(32)
#define YMM_SAVED_REGS 4
#define YMMS_SAVE \
do { \
preempt_disable(); \
cr0 = read_cr0(); \
clts(); \
asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
} while (0);
#define YMMS_RESTORE \
do { \
asm volatile("sfence" : : : "memory"); \
asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
write_cr0(cr0); \
preempt_enable(); \
} while (0);
#define BLOCK4(i) \ #define BLOCK4(i) \
BLOCK(32 * i, 0) \ BLOCK(32 * i, 0) \
BLOCK(32 * (i + 1), 1) \ BLOCK(32 * (i + 1), 1) \
...@@ -60,10 +34,9 @@ do { \ ...@@ -60,10 +34,9 @@ do { \
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{ {
unsigned long cr0, lines = bytes >> 9; unsigned long lines = bytes >> 9;
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
YMMS_SAVE kernel_fpu_begin();
while (lines--) { while (lines--) {
#undef BLOCK #undef BLOCK
...@@ -82,16 +55,15 @@ do { \ ...@@ -82,16 +55,15 @@ do { \
p1 = (unsigned long *)((uintptr_t)p1 + 512); p1 = (unsigned long *)((uintptr_t)p1 + 512);
} }
YMMS_RESTORE kernel_fpu_end();
} }
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2) unsigned long *p2)
{ {
unsigned long cr0, lines = bytes >> 9; unsigned long lines = bytes >> 9;
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
YMMS_SAVE kernel_fpu_begin();
while (lines--) { while (lines--) {
#undef BLOCK #undef BLOCK
...@@ -113,16 +85,15 @@ do { \ ...@@ -113,16 +85,15 @@ do { \
p2 = (unsigned long *)((uintptr_t)p2 + 512); p2 = (unsigned long *)((uintptr_t)p2 + 512);
} }
YMMS_RESTORE kernel_fpu_end();
} }
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3) unsigned long *p2, unsigned long *p3)
{ {
unsigned long cr0, lines = bytes >> 9; unsigned long lines = bytes >> 9;
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
YMMS_SAVE kernel_fpu_begin();
while (lines--) { while (lines--) {
#undef BLOCK #undef BLOCK
...@@ -147,16 +118,15 @@ do { \ ...@@ -147,16 +118,15 @@ do { \
p3 = (unsigned long *)((uintptr_t)p3 + 512); p3 = (unsigned long *)((uintptr_t)p3 + 512);
} }
YMMS_RESTORE kernel_fpu_end();
} }
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3, unsigned long *p4) unsigned long *p2, unsigned long *p3, unsigned long *p4)
{ {
unsigned long cr0, lines = bytes >> 9; unsigned long lines = bytes >> 9;
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
YMMS_SAVE kernel_fpu_begin();
while (lines--) { while (lines--) {
#undef BLOCK #undef BLOCK
...@@ -184,7 +154,7 @@ do { \ ...@@ -184,7 +154,7 @@ do { \
p4 = (unsigned long *)((uintptr_t)p4 + 512); p4 = (unsigned long *)((uintptr_t)p4 + 512);
} }
YMMS_RESTORE kernel_fpu_end();
} }
static struct xor_block_template xor_block_avx = { static struct xor_block_template xor_block_avx = {
......
...@@ -34,17 +34,14 @@ ...@@ -34,17 +34,14 @@
extern unsigned int xstate_size; extern unsigned int xstate_size;
extern u64 pcntxt_mask; extern u64 pcntxt_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern struct xsave_struct *init_xstate_buf;
extern void xsave_init(void); extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *sw);
static inline int fpu_xrstor_checking(struct fpu *fpu) static inline int fpu_xrstor_checking(struct xsave_struct *fx)
{ {
struct xsave_struct *fx = &fpu->state->xsave;
int err; int err;
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
...@@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf) ...@@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
* Clear the xsave header first, so that reserved fields are * Clear the xsave header first, so that reserved fields are
* initialized to zero. * initialized to zero.
*/ */
err = __clear_user(&buf->xsave_hdr, err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
sizeof(struct xsave_hdr_struct));
if (unlikely(err)) if (unlikely(err))
return -EFAULT; return -EFAULT;
...@@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf) ...@@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
: [err] "=r" (err) : [err] "=r" (err)
: "D" (buf), "a" (-1), "d" (-1), "0" (0) : "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory"); : "memory");
if (unlikely(err) && __clear_user(buf, xstate_size))
err = -EFAULT;
/* No need to clear here because the caller clears USED_MATH */
return err; return err;
} }
......
...@@ -165,10 +165,15 @@ void __init check_bugs(void) ...@@ -165,10 +165,15 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data); print_cpu_info(&boot_cpu_data);
#endif #endif
check_config(); check_config();
check_fpu();
check_hlt(); check_hlt();
check_popad(); check_popad();
init_utsname()->machine[1] = init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions(); alternative_instructions();
/*
* kernel_fpu_begin/end() in check_fpu() relies on the patched
* alternative instructions.
*/
check_fpu();
} }
...@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void) ...@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs(); dbg_restore_debug_regs();
fpu_init(); fpu_init();
xsave_init();
if (is_uv_system()) if (is_uv_system())
uv_cpu_init(); uv_cpu_init();
...@@ -1350,6 +1349,5 @@ void __cpuinit cpu_init(void) ...@@ -1350,6 +1349,5 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs(); dbg_restore_debug_regs();
fpu_init(); fpu_init();
xsave_init();
} }
#endif #endif
...@@ -19,24 +19,17 @@ ...@@ -19,24 +19,17 @@
#include <asm/fpu-internal.h> #include <asm/fpu-internal.h>
#include <asm/user.h> #include <asm/user.h>
#ifdef CONFIG_X86_64
# include <asm/sigcontext32.h>
# include <asm/user32.h>
#else
# define save_i387_xstate_ia32 save_i387_xstate
# define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate
# define _xstate_ia32 _xstate
# define sig_xstate_ia32_size sig_xstate_size
# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
#endif
/* /*
* Were we in an interrupt that interrupted kernel mode? * Were we in an interrupt that interrupted kernel mode?
* *
* We can do a kernel_fpu_begin/end() pair *ONLY* if that * For now, with eagerfpu we will return interrupted kernel FPU
* state as not-idle. TBD: Ideally we can change the return value
* to something like __thread_has_fpu(current). But to support
* nested uses we need to be careful about calling
* __thread_clear_has_fpu() before the FPU state is saved. For now,
* take the simple route!
*
* On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
* pair does nothing at all: the thread must not have fpu (so * pair does nothing at all: the thread must not have fpu (so
* that we don't try to save the FPU state), and TS must * that we don't try to save the FPU state), and TS must
* be set (so that the clts/stts pair does nothing that is * be set (so that the clts/stts pair does nothing that is
...@@ -44,6 +37,9 @@ ...@@ -44,6 +37,9 @@
*/ */
static inline bool interrupted_kernel_fpu_idle(void) static inline bool interrupted_kernel_fpu_idle(void)
{ {
if (use_eager_fpu())
return 0;
return !__thread_has_fpu(current) && return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS); (read_cr0() & X86_CR0_TS);
} }
...@@ -77,29 +73,29 @@ bool irq_fpu_usable(void) ...@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
} }
EXPORT_SYMBOL(irq_fpu_usable); EXPORT_SYMBOL(irq_fpu_usable);
void kernel_fpu_begin(void) void __kernel_fpu_begin(void)
{ {
struct task_struct *me = current; struct task_struct *me = current;
WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
if (__thread_has_fpu(me)) { if (__thread_has_fpu(me)) {
__save_init_fpu(me); __save_init_fpu(me);
__thread_clear_has_fpu(me); __thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */ /* We do 'stts()' in __kernel_fpu_end() */
} else { } else if (!use_eager_fpu()) {
this_cpu_write(fpu_owner_task, NULL); this_cpu_write(fpu_owner_task, NULL);
clts(); clts();
} }
} }
EXPORT_SYMBOL(kernel_fpu_begin); EXPORT_SYMBOL(__kernel_fpu_begin);
void kernel_fpu_end(void) void __kernel_fpu_end(void)
{ {
if (use_eager_fpu())
math_state_restore();
else
stts(); stts();
preempt_enable();
} }
EXPORT_SYMBOL(kernel_fpu_end); EXPORT_SYMBOL(__kernel_fpu_end);
void unlazy_fpu(struct task_struct *tsk) void unlazy_fpu(struct task_struct *tsk)
{ {
...@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) ...@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
} }
EXPORT_SYMBOL(unlazy_fpu); EXPORT_SYMBOL(unlazy_fpu);
#ifdef CONFIG_MATH_EMULATION unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
# define HAVE_HWFP 1
#endif
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size; unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size); EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata; static struct i387_fxsave_struct fx_scratch __cpuinitdata;
static void __cpuinit mxcsr_feature_mask_init(void) static void __cpuinit mxcsr_feature_mask_init(void)
{ {
unsigned long mask = 0; unsigned long mask = 0;
clts();
if (cpu_has_fxsr) { if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
asm volatile("fxsave %0" : : "m" (fx_scratch)); asm volatile("fxsave %0" : : "m" (fx_scratch));
...@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) ...@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
mask = 0x0000ffbf; mask = 0x0000ffbf;
} }
mxcsr_feature_mask &= mask; mxcsr_feature_mask &= mask;
stts();
} }
static void __cpuinit init_thread_xstate(void) static void __cpuinit init_thread_xstate(void)
...@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) ...@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
init_thread_xstate(); init_thread_xstate();
mxcsr_feature_mask_init(); mxcsr_feature_mask_init();
/* clean state in init */ xsave_init();
current_thread_info()->status = 0; eager_fpu_init();
clear_used_math();
} }
void fpu_finit(struct fpu *fpu) void fpu_finit(struct fpu *fpu)
...@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) ...@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
} }
if (cpu_has_fxsr) { if (cpu_has_fxsr) {
struct i387_fxsave_struct *fx = &fpu->state->fxsave; fx_finit(&fpu->state->fxsave);
memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
if (cpu_has_xmm)
fx->mxcsr = MXCSR_DEFAULT;
} else { } else {
struct i387_fsave_struct *fp = &fpu->state->fsave; struct i387_fsave_struct *fp = &fpu->state->fsave;
memset(fp, 0, xstate_size); memset(fp, 0, xstate_size);
...@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) ...@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
* FXSR floating point environment conversions. * FXSR floating point environment conversions.
*/ */
static void void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{ {
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
...@@ -491,7 +472,7 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) ...@@ -491,7 +472,7 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0])); memcpy(&to[i], &from[i], sizeof(to[0]));
} }
static void convert_to_fxsr(struct task_struct *tsk, void convert_to_fxsr(struct task_struct *tsk,
const struct user_i387_ia32_struct *env) const struct user_i387_ia32_struct *env)
{ {
...@@ -588,223 +569,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, ...@@ -588,223 +569,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
return ret; return ret;
} }
/*
* Signal frame handlers.
*/
static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
fp->status = fp->swd;
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
return -1;
return 1;
}
static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
struct user_i387_ia32_struct env;
int err = 0;
convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env)))
return -1;
err |= __put_user(fx->swd, &buf->status);
err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
if (err)
return -1;
if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
return -1;
return 1;
}
static int save_i387_xsave(void __user *buf)
{
struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fx = buf;
int err = 0;
sanitize_i387_state(tsk);
/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context.
* This will enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware applications can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
if (save_i387_fxsave(fx) < 0)
return -1;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
sizeof(struct _fpx_sw_bytes));
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_ia32_size
- FP_XSTATE_MAGIC2_SIZE));
if (err)
return -1;
return 1;
}
/*
 * Save the current task's math state into the 32-bit signal frame at 'buf'.
 *
 * Returns 0 if the task has not used math, -EACCES if 'buf' is not
 * writable for sig_xstate_ia32_size bytes, and otherwise 1 on success or
 * -1 on failure of the selected save routine.
 */
int save_i387_xstate_ia32(void __user *buf)
{
	struct _fpstate_ia32 __user *frame = (struct _fpstate_ia32 __user *) buf;
	struct task_struct *task = current;

	if (!used_math())
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
		return -EACCES;

	/*
	 * This will cause a "finit" to be triggered by the next
	 * attempted FPU operation by the 'current' process.
	 */
	clear_used_math();

	/* No hardware FPU: hand the frame to the soft-float regset getter. */
	if (!HAVE_HWFP) {
		int soft_err = fpregs_soft_get(current, NULL,
					       0, sizeof(struct user_i387_ia32_struct),
					       NULL, frame);

		return soft_err ? -1 : 1;
	}

	unlazy_fpu(task);

	/* Pick the richest save format the CPU supports. */
	if (cpu_has_xsave)
		return save_i387_xsave(frame);
	if (cpu_has_fxsr)
		return save_i387_fxsave(frame);

	return save_i387_fsave(frame);
}
/*
 * Load the current task's fsave image from the user-space signal frame.
 *
 * Returns the result of __copy_from_user() (zero on success).
 */
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
{
	struct i387_fsave_struct *fsave = &current->thread.fpu.state->fsave;

	return __copy_from_user(fsave, buf, sizeof(*fsave));
}
/*
 * Load 'size' bytes of fxsave state for the current task from the
 * _fxsr_env area of the user-space signal frame, then fold the legacy env
 * at the start of the frame into it via convert_to_fxsr().
 *
 * Returns 0 on success, 1 if either user-space copy fails.
 */
static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
			       unsigned int size)
{
	struct task_struct *task = current;
	struct i387_fxsave_struct *fxsave = &task->thread.fpu.state->fxsave;
	struct user_i387_ia32_struct legacy_env;
	int copy_failed;

	copy_failed = __copy_from_user(fxsave, &buf->_fxsr_env[0], size);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 * This is done before the error check, so it also applies when the
	 * copy above failed.
	 */
	fxsave->mxcsr &= mxcsr_feature_mask;

	if (copy_failed)
		return 1;
	if (__copy_from_user(&legacy_env, buf, sizeof(legacy_env)))
		return 1;

	convert_to_fxsr(task, &legacy_env);
	return 0;
}
/*
 * Restore the current task's in-memory state from a 32-bit signal frame at
 * 'buf' on an xsave-capable CPU.
 *
 * If check_for_xstate() returns non-zero, only the fxsave-sized FP/SSE image
 * is restored and xstate_bv is set to XSTATE_FPSSE. Otherwise the image is
 * restored using the size recorded in the frame's software bytes, and
 * xstate_bv in the task's xsave header is masked down as described below.
 *
 * Returns the value of restore_i387_fxsave(): 0 on success, 1 on failure.
 */
static int restore_i387_xsave(void __user *buf)
{
struct _fpx_sw_bytes fx_sw_user;
struct _fpstate_ia32 __user *fx_user =
((struct _fpstate_ia32 __user *) buf);
struct i387_fxsave_struct __user *fx =
(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
struct xsave_hdr_struct *xsave_hdr =
&current->thread.fpu.state->xsave.xsave_hdr;
u64 mask;
int err;
/* A non-zero result means no usable extended state: take the FP/SSE path. */
if (check_for_xstate(fx, buf, &fx_sw_user))
goto fx_only;
/* Feature bits the frame's software bytes claim are present. */
mask = fx_sw_user.xstate_bv;
err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
/* Keep only the feature bits that are in pcntxt_mask. */
xsave_hdr->xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
/*
* Init the state that is not present in the memory layout
* and enabled by the OS.
*/
/* Clears every bit that is set in pcntxt_mask but absent from the frame's mask. */
mask = ~(pcntxt_mask & ~mask);
xsave_hdr->xstate_bv &= mask;
return err;
fx_only:
/*
* Couldn't find the extended state information in the memory
* layout. Restore the FP/SSE and init the other extended state
* enabled by the OS.
*/
xsave_hdr->xstate_bv = XSTATE_FPSSE;
return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
}
/*
 * Restore the current task's math state from the 32-bit signal frame at
 * 'buf'.
 *
 * A NULL 'buf' discards any existing math state and returns 0. Otherwise
 * returns -EACCES if 'buf' is not readable for sig_xstate_ia32_size bytes,
 * the error from init_fpu() if the state could not be initialised, or the
 * result of the selected restore routine (0 on success).
 */
int restore_i387_xstate_ia32(void __user *buf)
{
	struct _fpstate_ia32 __user *frame = (struct _fpstate_ia32 __user *) buf;
	struct task_struct *task = current;
	int err;

	if (HAVE_HWFP)
		clear_fpu(task);

	/* No frame supplied: drop whatever math state the task had. */
	if (!buf) {
		if (used_math()) {
			clear_fpu(task);
			clear_used_math();
		}
		return 0;
	}

	if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
		return -EACCES;

	if (!used_math()) {
		err = init_fpu(task);
		if (err)
			return err;
	}

	/* Soft-float first, then the richest hardware format available. */
	if (!HAVE_HWFP) {
		err = fpregs_soft_set(current, NULL,
				      0, sizeof(struct user_i387_ia32_struct),
				      NULL, frame) != 0;
	} else if (cpu_has_xsave) {
		err = restore_i387_xsave(buf);
	} else if (cpu_has_fxsr) {
		err = restore_i387_fxsave(frame,
					  sizeof(struct i387_fxsave_struct));
	} else {
		err = restore_i387_fsave(frame);
	}

	/* Marked as used even when the restore reported an error. */
	set_used_math();
	return err;
}
/* /*
* FPU state for core dumps. * FPU state for core dumps.
* This is only used for a.out dumps now. * This is only used for a.out dumps now.
......
...@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) ...@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{ {
int ret; int ret;
unlazy_fpu(src);
*dst = *src; *dst = *src;
if (fpu_allocated(&src->thread.fpu)) { if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
ret = fpu_alloc(&dst->thread.fpu); ret = fpu_alloc(&dst->thread.fpu);
if (ret) if (ret)
return ret; return ret;
fpu_copy(&dst->thread.fpu, &src->thread.fpu); fpu_copy(dst, src);
} }
return 0; return 0;
} }
...@@ -97,16 +95,6 @@ void arch_task_cache_init(void) ...@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
SLAB_PANIC | SLAB_NOTRACK, NULL); SLAB_PANIC | SLAB_NOTRACK, NULL);
} }
/*
 * Discard the FPU state of 'tsk': reset its fpu_counter, call clear_fpu()
 * on it and clear the used-math flag.
 *
 * NOTE(review): clear_used_math() takes no task argument, so it presumably
 * acts on 'current' rather than on 'tsk' - confirm callers only pass current.
 */
static inline void drop_fpu(struct task_struct *tsk)
{
/*
* Forget coprocessor state..
*/
tsk->fpu_counter = 0;
clear_fpu(tsk);
clear_used_math();
}
/* /*
* Free current thread data structures etc.. * Free current thread data structures etc..
*/ */
...@@ -163,7 +151,13 @@ void flush_thread(void) ...@@ -163,7 +151,13 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk); flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
drop_fpu(tsk); drop_init_fpu(tsk);
/*
* Free the FPU state on platforms that do not use eager FPU restore; it
* gets reallocated lazily at the first use.
*/
if (!use_eager_fpu())
free_thread_xstate(tsk);
} }
static void hard_disable_TSC(void) static void hard_disable_TSC(void)
......
...@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) ...@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS; regs->cs = __USER_CS;
regs->ip = new_ip; regs->ip = new_ip;
regs->sp = new_sp; regs->sp = new_sp;
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
} }
EXPORT_SYMBOL_GPL(start_thread); EXPORT_SYMBOL_GPL(start_thread);
......
...@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, ...@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
regs->cs = _cs; regs->cs = _cs;
regs->ss = _ss; regs->ss = _ss;
regs->flags = X86_EFLAGS_IF; regs->flags = X86_EFLAGS_IF;
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
} }
void void
......
...@@ -1333,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { ...@@ -1333,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
#define genregs32_get genregs_get #define genregs32_get genregs_get
#define genregs32_set genregs_set #define genregs32_set genregs_set
#define user_i387_ia32_struct user_i387_struct
#define user32_fxsr_struct user_fxsr_struct
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
......
...@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, ...@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
regs->orig_ax = -1; /* disable syscall checks */ regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate); get_user_ex(buf, &sc->fpstate);
err |= restore_i387_xstate(buf); err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
get_user_ex(*pax, &sc->ax); get_user_ex(*pax, &sc->ax);
} get_user_catch(err); } get_user_catch(err);
...@@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, ...@@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void __user **fpstate) void __user **fpstate)
{ {
/* Default to using normal stack */ /* Default to using normal stack */
unsigned long math_size = 0;
unsigned long sp = regs->sp; unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp); int onsigstack = on_sig_stack(sp);
#ifdef CONFIG_X86_64
/* redzone */ /* redzone */
if (config_enabled(CONFIG_X86_64))
sp -= 128; sp -= 128;
#endif /* CONFIG_X86_64 */
if (!onsigstack) { if (!onsigstack) {
/* This is the X/Open sanctioned signal stack switching. */ /* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) { if (ka->sa.sa_flags & SA_ONSTACK) {
if (current->sas_ss_size) if (current->sas_ss_size)
sp = current->sas_ss_sp + current->sas_ss_size; sp = current->sas_ss_sp + current->sas_ss_size;
} else { } else if (config_enabled(CONFIG_X86_32) &&
#ifdef CONFIG_X86_32 (regs->ss & 0xffff) != __USER_DS &&
/* This is the legacy signal stack switching. */
if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) && !(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer; sp = (unsigned long) ka->sa.sa_restorer;
#endif /* CONFIG_X86_32 */
} }
} }
if (used_math()) { if (used_math()) {
sp -= sig_xstate_size; sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
#ifdef CONFIG_X86_64 &buf_fx, &math_size);
sp = round_down(sp, 64);
#endif /* CONFIG_X86_64 */
*fpstate = (void __user *)sp; *fpstate = (void __user *)sp;
} }
...@@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, ...@@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (onsigstack && !likely(on_sig_stack(sp))) if (onsigstack && !likely(on_sig_stack(sp)))
return (void __user *)-1L; return (void __user *)-1L;
/* save i387 state */ /* save i387 and extended state */
if (used_math() && save_i387_xstate(*fpstate) < 0) if (used_math() &&
save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L; return (void __user *)-1L;
return (void __user *)sp; return (void __user *)sp;
...@@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, ...@@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
} }
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
/*
 * Build an x32 rt signal frame on the user stack and point 'regs' at the
 * signal handler.
 *
 * @sig:  signal number, passed to the handler in regs->di
 * @ka:   signal action; supplies sa_flags, sa_restorer and sa_handler
 * @info: siginfo copied to the frame when SA_SIGINFO is set
 * @set:  signal mask stored in the frame's ucontext
 * @regs: user register state; updated to enter the handler
 *
 * Returns 0 on success and -EFAULT if the frame is not writable, any
 * user-space store fails, or the action has no SA_RESTORER. When
 * CONFIG_X86_X32_ABI is not defined the body is compiled out and the
 * function simply returns 0.
 */
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
/* get_sigframe() returns the frame address and may fill in fpstate. */
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}
/* Failures of the put_user_ex() stores are collected into err by put_user_catch(). */
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* A restorer is mandatory here: without SA_RESTORER the setup fails. */
if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);
/* Any accumulated error, whatever its value, is reported as -EFAULT. */
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
#endif /* CONFIG_X86_X32_ABI */
return 0;
}
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* /*
* Atomically swap in the new signal mask, and wait for a signal. * Atomically swap in the new signal mask, and wait for a signal.
...@@ -612,55 +678,22 @@ static int signr_convert(int sig) ...@@ -612,55 +678,22 @@ static int signr_convert(int sig)
return sig; return sig;
} }
#ifdef CONFIG_X86_32
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
#else /* !CONFIG_X86_32 */
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else /* !CONFIG_IA32_EMULATION */
#define is_ia32 0
#endif /* CONFIG_IA32_EMULATION */
#ifdef CONFIG_X86_X32_ABI
#define is_x32 test_thread_flag(TIF_X32)
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs);
#else /* !CONFIG_X86_X32_ABI */
#define is_x32 0
#endif /* CONFIG_X86_X32_ABI */
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs);
int ia32_setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
static int static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs) struct pt_regs *regs)
{ {
int usig = signr_convert(sig); int usig = signr_convert(sig);
sigset_t *set = sigmask_to_save(); sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */ /* Set up the stack frame */
if (is_ia32) { if (is_ia32_frame()) {
if (ka->sa.sa_flags & SA_SIGINFO) if (ka->sa.sa_flags & SA_SIGINFO)
return ia32_setup_rt_frame(usig, ka, info, set, regs); return ia32_setup_rt_frame(usig, ka, info, cset, regs);
else else
return ia32_setup_frame(usig, ka, set, regs); return ia32_setup_frame(usig, ka, cset, regs);
#ifdef CONFIG_X86_X32_ABI } else if (is_x32_frame()) {
} else if (is_x32) { return x32_setup_rt_frame(usig, ka, info, cset, regs);
return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
} else { } else {
return __setup_rt_frame(sig, ka, info, set, regs); return __setup_rt_frame(sig, ka, info, set, regs);
} }
...@@ -828,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) ...@@ -828,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
} }
#ifdef CONFIG_X86_X32_ABI #ifdef CONFIG_X86_X32_ABI
/*
 * Build an x32 rt signal frame on the user stack and point 'regs' at the
 * signal handler.
 *
 * @sig:  signal number, passed to the handler in regs->di
 * @ka:   signal action; supplies sa_flags, sa_restorer and sa_handler
 * @info: siginfo copied to the frame when SA_SIGINFO is set
 * @set:  signal mask stored in the frame's ucontext
 * @regs: user register state; updated to enter the handler
 *
 * Returns 0 on success and -EFAULT if the frame is not writable, any
 * user-space store fails, or the action has no SA_RESTORER.
 */
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
/* get_sigframe() returns the frame address and may fill in fpstate. */
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}
/* Failures of the put_user_ex() stores are collected into err by put_user_catch(). */
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* A restorer is mandatory here: without SA_RESTORER the setup fails. */
if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);
/* Any accumulated error, whatever its value, is reported as -EFAULT. */
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
return 0;
}
asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
{ {
struct rt_sigframe_x32 __user *frame; struct rt_sigframe_x32 __user *frame;
......
...@@ -628,11 +628,12 @@ void math_state_restore(void) ...@@ -628,11 +628,12 @@ void math_state_restore(void)
} }
__thread_fpu_begin(tsk); __thread_fpu_begin(tsk);
/* /*
* Paranoid restore. send a SIGSEGV if we fail to restore the state. * Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/ */
if (unlikely(restore_fpu_checking(tsk))) { if (unlikely(restore_fpu_checking(tsk))) {
__thread_fpu_end(tsk); drop_init_fpu(tsk);
force_sig(SIGSEGV, tsk); force_sig(SIGSEGV, tsk);
return; return;
} }
...@@ -645,6 +646,8 @@ dotraplinkage void __kprobes ...@@ -645,6 +646,8 @@ dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code) do_device_not_available(struct pt_regs *regs, long error_code)
{ {
exception_enter(regs); exception_enter(regs);
BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION #ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) { if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { }; struct math_emu_info info = { };
......
This diff is collapsed.
...@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ...@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif #endif
if (user_has_fpu()) /*
clts(); * If the FPU is not active (through the host task or
* the guest vcpu), then restore the cr0.TS bit.
*/
if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
stts();
load_gdt(&__get_cpu_var(host_gdt)); load_gdt(&__get_cpu_var(host_gdt));
} }
...@@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void) ...@@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void)
unsigned long tmpl; unsigned long tmpl;
struct desc_ptr dt; struct desc_ptr dt;
vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
......
...@@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
*/ */
kvm_put_guest_xcr0(vcpu); kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1; vcpu->guest_fpu_loaded = 1;
unlazy_fpu(current); __kernel_fpu_begin();
fpu_restore_checking(&vcpu->arch.guest_fpu); fpu_restore_checking(&vcpu->arch.guest_fpu);
trace_kvm_fpu(1); trace_kvm_fpu(1);
} }
...@@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
vcpu->guest_fpu_loaded = 0; vcpu->guest_fpu_loaded = 0;
fpu_save_init(&vcpu->arch.guest_fpu); fpu_save_init(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload; ++vcpu->stat.fpu_reload;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0); trace_kvm_fpu(0);
......
...@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) ...@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* we set it now, so we can trap and pass that trap to the Guest if it * we set it now, so we can trap and pass that trap to the Guest if it
* uses the FPU. * uses the FPU.
*/ */
if (cpu->ts) if (cpu->ts && user_has_fpu())
unlazy_fpu(current); stts();
/* /*
* SYSENTER is an optimized way of doing system calls. We can't allow * SYSENTER is an optimized way of doing system calls. We can't allow
...@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) ...@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
if (boot_cpu_has(X86_FEATURE_SEP)) if (boot_cpu_has(X86_FEATURE_SEP))
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
/* Clear the host TS bit if it was set above. */
if (cpu->ts && user_has_fpu())
clts();
/* /*
* If the Guest page faulted, then the cr2 register will tell us the * If the Guest page faulted, then the cr2 register will tell us the
* bad virtual address. We have to grab this now, because once we * bad virtual address. We have to grab this now, because once we
...@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) ...@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* a different CPU. So all the critical stuff should be done * a different CPU. So all the critical stuff should be done
* before this. * before this.
*/ */
else if (cpu->regs->trapnum == 7) else if (cpu->regs->trapnum == 7 && !user_has_fpu())
math_state_restore(); math_state_restore();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment