Commit ac07f5c3 authored by Linus Torvalds

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/fpu update from Ingo Molnar:
 "The biggest change is the addition of the non-lazy (eager) FPU saving
  support model and enabling it on CPUs with optimized xsaveopt/xrstor
  FPU state saving instructions.

  There are also various Sparse fixes"

Fix up trivial add-add conflict in arch/x86/kernel/traps.c

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, kvm: fix kvm's usage of kernel_fpu_begin/end()
  x86, fpu: remove cpu_has_xmm check in the fx_finit()
  x86, fpu: make eagerfpu= boot param tri-state
  x86, fpu: enable eagerfpu by default for xsaveopt
  x86, fpu: decouple non-lazy/eager fpu restore from xsave
  x86, fpu: use non-lazy fpu restore for processors supporting xsave
  lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models
  x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage
  x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()
  x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()
  x86, fpu: drop_fpu() before restoring new state from sigframe
  x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels
  x86, fpu: Consolidate inline asm routines for saving/restoring fpu state
  x86, signal: Cleanup ifdefs and is_ia32, is_x32
parents 3b29b03a b1a74bf8
@@ -1833,6 +1833,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
 
+	eagerfpu=	[X86]
+			on	enable eager fpu restore
+			off	disable eager fpu restore
+			auto	selects the default scheme, which automatically
+				enables eagerfpu restore for xsaveopt.
+
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
...
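A usage note rather than part of the patch: with the new "auto" default, xsaveopt-capable CPUs get the eager model with no boot parameter at all, and the lazy model can still be forced from the boot loader. An illustrative command line (kernel image and root device are placeholders):

        vmlinuz root=/dev/sda1 ro eagerfpu=off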
@@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-		err |= restore_i387_xstate_ia32(buf);
+		err |= restore_xstate_sig(buf, 1);
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
@@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 		sp = (unsigned long) ka->sa.sa_restorer;
 
 	if (used_math()) {
-		sp = sp - sig_xstate_ia32_size;
+		unsigned long fx_aligned, math_size;
+
+		sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
 		*fpstate = (struct _fpstate_ia32 __user *) sp;
-		if (save_i387_xstate_ia32(*fpstate) < 0)
+		if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+				    math_size) < 0)
 			return (void __user *) -1L;
 	}
...
@@ -97,6 +97,7 @@
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	(3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -300,12 +301,14 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_xsaveopt	boot_cpu_has(X86_FEATURE_XSAVEOPT)
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
+#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
...
This diff is collapsed.
@@ -19,12 +19,37 @@ struct pt_regs;
 struct user_i387_struct;
 
 extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);
 
 extern bool irq_fpu_usable(void);
-extern void kernel_fpu_begin(void);
-extern void kernel_fpu_end(void);
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), preempt notifier
+ * should call the __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+
+static inline void kernel_fpu_begin(void)
+{
+	WARN_ON_ONCE(!irq_fpu_usable());
+	preempt_disable();
+	__kernel_fpu_begin();
+}
+
+static inline void kernel_fpu_end(void)
+{
+	__kernel_fpu_end();
+	preempt_enable();
+}
 
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
...
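For readers skimming the diff, a minimal sketch of how the new wrappers above are meant to be used by ordinary in-kernel SSE code (illustrative only; the function name and payload are invented, not taken from this series):

	/* Illustrative sketch: copy 16 bytes through %xmm0 inside a kernel FPU section. */
	static void example_xmm_copy16(void *dst, const void *src)
	{
		kernel_fpu_begin();	/* disables preemption and parks the user FPU state */
		asm volatile("movups (%0), %%xmm0\n\t"
			     "movups %%xmm0, (%1)"
			     : : "r" (src), "r" (dst) : "memory");
		kernel_fpu_end();	/* ends the FPU section and re-enables preemption */
	}

Callers that cannot keep preemption disabled across the whole section, such as KVM per the comment above, use the bare __kernel_fpu_begin()/__kernel_fpu_end() pair and manage preemption themselves.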
@@ -31,6 +31,10 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#ifndef CONFIG_COMPAT
+typedef sigset_t compat_sigset_t;
+#endif
+
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
...
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
  */
 
-#define XMMS_SAVE \
-do { \
-	preempt_disable(); \
-	cr0 = read_cr0(); \
-	clts(); \
-	asm volatile( \
-		"movups %%xmm0,(%0)	;\n\t" \
-		"movups %%xmm1,0x10(%0)	;\n\t" \
-		"movups %%xmm2,0x20(%0)	;\n\t" \
-		"movups %%xmm3,0x30(%0)	;\n\t" \
-		: \
-		: "r" (xmm_save) \
-		: "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
-	asm volatile( \
-		"sfence			;\n\t" \
-		"movups (%0),%%xmm0	;\n\t" \
-		"movups 0x10(%0),%%xmm1	;\n\t" \
-		"movups 0x20(%0),%%xmm2	;\n\t" \
-		"movups 0x30(%0),%%xmm3	;\n\t" \
-		: \
-		: "r" (xmm_save) \
-		: "memory"); \
-	write_cr0(cr0); \
-	preempt_enable(); \
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
 #define PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)	;\n"
@@ -587,10 +555,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	:
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	/* Make sure GCC forgets anything it knows about p4 or p5,
 	   such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	   like assuming they have some legal value. */
 	asm("" : "=r" (p4), "=r" (p5));
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_pIII_sse = {
...
@@ -34,41 +34,7 @@
  * no advantages to be gotten from x86-64 here anyways.
  */
 
-typedef struct {
-	unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
-   tell it to do a clts before the register saving. */
-#define XMMS_SAVE \
-do { \
-	preempt_disable(); \
-	asm volatile( \
-		"movq %%cr0,%0		;\n\t" \
-		"clts			;\n\t" \
-		"movups %%xmm0,(%1)	;\n\t" \
-		"movups %%xmm1,0x10(%1)	;\n\t" \
-		"movups %%xmm2,0x20(%1)	;\n\t" \
-		"movups %%xmm3,0x30(%1)	;\n\t" \
-		: "=&r" (cr0) \
-		: "r" (xmm_save) \
-		: "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
-	asm volatile( \
-		"sfence			;\n\t" \
-		"movups (%1),%%xmm0	;\n\t" \
-		"movups 0x10(%1),%%xmm1	;\n\t" \
-		"movups 0x20(%1),%%xmm2	;\n\t" \
-		"movups 0x30(%1),%%xmm3	;\n\t" \
-		"movq %0,%%cr0		;\n\t" \
-		: \
-		: "r" (cr0), "r" (xmm_save) \
-		: "memory"); \
-	preempt_enable(); \
-} while (0)
+#include <asm/i387.h>
 
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
@@ -91,10 +57,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned int lines = bytes >> 8;
-	unsigned long cr0;
-	xmm_store_t xmm_save[4];
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
-
-	XMMS_SAVE;
 
+	kernel_fpu_begin();
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] "r" (256UL)
 	: "memory");
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_sse = {
...
@@ -20,32 +20,6 @@
 #include <linux/compiler.h>
 #include <asm/i387.h>
 
-#define ALIGN32 __aligned(32)
-
-#define YMM_SAVED_REGS 4
-
-#define YMMS_SAVE \
-do { \
-	preempt_disable(); \
-	cr0 = read_cr0(); \
-	clts(); \
-	asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
-	asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
-	asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
-	asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
-} while (0);
-
-#define YMMS_RESTORE \
-do { \
-	asm volatile("sfence" : : : "memory"); \
-	asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
-	asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
-	asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
-	asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
-	write_cr0(cr0); \
-	preempt_enable(); \
-} while (0);
-
 #define BLOCK4(i) \
 		BLOCK(32 * i, 0) \
 		BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
 
 static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -82,16 +55,15 @@ do { \
 		p1 = (unsigned long *)((uintptr_t)p1 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -113,16 +85,15 @@ do { \
 		p2 = (unsigned long *)((uintptr_t)p2 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -147,16 +118,15 @@ do { \
 		p3 = (unsigned long *)((uintptr_t)p3 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3, unsigned long *p4)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -184,7 +154,7 @@ do { \
 		p4 = (unsigned long *)((uintptr_t)p4 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_avx = {
...
@@ -34,17 +34,14 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
-extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
-			    void __user *fpstate,
-			    struct _fpx_sw_bytes *sw);
 
-static inline int fpu_xrstor_checking(struct fpu *fpu)
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
 {
-	struct xsave_struct *fx = &fpu->state->xsave;
 	int err;
 
 	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
@@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 	 * Clear the xsave header first, so that reserved fields are
 	 * initialized to zero.
 	 */
-	err = __clear_user(&buf->xsave_hdr,
-			   sizeof(struct xsave_hdr_struct));
+	err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
 	if (unlikely(err))
 		return -EFAULT;
 
@@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 		     : [err] "=r" (err)
 		     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
 		     : "memory");
-	if (unlikely(err) && __clear_user(buf, xstate_size))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
 	return err;
 }
...
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 
 	if (is_uv_system())
 		uv_cpu_init();
@@ -1350,6 +1349,5 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 }
 #endif
This diff is collapsed.
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
-	unlazy_fpu(src);
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
 		ret = fpu_alloc(&dst->thread.fpu);
 		if (ret)
 			return ret;
-		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+		fpu_copy(dst, src);
 	}
 	return 0;
 }
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
 				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	clear_fpu(tsk);
-	clear_used_math();
-}
-
 /*
  * Free current thread data structures etc..
  */
@@ -163,7 +151,13 @@ void flush_thread(void)
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_fpu(tsk);
+	drop_init_fpu(tsk);
+	/*
+	 * Free the FPU state for non xsave platforms. They get reallocated
+	 * lazily at the first use.
+	 */
+	if (!use_eager_fpu())
+		free_thread_xstate(tsk);
 }
 
 static void hard_disable_TSC(void)
...
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
...
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 
 void
...
@@ -1333,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
 #define genregs32_get		genregs_get
 #define genregs32_set		genregs_set
 
-#define user_i387_ia32_struct	user_i387_struct
-#define user32_fxsr_struct	user_fxsr_struct
-
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
...
@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		regs->orig_ax = -1;		/* disable syscall checks */
 
 		get_user_ex(buf, &sc->fpstate);
-		err |= restore_i387_xstate(buf);
+		err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
@@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	     void __user **fpstate)
 {
 	/* Default to using normal stack */
+	unsigned long math_size = 0;
 	unsigned long sp = regs->sp;
+	unsigned long buf_fx = 0;
 	int onsigstack = on_sig_stack(sp);
 
-#ifdef CONFIG_X86_64
 	/* redzone */
-	sp -= 128;
-#endif /* CONFIG_X86_64 */
+	if (config_enabled(CONFIG_X86_64))
+		sp -= 128;
 
 	if (!onsigstack) {
 		/* This is the X/Open sanctioned signal stack switching.  */
 		if (ka->sa.sa_flags & SA_ONSTACK) {
 			if (current->sas_ss_size)
 				sp = current->sas_ss_sp + current->sas_ss_size;
-		} else {
-#ifdef CONFIG_X86_32
-			/* This is the legacy signal stack switching. */
-			if ((regs->ss & 0xffff) != __USER_DS &&
-				!(ka->sa.sa_flags & SA_RESTORER) &&
-				ka->sa.sa_restorer)
+		} else if (config_enabled(CONFIG_X86_32) &&
+			   (regs->ss & 0xffff) != __USER_DS &&
+			   !(ka->sa.sa_flags & SA_RESTORER) &&
+			   ka->sa.sa_restorer) {
+			/* This is the legacy signal stack switching. */
 				sp = (unsigned long) ka->sa.sa_restorer;
-#endif /* CONFIG_X86_32 */
 		}
 	}
 
 	if (used_math()) {
-		sp -= sig_xstate_size;
-#ifdef CONFIG_X86_64
-		sp = round_down(sp, 64);
-#endif /* CONFIG_X86_64 */
+		sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+				     &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
 	}
 
@@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	if (onsigstack && !likely(on_sig_stack(sp)))
 		return (void __user *)-1L;
 
-	/* save i387 state */
-	if (used_math() && save_i387_xstate(*fpstate) < 0)
+	/* save i387 and extended state */
+	if (used_math() &&
+	    save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
 	return (void __user *)sp;
@@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 }
 #endif /* CONFIG_X86_32 */
 
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+			      siginfo_t *info, compat_sigset_t *set,
+			      struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_X32_ABI
+	struct rt_sigframe_x32 __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user32(&frame->info, info))
+			return -EFAULT;
+	}
+
+	put_user_try {
+		/* Create the ucontext.  */
+		if (cpu_has_xsave)
+			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+		else
+			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(0, &frame->uc.uc_link);
+		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+		put_user_ex(sas_ss_flags(regs->sp),
+			    &frame->uc.uc_stack.ss_flags);
+		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		put_user_ex(0, &frame->uc.uc__pad0);
+		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+					regs, set->sig[0]);
+		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+		if (ka->sa.sa_flags & SA_RESTORER) {
+			restorer = ka->sa.sa_restorer;
+		} else {
+			/* could use a vstub here */
+			restorer = NULL;
+			err |= -EFAULT;
+		}
+		put_user_ex(restorer, &frame->pretcode);
+	} put_user_catch(err);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long) frame;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+
+	/* We use the x32 calling convention here... */
+	regs->di = sig;
+	regs->si = (unsigned long) &frame->info;
+	regs->dx = (unsigned long) &frame->uc;
+
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+
+	regs->cs = __USER_CS;
+	regs->ss = __USER_DS;
+#endif	/* CONFIG_X86_X32_ABI */
+
+	return 0;
+}
+
 #ifdef CONFIG_X86_32
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +678,22 @@ static int signr_convert(int sig)
 	return sig;
 }
 
-#ifdef CONFIG_X86_32
-
-#define is_ia32	1
-#define ia32_setup_frame	__setup_frame
-#define ia32_setup_rt_frame	__setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32	test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32	0
-#endif /* CONFIG_IA32_EMULATION */
-
-#ifdef CONFIG_X86_X32_ABI
-#define is_x32	test_thread_flag(TIF_X32)
-
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
-			      siginfo_t *info, compat_sigset_t *set,
-			      struct pt_regs *regs);
-#else /* !CONFIG_X86_X32_ABI */
-#define is_x32	0
-#endif /* CONFIG_X86_X32_ABI */
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-		sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		sigset_t *set, struct pt_regs *regs);
-
-#endif /* CONFIG_X86_32 */
-
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       struct pt_regs *regs)
 {
 	int usig = signr_convert(sig);
 	sigset_t *set = sigmask_to_save();
+	compat_sigset_t *cset = (compat_sigset_t *) set;
 
 	/* Set up the stack frame */
-	if (is_ia32) {
+	if (is_ia32_frame()) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			return ia32_setup_rt_frame(usig, ka, info, set, regs);
+			return ia32_setup_rt_frame(usig, ka, info, cset, regs);
 		else
-			return ia32_setup_frame(usig, ka, set, regs);
-#ifdef CONFIG_X86_X32_ABI
-	} else if (is_x32) {
-		return x32_setup_rt_frame(usig, ka, info,
-					  (compat_sigset_t *)set, regs);
-#endif
+			return ia32_setup_frame(usig, ka, cset, regs);
+	} else if (is_x32_frame()) {
+		return x32_setup_rt_frame(usig, ka, info, cset, regs);
 	} else {
 		return __setup_rt_frame(sig, ka, info, set, regs);
 	}
@@ -828,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 }
 
 #ifdef CONFIG_X86_X32_ABI
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
-			      siginfo_t *info, compat_sigset_t *set,
-			      struct pt_regs *regs)
-{
-	struct rt_sigframe_x32 __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		if (copy_siginfo_to_user32(&frame->info, info))
-			return -EFAULT;
-	}
-
-	put_user_try {
-		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
-		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-		put_user_ex(0, &frame->uc.uc__pad0);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-					regs, set->sig[0]);
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-		if (ka->sa.sa_flags & SA_RESTORER) {
-			restorer = ka->sa.sa_restorer;
-		} else {
-			/* could use a vstub here */
-			restorer = NULL;
-			err |= -EFAULT;
-		}
-		put_user_ex(restorer, &frame->pretcode);
-	} put_user_catch(err);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long) frame;
-	regs->ip = (unsigned long) ka->sa.sa_handler;
-
-	/* We use the x32 calling convention here... */
-	regs->di = sig;
-	regs->si = (unsigned long) &frame->info;
-	regs->dx = (unsigned long) &frame->uc;
-
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
-
-	regs->cs = __USER_CS;
-	regs->ss = __USER_DS;
-
-	return 0;
-}
-
 asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe_x32 __user *frame;
...
@@ -628,11 +628,12 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
+
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
+		drop_init_fpu(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -645,6 +646,8 @@ dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	exception_enter(regs);
+	BUG_ON(use_eager_fpu());
+
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
...
This diff is collapsed.
@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (user_has_fpu())
-		clts();
+	/*
+	 * If the FPU is not active (through the host task or
+	 * the guest vcpu), then restore the cr0.TS bit.
+	 */
+	if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
+		stts();
 	load_gdt(&__get_cpu_var(host_gdt));
 }
 
@@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void)
 	unsigned long tmpl;
 	struct desc_ptr dt;
 
-	vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
+	vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
 	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
 	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
...
@@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
-	unlazy_fpu(current);
+	__kernel_fpu_begin();
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
@@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 	vcpu->guest_fpu_loaded = 0;
 	fpu_save_init(&vcpu->arch.guest_fpu);
+	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
 	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	trace_kvm_fpu(0);
...
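The two hunks above are the "KVM model" referred to in the <asm/i387.h> comment: the guest's FPU state stays loaded while the vcpu runs, possibly across preemption, so the begin/end halves are split. A schematic sketch of that protocol with hypothetical helper names (not the actual KVM code):

	/*
	 * Schematic only: assumes the caller's preempt notifier runs the put
	 * path below before the task is ever switched out.
	 */
	static void example_fpu_load(struct fpu *guest_fpu)
	{
		preempt_disable();
		__kernel_fpu_begin();			/* park the host/user FPU state */
		fpu_restore_checking(guest_fpu);	/* load the private FPU image */
		preempt_enable();
	}

	static void example_fpu_put(struct fpu *guest_fpu)
	{
		fpu_save_init(guest_fpu);		/* stash the private FPU image */
		__kernel_fpu_end();
	}

Short, self-contained FPU sections should keep using kernel_fpu_begin()/kernel_fpu_end(), which bundle the preemption handling.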
@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * we set it now, so we can trap and pass that trap to the Guest if it
 	 * uses the FPU.
 	 */
-	if (cpu->ts)
-		unlazy_fpu(current);
+	if (cpu->ts && user_has_fpu())
+		stts();
 
 	/*
 	 * SYSENTER is an optimized way of doing system calls.  We can't allow
@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	if (boot_cpu_has(X86_FEATURE_SEP))
 		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
+	/* Clear the host TS bit if it was set above. */
+	if (cpu->ts && user_has_fpu())
+		clts();
+
 	/*
 	 * If the Guest page faulted, then the cr2 register will tell us the
 	 * bad virtual address.  We have to grab this now, because once we
@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * a different CPU. So all the critical stuff should be done
 	 * before this.
 	 */
-	else if (cpu->regs->trapnum == 7)
+	else if (cpu->regs->trapnum == 7 && !user_has_fpu())
 		math_state_restore();
 }
...