Commit 08ded2cd authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Borislav Petkov

x86/fpu: Get rid of the FNSAVE optimization

The FNSAVE support requires conditionals in quite some call paths because
FNSAVE reinitializes the FPU hardware. If the save has to preserve the FPU
register state then the caller has to conditionally restore it from memory
when FNSAVE is in use.

This also requires a conditional in context switch because the restore
avoidance optimization cannot work with FNSAVE. As this only affects 20+
year-old CPUs there is really no reason to keep this optimization
effective for FNSAVE. It's about time to not optimize for antiques anymore.

Just unconditionally FRSTOR the save content to the registers and clean up
the conditionals all over the place.
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210623121454.617369268@linutronix.de
parent ebe7234b
...@@ -83,6 +83,7 @@ extern void fpstate_init_soft(struct swregs_state *soft); ...@@ -83,6 +83,7 @@ extern void fpstate_init_soft(struct swregs_state *soft);
#else #else
static inline void fpstate_init_soft(struct swregs_state *soft) {} static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif #endif
extern void save_fpregs_to_fpstate(struct fpu *fpu);
#define user_insn(insn, output, input...) \ #define user_insn(insn, output, input...) \
({ \ ({ \
...@@ -375,8 +376,6 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) ...@@ -375,8 +376,6 @@ static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask)
return err; return err;
} }
extern int save_fpregs_to_fpstate(struct fpu *fpu);
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{ {
if (use_xsave()) { if (use_xsave()) {
...@@ -507,12 +506,17 @@ static inline void __fpregs_load_activate(void) ...@@ -507,12 +506,17 @@ static inline void __fpregs_load_activate(void)
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{ {
if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
if (!save_fpregs_to_fpstate(old_fpu)) save_fpregs_to_fpstate(old_fpu);
old_fpu->last_cpu = -1; /*
else * The save operation preserved register state, so the
old_fpu->last_cpu = cpu; * fpu_fpregs_owner_ctx is still @old_fpu. Store the
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when it returns on the same CPU and still owns the
* context.
*/
old_fpu->last_cpu = cpu;
/* But leave fpu_fpregs_owner_ctx! */
trace_x86_fpu_regs_deactivated(old_fpu); trace_x86_fpu_regs_deactivated(old_fpu);
} }
} }
......
...@@ -83,16 +83,20 @@ bool irq_fpu_usable(void) ...@@ -83,16 +83,20 @@ bool irq_fpu_usable(void)
EXPORT_SYMBOL(irq_fpu_usable); EXPORT_SYMBOL(irq_fpu_usable);
/* /*
* These must be called with preempt disabled. Returns * Save the FPU register state in fpu->state. The register state is
* 'true' if the FPU state is still intact and we can * preserved.
* keep registers active.
* *
* The legacy FNSAVE instruction cleared all FPU state * Must be called with fpregs_lock() held.
* unconditionally, so registers are essentially destroyed. *
* Modern FPU state can be kept in registers, if there are * The legacy FNSAVE instruction clears all FPU state unconditionally, so
* no pending FP exceptions. * register state has to be reloaded. That might be a pointless exercise
* when the FPU is going to be used by another task right after that. But
* this only affects 20+ year-old 32-bit systems and avoids conditionals all
* over the place.
*
* FXSAVE and all XSAVE variants preserve the FPU register state.
*/ */
int save_fpregs_to_fpstate(struct fpu *fpu) void save_fpregs_to_fpstate(struct fpu *fpu)
{ {
if (likely(use_xsave())) { if (likely(use_xsave())) {
os_xsave(&fpu->state.xsave); os_xsave(&fpu->state.xsave);
...@@ -103,21 +107,20 @@ int save_fpregs_to_fpstate(struct fpu *fpu) ...@@ -103,21 +107,20 @@ int save_fpregs_to_fpstate(struct fpu *fpu)
*/ */
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies; fpu->avx512_timestamp = jiffies;
return 1; return;
} }
if (likely(use_fxsr())) { if (likely(use_fxsr())) {
fxsave(&fpu->state.fxsave); fxsave(&fpu->state.fxsave);
return 1; return;
} }
/* /*
* Legacy FPU register saving, FNSAVE always clears FPU registers, * Legacy FPU register saving, FNSAVE always clears FPU registers,
* so we have to mark them inactive: * so we have to reload them from the memory state.
*/ */
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
frstor(&fpu->state.fsave);
return 0;
} }
EXPORT_SYMBOL(save_fpregs_to_fpstate); EXPORT_SYMBOL(save_fpregs_to_fpstate);
...@@ -133,10 +136,6 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) ...@@ -133,10 +136,6 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
if (!(current->flags & PF_KTHREAD) && if (!(current->flags & PF_KTHREAD) &&
!test_thread_flag(TIF_NEED_FPU_LOAD)) { !test_thread_flag(TIF_NEED_FPU_LOAD)) {
set_thread_flag(TIF_NEED_FPU_LOAD); set_thread_flag(TIF_NEED_FPU_LOAD);
/*
* Ignore return value -- we don't care if reg state
* is clobbered.
*/
save_fpregs_to_fpstate(&current->thread.fpu); save_fpregs_to_fpstate(&current->thread.fpu);
} }
__cpu_invalidate_fpregs_state(); __cpu_invalidate_fpregs_state();
...@@ -171,11 +170,8 @@ void fpu__save(struct fpu *fpu) ...@@ -171,11 +170,8 @@ void fpu__save(struct fpu *fpu)
fpregs_lock(); fpregs_lock();
trace_x86_fpu_before_save(fpu); trace_x86_fpu_before_save(fpu);
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { if (!test_thread_flag(TIF_NEED_FPU_LOAD))
if (!save_fpregs_to_fpstate(fpu)) { save_fpregs_to_fpstate(fpu);
copy_kernel_to_fpregs(&fpu->state);
}
}
trace_x86_fpu_after_save(fpu); trace_x86_fpu_after_save(fpu);
fpregs_unlock(); fpregs_unlock();
...@@ -244,20 +240,16 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) ...@@ -244,20 +240,16 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/* /*
* If the FPU registers are not current just memcpy() the state. * If the FPU registers are not owned by current just memcpy() the
* Otherwise save current FPU registers directly into the child's FPU * state. Otherwise save the FPU registers directly into the
* context, without any memory-to-memory copying. * child's FPU context, without any memory-to-memory copying.
*
* ( The function 'fails' in the FNSAVE case, which destroys
* register contents so we have to load them back. )
*/ */
fpregs_lock(); fpregs_lock();
if (test_thread_flag(TIF_NEED_FPU_LOAD)) if (test_thread_flag(TIF_NEED_FPU_LOAD))
memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
else if (!save_fpregs_to_fpstate(dst_fpu)) else
copy_kernel_to_fpregs(&dst_fpu->state); save_fpregs_to_fpstate(dst_fpu);
fpregs_unlock(); fpregs_unlock();
set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment