Commit 24b8a236 authored by Linus Torvalds, committed by Ingo Molnar

x86/fpu: Clean up FPU switching in the middle of task switching

The FPU switching code happens to work, but it's very, very wrong,
because our 'current' macro is magic that is supposed to load a
stable value.

It just happens to be not quite stable enough, and the compilers
re-load the value often enough for this code to work. But it's
still wrong.
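
To make the hazard concrete, here is a minimal sketch (an editorial
aside, not part of the original message) of where 'current' changes
meaning inside __switch_to(). The function shape and the pcpu_hot
store match the kernel; the comments are illustrative:

	__visible struct task_struct *
	__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	{
		/* Up to this point, 'current' evaluates to prev_p... */

		raw_cpu_write(pcpu_hot.current_task, next_p);

		/*
		 * ...and from here on it evaluates to next_p.  A helper
		 * that reads 'current->flags' on either side of the store
		 * is correct only if the compiler happens to (re)load
		 * 'current' at the right moment -- which is exactly what
		 * the old switch_fpu_prepare() relied on.
		 */

		return prev_p;
	}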

The whole

        struct fpu *prev_fpu = &prev->fpu;

thing in __switch_to() is pretty ugly. There's no reason why we
should look at that 'prev_fpu' pointer there, or pass it down.

And it only generates worse code, because it loads 'current' even
though __switch_to() already has the right task pointers.

The attached patch not only cleans this up, it actually
generates better code too:

 (a) it removes one push/pop pair at entry/exit because there's one
     less register used (no 'current')

 (b) it removes that pointless load of 'current' because it just uses
     the right argument:

	-       movq    %gs:pcpu_hot(%rip), %r12
	-       testq   $16384, (%r12)
	+       testq   $16384, (%rdi)
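
An editorial aside on the quoted assembly: in the x86-64 calling
convention, %rdi carries the first function argument, so '(%rdi)'
dereferences the task pointer passed into __switch_to() directly,
whereas the old code first had to load 'current' from the per-CPU
pcpu_hot structure into a callee-saved register (%r12 -- the source
of the extra push/pop pair). The immediate 16384 is 1 << 14, i.e.
the TIF_NEED_FPU_LOAD bit. A rough C rendering, assuming (as is the
case on x86) that struct thread_info sits at offset 0 of struct
task_struct so the flag word can be tested through the task pointer:

	#define TIF_NEED_FPU_LOAD	14	/* 1 << 14 == 16384 */

	static inline bool needs_fpu_load(struct task_struct *p)
	{
		/*
		 * thread_info is the first member of task_struct on
		 * x86, so this reads offset 0 of the task pointer --
		 * the '(%rdi)' in the instructions quoted above.
		 */
		return p->thread_info.flags & (1UL << TIF_NEED_FPU_LOAD);
	}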
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20231018184227.446318-1-ubizjak@gmail.com
parent e39828d2
@@ -37,10 +37,12 @@ extern void fpu_flush_thread(void);
  * The FPU context is only stored/restored for a user task and
  * PF_KTHREAD is used to distinguish between kernel and user threads.
  */
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
 {
 	if (cpu_feature_enabled(X86_FEATURE_FPU) &&
-	    !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+	    !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+		struct fpu *old_fpu = &old->thread.fpu;
+
 		save_fpregs_to_fpstate(old_fpu);
 		/*
 		 * The save operation preserved register state, so the
@@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
  * Delay loading of the complete FPU state until the return to userland.
  * PKRU is handled separately.
  */
-static inline void switch_fpu_finish(void)
+static inline void switch_fpu_finish(struct task_struct *new)
 {
 	if (cpu_feature_enabled(X86_FEATURE_FPU))
-		set_thread_flag(TIF_NEED_FPU_LOAD);
+		set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
 }
 #endif /* _ASM_X86_FPU_SCHED_H */
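
A note on the switch_fpu_finish() change above: set_thread_flag()
acts on whichever task 'current' evaluates to, and the call site
sits after the store that makes 'current' mean next_p -- so the old
code set the flag on the right task only because the compiler
happened to re-load 'current' after that store. Passing the task
explicitly removes that dependency. As a sketch of the relationship
between the two helpers (the kernel's real definitions go through
thread_info accessors):

	/*
	 * set_thread_flag(f) behaves like set_tsk_thread_flag(current, f):
	 * the former picks up 'current' implicitly, the latter takes the
	 * target task as an explicit argument.
	 */
	set_thread_flag(TIF_NEED_FPU_LOAD);		/* implicit 'current' */
	set_tsk_thread_flag(next_p, TIF_NEED_FPU_LOAD);	/* explicit task     */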
@@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 			     *next = &next_p->thread;
-	struct fpu *prev_fpu = &prev->fpu;
 	int cpu = smp_processor_id();
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_prepare(prev_fpu, cpu);
+	if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+		switch_fpu_prepare(prev_p, cpu);
 
 	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
@@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	raw_cpu_write(pcpu_hot.current_task, next_p);
 
-	switch_fpu_finish();
+	switch_fpu_finish(next_p);
 
 	/* Load the Intel cache allocation PQR MSR. */
 	resctrl_sched_in(next_p);
...
@@ -562,14 +562,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
-	struct fpu *prev_fpu = &prev->fpu;
 	int cpu = smp_processor_id();
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
 		     this_cpu_read(pcpu_hot.hardirq_stack_inuse));
 
-	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_prepare(prev_fpu, cpu);
+	if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+		switch_fpu_prepare(prev_p, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
@@ -623,7 +622,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	raw_cpu_write(pcpu_hot.current_task, next_p);
 	raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
 
-	switch_fpu_finish();
+	switch_fpu_finish(next_p);
 
 	/* Reload sp0. */
 	update_task_stack(next_p);
...
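
Putting the hunks together, the resulting FPU handoff in
__switch_to() never consults 'current'; both helpers take explicit
task pointers. A condensed sketch of the new flow (assembled from
the hunks above, not a literal excerpt):

	int cpu = smp_processor_id();

	/* Save prev's FPU registers while 'current' still means prev_p. */
	if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_p, cpu);

	/* This store is what flips the meaning of 'current'. */
	raw_cpu_write(pcpu_hot.current_task, next_p);

	/* Defer next_p's FPU load until it returns to userspace. */
	switch_fpu_finish(next_p);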