Commit 916d2b26 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] Fast path context switch - microoptimize FPU reload

Following some changes on x86-64.

When cpu_has_fxsr is defined to 1, as it is in many kernel configurations,
unlazy_fpu can collapse to three instructions.  For that, inlining is a very
good idea.  Otherwise it is 10 instructions or so, which can still be inlined.
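
With cpu_has_fxsr hardwired to 1, the inlined unlazy_fpu(tsk) reduces to
roughly the following (a sketch of the macro expansion based on the new
header code below, not literal compiler output):

if (tsk->thread_info->flags & _TIF_USEDFPU) {
        /* save and reinitialize the FPU state, then drop the flag */
        asm volatile( "fxsave %0 ; fnclex"
                      : "=m" (tsk->thread.i387.fxsave) );
        tsk->thread_info->flags &= ~_TIF_USEDFPU;
        stts();         /* set CR0.TS so the next FPU use traps */
}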

We don't need the LOCK prefix to test our own, CPU-local thread flags state.
Unfortunately test_thread_flag currently always uses test_bit, which carries
a LOCK on SMP, but that is unnecessary here.  LOCK is costly on the P4,
so it's a good idea to avoid it.
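
To make the cost concrete, here is a minimal user-space sketch (an
illustration using GCC's __sync builtins and a made-up mask value, not
kernel code) contrasting a LOCK-prefixed read-modify-write with the plain
one that suffices for a flags word only the owning task ever touches:

#include <stdio.h>

#define USEDFPU_MASK (1UL << 16)        /* hypothetical mask, illustration only */

static unsigned long flags = USEDFPU_MASK;

int main(void)
{
        /* Atomic clear: compiles to a LOCK-prefixed instruction on x86,
         * which is what the generic bitops cost us on SMP kernels. */
        __sync_fetch_and_and(&flags, ~USEDFPU_MASK);

        flags = USEDFPU_MASK;

        /* Plain clear: an ordinary load/and/store, no LOCK, fine when no
         * other CPU can modify the same word concurrently. */
        flags &= ~USEDFPU_MASK;

        printf("flags = %#lx\n", flags);
        return 0;
}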

Work around this for now by testing the flags word directly.  A better fix
would probably be to define __set_bit for all architectures without
guaranteeing atomicity, and then always use that for local thread_info
accesses in linux/thread_info.h.
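
A rough sketch of that direction, assuming the usual (nr, addr) bitop
convention; this is not part of this patch:

/* Non-atomic bit helpers: no LOCK prefix, safe only when no other CPU
 * can touch the same word concurrently (e.g. a task's own
 * thread_info->flags). */
static inline void __set_bit(int nr, unsigned long *addr)
{
        addr[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
}

static inline void __clear_bit(int nr, unsigned long *addr)
{
        addr[nr / (8 * sizeof(long))] &= ~(1UL << (nr % (8 * sizeof(long))));
}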
parent 106d4087
@@ -52,24 +52,6 @@ void init_fpu(struct task_struct *tsk)
 /*
  * FPU lazy state save handling.
  */
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
-        if ( cpu_has_fxsr ) {
-                asm volatile( "fxsave %0 ; fnclex"
-                              : "=m" (tsk->thread.i387.fxsave) );
-        } else {
-                asm volatile( "fnsave %0 ; fwait"
-                              : "=m" (tsk->thread.i387.fsave) );
-        }
-        clear_tsk_thread_flag(tsk, TIF_USEDFPU);
-}
-
-void save_init_fpu( struct task_struct *tsk )
-{
-        __save_init_fpu(tsk);
-        stts();
-}
-
 void kernel_fpu_begin(void)
 {
         preempt_disable();
@@ -21,23 +21,41 @@ extern void init_fpu(struct task_struct *);
 /*
  * FPU lazy state save handling...
  */
-extern void save_init_fpu( struct task_struct *tsk );
 extern void restore_fpu( struct task_struct *tsk );
 
 extern void kernel_fpu_begin(void);
 #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
 
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+        if ( cpu_has_fxsr ) {
+                asm volatile( "fxsave %0 ; fnclex"
+                              : "=m" (tsk->thread.i387.fxsave) );
+        } else {
+                asm volatile( "fnsave %0 ; fwait"
+                              : "=m" (tsk->thread.i387.fsave) );
+        }
+        tsk->thread_info->flags &= ~_TIF_USEDFPU;
+}
+
+static inline void save_init_fpu( struct task_struct *tsk )
+{
+        __save_init_fpu(tsk);
+        stts();
+}
+
 #define unlazy_fpu( tsk ) do { \
-        if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) \
+        if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
                 save_init_fpu( tsk ); \
 } while (0)
 
 #define clear_fpu( tsk ) \
 do { \
-        if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
+        if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \
                 asm volatile("fwait"); \
-                clear_tsk_thread_flag(tsk, TIF_USEDFPU); \
+                (tsk)->thread_info->flags &= ~_TIF_USEDFPU; \
                 stts(); \
         } \
 } while (0)