Commit 41dd42aa authored by Andrew Morton, committed by Linus Torvalds

[PATCH] sh: preempt safe lazy fpu handling

From: Paul Mundt <lethal@linux-sh.org>

This updates the lazy FPU handling to be preempt-safe.  Patches from SUGIOKA
Toshinobu and Kaz Kojima.
parent dd9cd732
...@@ -180,7 +180,7 @@ asmlinkage void __init sh_cpu_init(void) ...@@ -180,7 +180,7 @@ asmlinkage void __init sh_cpu_init(void)
if (fpu_disabled) { if (fpu_disabled) {
printk("FPU Disabled\n"); printk("FPU Disabled\n");
cpu_data->flags &= ~CPU_HAS_FPU; cpu_data->flags &= ~CPU_HAS_FPU;
release_fpu(); disable_fpu();
} }
/* FPU initialization */ /* FPU initialization */
......
/* $Id: fpu.c,v 1.3 2003/09/23 23:15:44 lethal Exp $ /* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $
* *
* linux/arch/sh/kernel/fpu.c * linux/arch/sh/kernel/fpu.c
* *
...@@ -31,11 +31,15 @@ ...@@ -31,11 +31,15 @@
* Assume called with FPU enabled (SR.FD=0). * Assume called with FPU enabled (SR.FD=0).
*/ */
void void
save_fpu(struct task_struct *tsk) save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{ {
unsigned long dummy;
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
enable_fpu();
asm volatile("sts.l fpul, @-%0\n\t" asm volatile("sts.l fpul, @-%0\n\t"
"sts.l fpscr, @-%0\n\t" "sts.l fpscr, @-%0\n\t"
"lds %1, fpscr\n\t" "lds %2, fpscr\n\t"
"frchg\n\t" "frchg\n\t"
"fmov.s fr15, @-%0\n\t" "fmov.s fr15, @-%0\n\t"
"fmov.s fr14, @-%0\n\t" "fmov.s fr14, @-%0\n\t"
...@@ -70,21 +74,24 @@ save_fpu(struct task_struct *tsk) ...@@ -70,21 +74,24 @@ save_fpu(struct task_struct *tsk)
"fmov.s fr2, @-%0\n\t" "fmov.s fr2, @-%0\n\t"
"fmov.s fr1, @-%0\n\t" "fmov.s fr1, @-%0\n\t"
"fmov.s fr0, @-%0\n\t" "fmov.s fr0, @-%0\n\t"
"lds %2, fpscr\n\t" "lds %3, fpscr\n\t"
: /* no output */ : "=r" (dummy)
: "r" ((char *)(&tsk->thread.fpu.hard.status)), : "0" ((char *)(&tsk->thread.fpu.hard.status)),
"r" (FPSCR_RCHG), "r" (FPSCR_RCHG),
"r" (FPSCR_INIT) "r" (FPSCR_INIT)
: "memory"); : "memory");
clear_tsk_thread_flag(tsk, TIF_USEDFPU); disable_fpu();
release_fpu(); release_fpu(regs);
} }
static void static void
restore_fpu(struct task_struct *tsk) restore_fpu(struct task_struct *tsk)
{ {
asm volatile("lds %1, fpscr\n\t" unsigned long dummy;
enable_fpu();
asm volatile("lds %2, fpscr\n\t"
"fmov.s @%0+, fr0\n\t" "fmov.s @%0+, fr0\n\t"
"fmov.s @%0+, fr1\n\t" "fmov.s @%0+, fr1\n\t"
"fmov.s @%0+, fr2\n\t" "fmov.s @%0+, fr2\n\t"
...@@ -121,9 +128,10 @@ restore_fpu(struct task_struct *tsk) ...@@ -121,9 +128,10 @@ restore_fpu(struct task_struct *tsk)
"frchg\n\t" "frchg\n\t"
"lds.l @%0+, fpscr\n\t" "lds.l @%0+, fpscr\n\t"
"lds.l @%0+, fpul\n\t" "lds.l @%0+, fpul\n\t"
: /* no output */ : "=r" (dummy)
: "r" (&tsk->thread.fpu), "r" (FPSCR_RCHG) : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
: "memory"); : "memory");
disable_fpu();
} }
/* /*
...@@ -135,6 +143,7 @@ restore_fpu(struct task_struct *tsk) ...@@ -135,6 +143,7 @@ restore_fpu(struct task_struct *tsk)
static void static void
fpu_init(void) fpu_init(void)
{ {
enable_fpu();
asm volatile("lds %0, fpul\n\t" asm volatile("lds %0, fpul\n\t"
"lds %1, fpscr\n\t" "lds %1, fpscr\n\t"
"fsts fpul, fr0\n\t" "fsts fpul, fr0\n\t"
...@@ -174,6 +183,7 @@ fpu_init(void) ...@@ -174,6 +183,7 @@ fpu_init(void)
"lds %2, fpscr\n\t" "lds %2, fpscr\n\t"
: /* no output */ : /* no output */
: "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
disable_fpu();
} }
/** /**
...@@ -262,14 +272,14 @@ ieee_fpe_handler (struct pt_regs *regs) ...@@ -262,14 +272,14 @@ ieee_fpe_handler (struct pt_regs *regs)
if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
struct task_struct *tsk = current; struct task_struct *tsk = current;
save_fpu(tsk); save_fpu(tsk, regs);
if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) {
/* FPU error */ /* FPU error */
denormal_to_double (&tsk->thread.fpu.hard, denormal_to_double (&tsk->thread.fpu.hard,
(finsn >> 8) & 0xf); (finsn >> 8) & 0xf);
tsk->thread.fpu.hard.fpscr &= tsk->thread.fpu.hard.fpscr &=
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
grab_fpu(); grab_fpu(regs);
restore_fpu(tsk); restore_fpu(tsk);
set_tsk_thread_flag(tsk, TIF_USEDFPU); set_tsk_thread_flag(tsk, TIF_USEDFPU);
} else { } else {
...@@ -295,7 +305,7 @@ do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long ...@@ -295,7 +305,7 @@ do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long
return; return;
regs.pc += 2; regs.pc += 2;
save_fpu(tsk); save_fpu(tsk, &regs);
tsk->thread.trap_no = 11; tsk->thread.trap_no = 11;
tsk->thread.error_code = 0; tsk->thread.error_code = 0;
force_sig(SIGFPE, tsk); force_sig(SIGFPE, tsk);
...@@ -307,7 +317,7 @@ do_fpu_state_restore(unsigned long r4, unsigned long r5, unsigned long r6, ...@@ -307,7 +317,7 @@ do_fpu_state_restore(unsigned long r4, unsigned long r5, unsigned long r6,
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
grab_fpu(); grab_fpu(&regs);
if (!user_mode(&regs)) { if (!user_mode(&regs)) {
printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
return; return;
......
/* $Id: process.c,v 1.24 2003/11/28 23:05:43 kkojima Exp $ /* $Id: process.c,v 1.25 2004/01/13 05:52:11 kkojima Exp $
* *
* linux/arch/sh/kernel/process.c * linux/arch/sh/kernel/process.c
* *
...@@ -174,9 +174,13 @@ void flush_thread(void) ...@@ -174,9 +174,13 @@ void flush_thread(void)
{ {
#if defined(CONFIG_CPU_SH4) #if defined(CONFIG_CPU_SH4)
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct pt_regs *regs = (struct pt_regs *)
((unsigned long)tsk->thread_info
+ THREAD_SIZE - sizeof(struct pt_regs)
- sizeof(unsigned long));
/* Forget lazy FPU state */ /* Forget lazy FPU state */
clear_fpu(tsk); clear_fpu(tsk, regs);
tsk->used_math = 0; tsk->used_math = 0;
#endif #endif
} }
...@@ -196,7 +200,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) ...@@ -196,7 +200,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
fpvalid = tsk->used_math; fpvalid = tsk->used_math;
if (fpvalid) { if (fpvalid) {
unlazy_fpu(tsk); unlazy_fpu(tsk, regs);
memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu)); memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
} }
#endif #endif
...@@ -212,7 +216,8 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) ...@@ -212,7 +216,8 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
struct pt_regs ptregs; struct pt_regs ptregs;
ptregs = *(struct pt_regs *) ptregs = *(struct pt_regs *)
((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs) ((unsigned long)tsk->thread_info + THREAD_SIZE
- sizeof(struct pt_regs)
#ifdef CONFIG_SH_DSP #ifdef CONFIG_SH_DSP
- sizeof(struct pt_dspregs) - sizeof(struct pt_dspregs)
#endif #endif
...@@ -230,7 +235,11 @@ dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *fpu) ...@@ -230,7 +235,11 @@ dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *fpu)
#if defined(CONFIG_CPU_SH4) #if defined(CONFIG_CPU_SH4)
fpvalid = tsk->used_math; fpvalid = tsk->used_math;
if (fpvalid) { if (fpvalid) {
unlazy_fpu(tsk); struct pt_regs *regs = (struct pt_regs *)
((unsigned long)tsk->thread_info
+ THREAD_SIZE - sizeof(struct pt_regs)
- sizeof(unsigned long));
unlazy_fpu(tsk, regs);
memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu)); memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
} }
#endif #endif
...@@ -257,13 +266,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, ...@@ -257,13 +266,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
if (user_mode(regs)) { if (user_mode(regs)) {
childregs->regs[15] = usp; childregs->regs[15] = usp;
} else { } else {
childregs->regs[15] = (unsigned long)p->thread_info+THREAD_SIZE; childregs->regs[15] = (unsigned long)p->thread_info + THREAD_SIZE;
} }
if (clone_flags & CLONE_SETTLS) { if (clone_flags & CLONE_SETTLS) {
childregs->gbr = childregs->regs[0]; childregs->gbr = childregs->regs[0];
} }
childregs->regs[0] = 0; /* Set return value for child */ childregs->regs[0] = 0; /* Set return value for child */
childregs->sr |= SR_FD; /* Invalidate FPU flag */
p->set_child_tid = p->clear_child_tid = NULL; p->set_child_tid = p->clear_child_tid = NULL;
p->thread.sp = (unsigned long) childregs; p->thread.sp = (unsigned long) childregs;
...@@ -275,7 +283,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, ...@@ -275,7 +283,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
unlazy_fpu(tsk); unlazy_fpu(tsk, regs);
p->thread.fpu = tsk->thread.fpu; p->thread.fpu = tsk->thread.fpu;
p->used_math = tsk->used_math; p->used_math = tsk->used_math;
clear_ti_thread_flag(p->thread_info, TIF_USEDFPU); clear_ti_thread_flag(p->thread_info, TIF_USEDFPU);
...@@ -332,8 +340,39 @@ ubc_set_tracing(int asid, unsigned long pc) ...@@ -332,8 +340,39 @@ ubc_set_tracing(int asid, unsigned long pc)
struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next)
{ {
#if defined(CONFIG_CPU_SH4) #if defined(CONFIG_CPU_SH4)
unlazy_fpu(prev); struct pt_regs *regs = (struct pt_regs *)
((unsigned long)prev->thread_info
+ THREAD_SIZE - sizeof(struct pt_regs)
- sizeof(unsigned long));
unlazy_fpu(prev, regs);
#endif #endif
#ifdef CONFIG_PREEMPT
{
unsigned long flags;
struct pt_regs *regs;
local_irq_save(flags);
regs = (struct pt_regs *)
((unsigned long)prev->thread_info
+ THREAD_SIZE - sizeof(struct pt_regs)
#ifdef CONFIG_SH_DSP
- sizeof(struct pt_dspregs)
#endif
- sizeof(unsigned long));
if (user_mode(regs) && regs->regs[15] >= 0xc0000000) {
int offset = (int)regs->regs[15];
/* Reset stack pointer: clear critical region mark */
regs->regs[15] = regs->regs[1];
if (regs->pc < regs->regs[0])
/* Go to rewind point */
regs->pc = regs->regs[0] + offset;
}
local_irq_restore(flags);
}
#endif
/* /*
* Restore the kernel mode register * Restore the kernel mode register
* k7 (r7_bank1) * k7 (r7_bank1)
......
/* $Id: signal.c,v 1.19 2003/10/13 07:21:19 lethal Exp $ /* $Id: signal.c,v 1.20 2004/01/13 05:52:11 kkojima Exp $
* *
* linux/arch/sh/kernel/signal.c * linux/arch/sh/kernel/signal.c
* *
...@@ -168,7 +168,8 @@ static inline int restore_sigcontext_fpu(struct sigcontext __user *sc) ...@@ -168,7 +168,8 @@ static inline int restore_sigcontext_fpu(struct sigcontext __user *sc)
sizeof(long)*(16*2+2)); sizeof(long)*(16*2+2));
} }
static inline int save_sigcontext_fpu(struct sigcontext __user *sc) static inline int save_sigcontext_fpu(struct sigcontext __user *sc,
struct pt_regs *regs)
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
...@@ -187,7 +188,7 @@ static inline int save_sigcontext_fpu(struct sigcontext __user *sc) ...@@ -187,7 +188,7 @@ static inline int save_sigcontext_fpu(struct sigcontext __user *sc)
*/ */
tsk->used_math = 0; tsk->used_math = 0;
unlazy_fpu(tsk); unlazy_fpu(tsk, regs);
return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard, return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard,
sizeof(long)*(16*2+2)); sizeof(long)*(16*2+2));
} }
...@@ -218,7 +219,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p ...@@ -218,7 +219,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p
struct task_struct *tsk = current; struct task_struct *tsk = current;
regs->sr |= SR_FD; /* Release FPU */ regs->sr |= SR_FD; /* Release FPU */
clear_fpu(tsk); clear_fpu(tsk, regs);
tsk->used_math = 0; tsk->used_math = 0;
__get_user (owned_fp, &sc->sc_ownedfp); __get_user (owned_fp, &sc->sc_ownedfp);
if (owned_fp) if (owned_fp)
...@@ -326,7 +327,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, ...@@ -326,7 +327,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
#undef COPY #undef COPY
#ifdef CONFIG_CPU_SH4 #ifdef CONFIG_CPU_SH4
err |= save_sigcontext_fpu(sc); err |= save_sigcontext_fpu(sc, regs);
#endif #endif
/* non-iBCS2 extensions.. */ /* non-iBCS2 extensions.. */
...@@ -521,9 +522,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ...@@ -521,9 +522,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
case -ERESTARTNOINTR: case -ERESTARTNOINTR:
regs->pc -= 2; regs->pc -= 2;
} }
#ifndef CONFIG_PREEMPT
} else { } else {
/* gUSA handling */ /* gUSA handling */
#ifdef CONFIG_PREEMPT
unsigned long flags;
local_irq_save(flags);
#endif
if (regs->regs[15] >= 0xc0000000) { if (regs->regs[15] >= 0xc0000000) {
int offset = (int)regs->regs[15]; int offset = (int)regs->regs[15];
...@@ -533,6 +538,8 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ...@@ -533,6 +538,8 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
/* Go to rewind point #1 */ /* Go to rewind point #1 */
regs->pc = regs->regs[0] + offset - 2; regs->pc = regs->regs[0] + offset - 2;
} }
#ifdef CONFIG_PREEMPT
local_irq_restore(flags);
#endif #endif
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <asm/types.h> #include <asm/types.h>
#include <asm/cache.h> #include <asm/cache.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <asm/ptrace.h>
/* /*
* Default implementation of macro that returns current * Default implementation of macro that returns current
...@@ -167,7 +168,7 @@ extern int ubc_usercnt; ...@@ -167,7 +168,7 @@ extern int ubc_usercnt;
#define start_thread(regs, new_pc, new_sp) \ #define start_thread(regs, new_pc, new_sp) \
set_fs(USER_DS); \ set_fs(USER_DS); \
regs->pr = 0; \ regs->pr = 0; \
regs->sr = 0; /* User mode. */ \ regs->sr = SR_FD; /* User mode. */ \
regs->pc = new_pc; \ regs->pc = new_pc; \
regs->regs[15] = new_sp regs->regs[15] = new_sp
...@@ -200,7 +201,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); ...@@ -200,7 +201,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
* FPU lazy state save handling. * FPU lazy state save handling.
*/ */
static __inline__ void release_fpu(void) static __inline__ void disable_fpu(void)
{ {
unsigned long __dummy; unsigned long __dummy;
...@@ -212,7 +213,7 @@ static __inline__ void release_fpu(void) ...@@ -212,7 +213,7 @@ static __inline__ void release_fpu(void)
: "r" (SR_FD)); : "r" (SR_FD));
} }
static __inline__ void grab_fpu(void) static __inline__ void enable_fpu(void)
{ {
unsigned long __dummy; unsigned long __dummy;
...@@ -224,22 +225,32 @@ static __inline__ void grab_fpu(void) ...@@ -224,22 +225,32 @@ static __inline__ void grab_fpu(void)
: "r" (~SR_FD)); : "r" (~SR_FD));
} }
static __inline__ void release_fpu(struct pt_regs *regs)
{
regs->sr |= SR_FD;
}
static __inline__ void grab_fpu(struct pt_regs *regs)
{
regs->sr &= ~SR_FD;
}
#ifdef CONFIG_CPU_SH4 #ifdef CONFIG_CPU_SH4
extern void save_fpu(struct task_struct *__tsk); extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs);
#else #else
#define save_fpu(tsk) do { } while (0) #define save_fpu(tsk) do { } while (0)
#endif #endif
#define unlazy_fpu(tsk) do { \ #define unlazy_fpu(tsk, regs) do { \
if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \ if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
save_fpu(tsk); \ save_fpu(tsk, regs); \
} \ } \
} while (0) } while (0)
#define clear_fpu(tsk) do { \ #define clear_fpu(tsk, regs) do { \
if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \ if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
clear_tsk_thread_flag(tsk, TIF_USEDFPU); \ clear_tsk_thread_flag(tsk, TIF_USEDFPU); \
release_fpu(); \ release_fpu(regs); \
} \ } \
} while (0) } while (0)
......
#ifndef __ASM_SH_PTRACE_H #ifndef __ASM_SH_PTRACE_H
#define __ASM_SH_PTRACE_H #define __ASM_SH_PTRACE_H
#include <asm/processor.h>
#include <asm/ubc.h> #include <asm/ubc.h>
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment