Commit 56d1317d authored by Kenneth W. Chen, committed by Linus Torvalds

[PATCH] ia64: fix race in fsys_bubble_down to avoid fp-register corruption

The fast system-call fall-back-path has a race: it reads PSR, modifies
some bits, then writes back the new PSR.  Unfortunately, the contents
of PSR may change between reading and writing it.  For example, an
interrupt could occur which could trigger a context-switch.  The
context-switch might in turn flush the floating-point-high (FPH)
partition to memory, clear PSR.MFH, and set PSR.DFH.  To prevent this
race, the patch below turns off PSR.I before reading PSR.

This fixes a floating-point corruption problem that was observed on a
system with a libc which has the fast system-call support enabled.

The performance impact is minimal (on the order of a handful of cycles).
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Asit Mallick <asit.k.mallick@intel.com>
Signed-off-by: David Mosberger <davidm@hpl.hp.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d1da42b4
......@@ -502,6 +502,7 @@ ENTRY(fsys_fallback_syscall)
adds r17=-1024,r15
movl r14=sys_call_table
;;
rsm psr.i
shladd r18=r17,3,r14
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
......@@ -542,7 +543,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN)
| IA64_PSR_BN | IA64_PSR_I)
invala
movl r8=PSR_ONE_BITS
......
......@@ -91,16 +91,18 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18]
mov r29=psr // read psr (12 cyc load latency)
mov r21=ar.fpsr
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
(p6) rsm psr.i
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
;;
mov r29=psr // read psr (12 cyc load latency)
/*
* brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment