Commit 2c3956c9 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: strengthen I/O and memory barriers

From: Paul Mackerras <paulus@samba.org>

After I sent the recent patch to include/asm-ppc64/io.h which put stronger
barriers in the I/O accessor macros, Paul McKenney pointed out to me that a
writex/outx could still slide out from inside a spinlocked region.  This patch
makes the barriers a bit stronger so that this can't happen.  It means that we
need to use a sync instruction for wmb (a full "heavyweight" sync), since
drivers rely on wmb for ordering between writes to system memory and writes to
a device.
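
As an illustration (not part of the patch): the driver pattern the stronger
wmb() protects.  The descriptor layout, the DESC_READY flag and the doorbell
register below are hypothetical; only the barrier usage is the point.

	/* Hypothetical driver fragment: wmb() must order the cacheable
	 * descriptor stores before the non-cacheable doorbell store.
	 * On ppc64 only sync gives that guarantee, because eieio does
	 * not order cacheable stores against non-cacheable ones. */
	struct desc {
		u64 addr;
		u32 len;
		u32 flags;
	};

	static void ring_doorbell(struct desc *ring, int idx,
				  volatile unsigned *doorbell,
				  u64 dma, u32 len)
	{
		ring[idx].addr  = dma;		/* stores to system memory */
		ring[idx].len   = len;
		ring[idx].flags = DESC_READY;	/* hypothetical "valid" bit */
		wmb();				/* now "sync" on ppc64 */
		out_be32(doorbell, idx);	/* store to the device */
	}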

I have left smp_wmb() as a lighter-weight barrier that orders stores, and
doesn't impose an ordering between cacheable and non-cacheable accesses (the
amusingly-named eieio instruction).  I am assuming here that smp_wmb is only
used for ordering stores to system memory so that another cpu will see them in
order.  It can't be used for enforcing any ordering that a device will see,
because it is just a gcc barrier on UP.
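
A sketch of the only pattern smp_wmb() is meant for (two CPUs, cacheable
memory on both sides; the producer/consumer names and the flag protocol are
made up for illustration):

	/* Hypothetical two-CPU handoff through system memory.  eieio is
	 * enough here since both locations are cacheable; on UP the
	 * compiler barrier alone suffices. */
	static volatile int data;
	static volatile int flag;

	void producer(void)		/* runs on CPU 0 */
	{
		data = 42;
		smp_wmb();		/* eieio: make "data" visible first */
		flag = 1;
	}

	int consumer(void)		/* runs on CPU 1 */
	{
		while (!flag)
			cpu_relax();	/* spin until the flag is seen */
		smp_rmb();		/* lwsync: don't read "data" early */
		return data;		/* guaranteed to see 42 */
	}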

This also changes the spinlock/rwlock unlock code to use lwsync ("light-weight
sync") rather than eieio, since eieio doesn't order loads, and we need to
ensure that loads stay inside the spinlocked region.
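
To illustrate the failure mode (the lock and the register pointer here are
hypothetical, not from the patch): with eieio in the unlock path, a load from
the device could be satisfied after the lock word was already released.

	/* Hypothetical critical section around an MMIO read.  eieio does
	 * not order loads, so the store releasing the lock could be
	 * performed before the device read completed; lwsync (plus the
	 * twi/isync sequence in the accessor) keeps the load inside. */
	static spinlock_t hw_lock = SPIN_LOCK_UNLOCKED;

	static unsigned read_status(volatile unsigned *regs)
	{
		unsigned status;

		spin_lock(&hw_lock);
		status = in_be32(regs);	/* load from the device */
		spin_unlock(&hw_lock);	/* lwsync orders the load first */
		return status;
	}
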
parent 24f59760
@@ -316,6 +316,8 @@ _GLOBAL(_insb)
 	eieio
 	stbu	r5,1(r4)
 	bdnz	00b
+	twi	0,r5,0
+	isync
 	blr

 _GLOBAL(_outsb)
@@ -325,8 +327,8 @@ _GLOBAL(_outsb)
 	blelr-
 00:	lbzu	r5,1(r4)
 	stb	r5,0(r3)
-	eieio
 	bdnz	00b
+	sync
 	blr

 _GLOBAL(_insw)
@@ -338,6 +340,8 @@ _GLOBAL(_insw)
 	eieio
 	sthu	r5,2(r4)
 	bdnz	00b
+	twi	0,r5,0
+	isync
 	blr

 _GLOBAL(_outsw)
@@ -346,9 +350,9 @@ _GLOBAL(_outsw)
 	subi	r4,r4,2
 	blelr-
 00:	lhzu	r5,2(r4)
-	eieio
 	sthbrx	r5,0,r3
 	bdnz	00b
+	sync
 	blr

 _GLOBAL(_insl)
@@ -360,6 +364,8 @@ _GLOBAL(_insl)
 	eieio
 	stwu	r5,4(r4)
 	bdnz	00b
+	twi	0,r5,0
+	isync
 	blr

 _GLOBAL(_outsl)
@@ -369,8 +375,8 @@ _GLOBAL(_outsl)
 	blelr-
 00:	lwzu	r5,4(r4)
 	stwbrx	r5,0,r3
-	eieio
 	bdnz	00b
+	sync
 	blr

 /* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */
@@ -383,6 +389,8 @@ _GLOBAL(_insw_ns)
 	eieio
 	sthu	r5,2(r4)
 	bdnz	00b
+	twi	0,r5,0
+	isync
 	blr

 /* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */
@@ -393,8 +401,8 @@ _GLOBAL(_outsw_ns)
 	blelr-
 00:	lhzu	r5,2(r4)
 	sth	r5,0(r3)
-	eieio
 	bdnz	00b
+	sync
 	blr

 _GLOBAL(_insl_ns)
@@ -406,6 +414,8 @@ _GLOBAL(_insl_ns)
 	eieio
 	stwu	r5,4(r4)
 	bdnz	00b
+	twi	0,r5,0
+	isync
 	blr

 _GLOBAL(_outsl_ns)
@@ -415,8 +425,8 @@ _GLOBAL(_outsl_ns)
 	blelr-
 00:	lwzu	r5,4(r4)
 	stw	r5,0(r3)
-	eieio
 	bdnz	00b
+	sync
 	blr

 _GLOBAL(abs)
...
@@ -240,14 +240,14 @@ static inline int in_8(volatile unsigned char *addr)
 {
 	int ret;

-	__asm__ __volatile__("eieio; lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			     : "=r" (ret) : "m" (*addr));
 	return ret;
 }

 static inline void out_8(volatile unsigned char *addr, int val)
 {
-	__asm__ __volatile__("sync; stb%U0%X0 %1,%0"
+	__asm__ __volatile__("stb%U0%X0 %1,%0; sync"
 			     : "=m" (*addr) : "r" (val));
 }

@@ -255,7 +255,7 @@ static inline int in_le16(volatile unsigned short *addr)
 {
 	int ret;

-	__asm__ __volatile__("eieio; lhbrx %0,0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync"
 			     : "=r" (ret) : "r" (addr), "m" (*addr));
 	return ret;
 }

@@ -264,20 +264,20 @@ static inline int in_be16(volatile unsigned short *addr)
 {
 	int ret;

-	__asm__ __volatile__("eieio; lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			     : "=r" (ret) : "m" (*addr));
 	return ret;
 }

 static inline void out_le16(volatile unsigned short *addr, int val)
 {
-	__asm__ __volatile__("sync; sthbrx %1,0,%2"
+	__asm__ __volatile__("sthbrx %1,0,%2; sync"
 			     : "=m" (*addr) : "r" (val), "r" (addr));
 }

 static inline void out_be16(volatile unsigned short *addr, int val)
 {
-	__asm__ __volatile__("sync; sth%U0%X0 %1,%0"
+	__asm__ __volatile__("sth%U0%X0 %1,%0; sync"
 			     : "=m" (*addr) : "r" (val));
 }

@@ -285,7 +285,7 @@ static inline unsigned in_le32(volatile unsigned *addr)
 {
 	unsigned ret;

-	__asm__ __volatile__("eieio; lwbrx %0,0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync"
 			     : "=r" (ret) : "r" (addr), "m" (*addr));
 	return ret;
 }

@@ -294,20 +294,20 @@ static inline unsigned in_be32(volatile unsigned *addr)
 {
 	unsigned ret;

-	__asm__ __volatile__("eieio; lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			     : "=r" (ret) : "m" (*addr));
 	return ret;
 }

 static inline void out_le32(volatile unsigned *addr, int val)
 {
-	__asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr)
+	__asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr)
 			     : "r" (val), "r" (addr));
 }

 static inline void out_be32(volatile unsigned *addr, int val)
 {
-	__asm__ __volatile__("sync; stw%U0%X0 %1,%0; eieio"
+	__asm__ __volatile__("stw%U0%X0 %1,%0; eieio"
 			     : "=m" (*addr) : "r" (val));
 }

@@ -316,7 +316,7 @@ static inline unsigned long in_le64(volatile unsigned long *addr)
 	unsigned long tmp, ret;

 	__asm__ __volatile__(
-			     "eieio; ld %1,0(%2)\n"
+			     "ld %1,0(%2)\n"
 			     "twi 0,%1,0\n"
 			     "isync\n"
 			     "rldimi %0,%1,5*8,1*8\n"

@@ -334,7 +334,7 @@ static inline unsigned long in_be64(volatile unsigned long *addr)
 {
 	unsigned long ret;

-	__asm__ __volatile__("eieio; ld %0,0(%1); twi 0,%0,0; isync"
+	__asm__ __volatile__("ld %0,0(%1); twi 0,%0,0; isync"
 			     : "=r" (ret) : "m" (*addr));
 	return ret;
 }

@@ -351,13 +351,14 @@ static inline void out_le64(volatile unsigned long *addr, int val)
 			     "rldicl %1,%1,32,0\n"
 			     "rlwimi %0,%1,8,8,31\n"
 			     "rlwimi %0,%1,24,16,23\n"
-			     "sync; std %0,0(%2)\n"
+			     "std %0,0(%2)\n"
+			     "sync"
 			     : "=r" (tmp) : "r" (val), "b" (addr) , "m" (*addr));
 }

 static inline void out_be64(volatile unsigned long *addr, int val)
 {
-	__asm__ __volatile__("sync; std %1,0(%0)" : "=m" (*addr) : "r" (val));
+	__asm__ __volatile__("std %1,0(%0); sync" : "=m" (*addr) : "r" (val));
 }

 #ifndef CONFIG_PPC_ISERIES
...
@@ -28,7 +28,7 @@ typedef struct {
 static __inline__ void _raw_spin_unlock(spinlock_t *lock)
 {
-	__asm__ __volatile__("eieio	# spin_unlock": : :"memory");
+	__asm__ __volatile__("lwsync	# spin_unlock": : :"memory");
 	lock->lock = 0;
 }

@@ -159,7 +159,7 @@ static __inline__ int is_write_locked(rwlock_t *rw)
 static __inline__ void _raw_write_unlock(rwlock_t *rw)
 {
-	__asm__ __volatile__("eieio	# write_unlock": : :"memory");
+	__asm__ __volatile__("lwsync	# write_unlock": : :"memory");
 	rw->lock = 0;
 }

@@ -223,7 +223,7 @@ static __inline__ void _raw_read_unlock(rwlock_t *rw)
 	unsigned int tmp;

 	__asm__ __volatile__(
-	"eieio			# read_unlock\n\
+	"lwsync			# read_unlock\n\
 1:	lwarx	%0,0,%1\n\
 	addic	%0,%0,-1\n\
 	stwcx.	%0,0,%1\n\
...
@@ -29,22 +29,26 @@
  * read_barrier_depends() prevents data-dependent loads being reordered
  * across this point (nop on PPC).
  *
- * We can use the eieio instruction for wmb, but since it doesn't
- * give any ordering guarantees about loads, we have to use the
- * stronger but slower sync instruction for mb and rmb.
+ * We have to use the sync instructions for mb(), since lwsync doesn't
+ * order loads with respect to previous stores.  Lwsync is fine for
+ * rmb(), though.
+ * For wmb(), we use sync since wmb is used in drivers to order
+ * stores to system memory with respect to writes to the device.
+ * However, smp_wmb() can be a lighter-weight eieio barrier on
+ * SMP since it is only used to order updates to system memory.
  */
 #define mb()	__asm__ __volatile__ ("sync" : : : "memory")
 #define rmb()	__asm__ __volatile__ ("lwsync" : : : "memory")
-#define wmb()	__asm__ __volatile__ ("eieio" : : : "memory")
+#define wmb()	__asm__ __volatile__ ("sync" : : : "memory")
 #define read_barrier_depends()	do { } while(0)

-#define set_mb(var, value)	do { var = value; mb(); } while (0)
-#define set_wmb(var, value)	do { var = value; wmb(); } while (0)
+#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define set_wmb(var, value)	do { var = value; smp_wmb(); } while (0)

 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
+#define smp_wmb()	__asm__ __volatile__ ("eieio" : : : "memory")
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define smp_mb()	__asm__ __volatile__("": : :"memory")
...