Commit 9bb17be0 authored by Will Deacon

ARM: locks: prefetch the destination word for write prior to strex

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch prefixes our {spin,read,write}_[try]lock implementations with
pldw instructions (on CPUs which support them) to try and grab the line
in exclusive state from the start. arch_rwlock_t is changed to avoid
using a volatile member, since this generates compiler warnings when
falling back on the __builtin_prefetch intrinsic which expects a const
void * argument.
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent d8f57aa4
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#error SMP not supported on pre-ARMv6 CPUs #error SMP not supported on pre-ARMv6 CPUs
#endif #endif
#include <asm/processor.h> #include <linux/prefetch.h>
/* /*
* sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
...@@ -70,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -70,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
u32 newval; u32 newval;
arch_spinlock_t lockval; arch_spinlock_t lockval;
prefetchw(&lock->slock);
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%3]\n" "1: ldrex %0, [%3]\n"
" add %1, %0, %4\n" " add %1, %0, %4\n"
...@@ -93,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) ...@@ -93,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
unsigned long contended, res; unsigned long contended, res;
u32 slock; u32 slock;
prefetchw(&lock->slock);
do { do {
__asm__ __volatile__( __asm__ __volatile__(
" ldrex %0, [%3]\n" " ldrex %0, [%3]\n"
...@@ -145,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) ...@@ -145,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
{ {
unsigned long tmp; unsigned long tmp;
prefetchw(&rw->lock);
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%1]\n" "1: ldrex %0, [%1]\n"
" teq %0, #0\n" " teq %0, #0\n"
...@@ -163,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) ...@@ -163,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
{ {
unsigned long contended, res; unsigned long contended, res;
prefetchw(&rw->lock);
do { do {
__asm__ __volatile__( __asm__ __volatile__(
" ldrex %0, [%2]\n" " ldrex %0, [%2]\n"
...@@ -196,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) ...@@ -196,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
} }
/* write_can_lock - would write_trylock() succeed? */ /* write_can_lock - would write_trylock() succeed? */
#define arch_write_can_lock(x) ((x)->lock == 0) #define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0)
/* /*
* Read locks are a bit more hairy: * Read locks are a bit more hairy:
...@@ -214,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) ...@@ -214,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
{ {
unsigned long tmp, tmp2; unsigned long tmp, tmp2;
prefetchw(&rw->lock);
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%2]\n" "1: ldrex %0, [%2]\n"
" adds %0, %0, #1\n" " adds %0, %0, #1\n"
...@@ -234,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) ...@@ -234,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
smp_mb(); smp_mb();
prefetchw(&rw->lock);
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%2]\n" "1: ldrex %0, [%2]\n"
" sub %0, %0, #1\n" " sub %0, %0, #1\n"
...@@ -252,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) ...@@ -252,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
{ {
unsigned long contended, res; unsigned long contended, res;
prefetchw(&rw->lock);
do { do {
__asm__ __volatile__( __asm__ __volatile__(
" ldrex %0, [%2]\n" " ldrex %0, [%2]\n"
...@@ -273,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) ...@@ -273,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
} }
/* read_can_lock - would read_trylock() succeed? */ /* read_can_lock - would read_trylock() succeed? */
#define arch_read_can_lock(x) ((x)->lock < 0x80000000) #define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
......
...@@ -25,7 +25,7 @@ typedef struct { ...@@ -25,7 +25,7 @@ typedef struct {
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
typedef struct { typedef struct {
volatile unsigned int lock; u32 lock;
} arch_rwlock_t; } arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 } #define __ARCH_RW_LOCK_UNLOCKED { 0 }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment