Commit e78fdfef authored by Vineet Gupta's avatar Vineet Gupta

ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff

This is to workaround the llock/scond livelock

HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
coherency transactions in the SCU. The exclusive line state keeps rotating
among contenting cores leading to a never ending cycle. So break the cycle
by deferring the retry of failed exclusive access (SCOND). The actual delay
needed is function of number of contending cores as well as the unrelated
coherency traffic from other cores. To keep the code simple, start off with
small delay of 1 which would suffice most cases and in case of contention
double the delay. Eventually the delay is sufficient such that the coherency
pipeline is drained, thus a subsequent exclusive access would succeed.

Link: http://lkml.kernel.org/r/1438612568-28265-1-git-send-email-vgupta@synopsys.comAcked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarVineet Gupta <vgupta@synopsys.com>
parent 69cbe630
......@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
default y
depends on !ARC_CANT_LLSC
config ARC_STAR_9000923308
bool "Workaround for llock/scond livelock"
default y
depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
......
......@@ -23,17 +23,51 @@
#define atomic_set(v, i) (((v)->counter) = (i))
#ifdef CONFIG_ARC_STAR_9000923308
#define SCOND_FAIL_RETRY_VAR_DEF \
unsigned int delay = 1, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" bz 4f \n" \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \
" mov.z %[delay], 1 \n" /* handle overflow */ \
" b 1b \n" /* start over */ \
"4: ; --- success --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \
#else /* !CONFIG_ARC_STAR_9000923308 */
#define SCOND_FAIL_RETRY_VAR_DEF
#define SCOND_FAIL_RETRY_ASM \
" bnz 1b \n" \
#define SCOND_FAIL_RETRY_VARS
#endif
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
" \n" \
SCOND_FAIL_RETRY_ASM \
\
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
......@@ -43,6 +77,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\
/* \
* Explicit full memory barrier needed before/after as \
......@@ -54,8 +89,11 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
" \n" \
SCOND_FAIL_RETRY_ASM \
\
: [val] "=&r" (val) \
SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
......@@ -142,6 +180,9 @@ ATOMIC_OP(and, &=, and)
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
/**
* __atomic_add_unless - add unless the number is a given value
......
......@@ -20,6 +20,11 @@
#ifdef CONFIG_ARC_HAS_LLSC
/*
* A normal LLOCK/SCOND based system, w/o need for livelock workaround
*/
#ifndef CONFIG_ARC_STAR_9000923308
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
......@@ -233,6 +238,294 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
smp_mb();
}
#else /* CONFIG_ARC_STAR_9000923308 */
/*
* HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
* coherency transactions in the SCU. The exclusive line state keeps rotating
* among contenting cores leading to a never ending cycle. So break the cycle
* by deferring the retry of failed exclusive access (SCOND). The actual delay
* needed is function of number of contending cores as well as the unrelated
* coherency traffic from other cores. To keep the code simple, start off with
* small delay of 1 which would suffice most cases and in case of contention
* double the delay. Eventually the delay is sufficient such that the coherency
* pipeline is drained, thus a subsequent exclusive access would succeed.
*/
#define SCOND_FAIL_RETRY_VAR_DEF \
unsigned int delay, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \
" mov.z %[delay], 1 \n" /* handle overflow */ \
" b 1b \n" /* start over */ \
" \n" \
"4: ; --- done --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
smp_mb();
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* zero means writer holds the lock exclusively, deny Reader.
* Otherwise grant lock to first/subseq reader
*
* if (rw->counter > 0) {
* rw->counter--;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */
" sub %[val], %[val], 1 \n" /* reader lock */
" scond %[val], [%[rwlock]] \n"
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
" sub %[val], %[val], 1 \n" /* counter-- */
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
* deny writer. Otherwise if unlocked grant to writer
* Hence the claim that Linux rwlocks are unfair to writers.
* (can be starved for an indefinite time by readers).
*
* if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
* rw->counter = 0;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bz 4f \n"
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter++;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" add %[val], %[val], 1 \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter))
: "memory", "cc");
smp_mb();
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" scond %[UNLOCKED], [%[rwlock]]\n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
: "memory", "cc");
smp_mb();
}
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
#endif /* CONFIG_ARC_STAR_9000923308 */
#else /* !CONFIG_ARC_HAS_LLSC */
static inline void arch_spin_lock(arch_spinlock_t *lock)
......
......@@ -336,6 +336,10 @@ static void arc_chk_core_config(void)
pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
else if (!cpu->extn.fpu_dp && fpu_enabled)
panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
!IS_ENABLED(CONFIG_ARC_STAR_9000923308))
panic("llock/scond livelock workaround missing\n");
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment