Commit 03a679a1 authored by Arnd Bergmann

Merge tag 'generic-ticket-spinlocks-v6' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux into asm-generic

asm-generic: New generic ticket-based spinlock

This contains a new ticket-based spinlock that uses only generic
atomics and doesn't require as much from the memory system as qspinlock
does in order to be fair.  It also includes a bit of documentation about
the qspinlock and qrwlock fairness requirements.

This will soon be used by a handful of architectures that don't meet the
qspinlock requirements.
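
In a nutshell, the lock is a single 32-bit atomic word with a "next" ticket in
the upper half and an "owner" ticket in the lower half; a CPU takes a ticket
with atomic_fetch_add() and spins until the owner field reaches it. The acquire
path, excerpted from the asm-generic/spinlock.h hunk below with added
explanatory comments, looks roughly like this:

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	/* Take a ticket: the pre-increment "next" half is ours. */
	u32 val = atomic_fetch_add(1 << 16, lock);
	u16 ticket = val >> 16;

	/* Uncontended: our ticket already equals the owner half. */
	if (ticket == (u16)val)
		return;

	/* Otherwise wait until the owner half advances to our ticket. */
	atomic_cond_read_acquire(lock, ticket == (u16)VAL);
	smp_mb();
}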

* tag 'generic-ticket-spinlocks-v6' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux:
  csky: Move to generic ticket-spinlock
  RISC-V: Move to queued RW locks
  RISC-V: Move to generic spinlocks
  openrisc: Move to ticket-spinlock
  asm-generic: qrwlock: Document the spinlock fairness requirements
  asm-generic: qspinlock: Indicate the use of mixed-size atomics
  asm-generic: ticket-lock: New generic ticket-based spinlock
parents fba2689e 9282d099
@@ -3,7 +3,10 @@ generic-y += asm-offsets.h
 generic-y += extable.h
 generic-y += gpio.h
 generic-y += kvm_para.h
+generic-y += spinlock.h
+generic-y += spinlock_types.h
 generic-y += qrwlock.h
+generic-y += qrwlock_types.h
 generic-y += parport.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ASM_CSKY_SPINLOCK_H
#define __ASM_CSKY_SPINLOCK_H

#include <linux/spinlock_types.h>
#include <asm/barrier.h>

/*
 * Ticket-based spin-locking.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	arch_spinlock_t lockval;
	u32 ticket_next = 1 << TICKET_NEXT;
	u32 *p = &lock->lock;
	u32 tmp;

	asm volatile (
		"1: ldex.w %0, (%2) \n"
		"   mov %1, %0 \n"
		"   add %0, %3 \n"
		"   stex.w %0, (%2) \n"
		"   bez %0, 1b \n"
		: "=&r" (tmp), "=&r" (lockval)
		: "r"(p), "r"(ticket_next)
		: "cc");

	while (lockval.tickets.next != lockval.tickets.owner)
		lockval.tickets.owner = READ_ONCE(lock->tickets.owner);

	smp_mb();
}

static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	u32 tmp, contended, res;
	u32 ticket_next = 1 << TICKET_NEXT;
	u32 *p = &lock->lock;

	do {
		asm volatile (
			"   ldex.w %0, (%3) \n"
			"   movi %2, 1 \n"
			"   rotli %1, %0, 16 \n"
			"   cmpne %1, %0 \n"
			"   bt 1f \n"
			"   movi %2, 0 \n"
			"   add %0, %0, %4 \n"
			"   stex.w %0, (%3) \n"
			"1: \n"
			: "=&r" (res), "=&r" (tmp), "=&r" (contended)
			: "r"(p), "r"(ticket_next)
			: "cc");
	} while (!res);

	if (!contended)
		smp_mb();

	return !contended;
}

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	smp_mb();
	WRITE_ONCE(lock->tickets.owner, lock->tickets.owner + 1);
}

static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return lock.tickets.owner == lock.tickets.next;
}

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	return !arch_spin_value_unlocked(READ_ONCE(*lock));
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	struct __raw_tickets tickets = READ_ONCE(lock->tickets);

	return (tickets.next - tickets.owner) > 1;
}
#define arch_spin_is_contended	arch_spin_is_contended

#include <asm/qrwlock.h>

#endif /* __ASM_CSKY_SPINLOCK_H */
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
#define __ASM_CSKY_SPINLOCK_TYPES_H

#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif

#define TICKET_NEXT	16

typedef struct {
	union {
		u32 lock;
		struct __raw_tickets {
			/* little endian */
			u16 owner;
			u16 next;
		} tickets;
	};
} arch_spinlock_t;

#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }

#include <asm-generic/qrwlock_types.h>

#endif /* __ASM_CSKY_SPINLOCK_TYPES_H */
@@ -30,7 +30,6 @@ config OPENRISC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select OR1K_PIC
 	select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
-	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_USE_QUEUED_RWLOCKS
 	select OMPIC if SMP
 	select ARCH_WANT_FRAME_POINTERS
...
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
-generic-y += qspinlock_types.h
-generic-y += qspinlock.h
+generic-y += spinlock_types.h
+generic-y += spinlock.h
 generic-y += qrwlock_types.h
 generic-y += qrwlock.h
 generic-y += user.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * OpenRISC Linux
 *
 * Linux architectural port borrowing liberally from similar works of
 * others.  All original copyrights apply as per the original source
 * declaration.
 *
 * OpenRISC implementation:
 * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
 * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
 * et al.
 */

#ifndef __ASM_OPENRISC_SPINLOCK_H
#define __ASM_OPENRISC_SPINLOCK_H

#include <asm/qspinlock.h>
#include <asm/qrwlock.h>

#define arch_spin_relax(lock)	cpu_relax()
#define arch_read_relax(lock)	cpu_relax()
#define arch_write_relax(lock)	cpu_relax()

#endif

#ifndef _ASM_OPENRISC_SPINLOCK_TYPES_H
#define _ASM_OPENRISC_SPINLOCK_TYPES_H

#include <asm/qspinlock_types.h>
#include <asm/qrwlock_types.h>

#endif /* _ASM_OPENRISC_SPINLOCK_TYPES_H */
@@ -39,6 +39,7 @@ config RISCV
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
 	select ARCH_USE_MEMTEST
+	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_GENERAL_HUGETLB
...
@@ -3,5 +3,9 @@ generic-y += early_ioremap.h
 generic-y += flat.h
 generic-y += kvm_para.h
 generic-y += parport.h
+generic-y += spinlock.h
+generic-y += spinlock_types.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Regents of the University of California
 * Copyright (C) 2017 SiFive
 */

#ifndef _ASM_RISCV_SPINLOCK_H
#define _ASM_RISCV_SPINLOCK_H

#include <linux/kernel.h>
#include <asm/current.h>
#include <asm/fence.h>

/*
 * Simple spin lock operations. These provide no fairness guarantees.
 */

/* FIXME: Replace this with a ticket lock, like MIPS. */

#define arch_spin_is_locked(x)	(READ_ONCE((x)->lock) != 0)

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	smp_store_release(&lock->lock, 0);
}

static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	int tmp = 1, busy;

	__asm__ __volatile__ (
		"	amoswap.w %0, %2, %1\n"
		RISCV_ACQUIRE_BARRIER
		: "=r" (busy), "+A" (lock->lock)
		: "r" (tmp)
		: "memory");

	return !busy;
}

static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	while (1) {
		if (arch_spin_is_locked(lock))
			continue;

		if (arch_spin_trylock(lock))
			break;
	}
}

/***********************************************************/

static inline void arch_read_lock(arch_rwlock_t *lock)
{
	int tmp;

	__asm__ __volatile__(
		"1:	lr.w	%1, %0\n"
		"	bltz	%1, 1b\n"
		"	addi	%1, %1, 1\n"
		"	sc.w	%1, %1, %0\n"
		"	bnez	%1, 1b\n"
		RISCV_ACQUIRE_BARRIER
		: "+A" (lock->lock), "=&r" (tmp)
		:: "memory");
}

static inline void arch_write_lock(arch_rwlock_t *lock)
{
	int tmp;

	__asm__ __volatile__(
		"1:	lr.w	%1, %0\n"
		"	bnez	%1, 1b\n"
		"	li	%1, -1\n"
		"	sc.w	%1, %1, %0\n"
		"	bnez	%1, 1b\n"
		RISCV_ACQUIRE_BARRIER
		: "+A" (lock->lock), "=&r" (tmp)
		:: "memory");
}

static inline int arch_read_trylock(arch_rwlock_t *lock)
{
	int busy;

	__asm__ __volatile__(
		"1:	lr.w	%1, %0\n"
		"	bltz	%1, 1f\n"
		"	addi	%1, %1, 1\n"
		"	sc.w	%1, %1, %0\n"
		"	bnez	%1, 1b\n"
		RISCV_ACQUIRE_BARRIER
		"1:\n"
		: "+A" (lock->lock), "=&r" (busy)
		:: "memory");

	return !busy;
}

static inline int arch_write_trylock(arch_rwlock_t *lock)
{
	int busy;

	__asm__ __volatile__(
		"1:	lr.w	%1, %0\n"
		"	bnez	%1, 1f\n"
		"	li	%1, -1\n"
		"	sc.w	%1, %1, %0\n"
		"	bnez	%1, 1b\n"
		RISCV_ACQUIRE_BARRIER
		"1:\n"
		: "+A" (lock->lock), "=&r" (busy)
		:: "memory");

	return !busy;
}

static inline void arch_read_unlock(arch_rwlock_t *lock)
{
	__asm__ __volatile__(
		RISCV_RELEASE_BARRIER
		"	amoadd.w x0, %1, %0\n"
		: "+A" (lock->lock)
		: "r" (-1)
		: "memory");
}

static inline void arch_write_unlock(arch_rwlock_t *lock)
{
	smp_store_release(&lock->lock, 0);
}

#endif /* _ASM_RISCV_SPINLOCK_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Regents of the University of California
 */

#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
#define _ASM_RISCV_SPINLOCK_TYPES_H

#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif

typedef struct {
	volatile unsigned int lock;
} arch_spinlock_t;

#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }

typedef struct {
	volatile unsigned int lock;
} arch_rwlock_t;

#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }

#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */
@@ -2,6 +2,10 @@
 /*
  * Queue read/write lock
  *
+ * These use generic atomic and locking routines, but depend on a fair spinlock
+ * implementation in order to be fair themselves.  The implementation in
+ * asm-generic/spinlock.h meets these requirements.
+ *
  * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
  *
  * Authors: Waiman Long <waiman.long@hp.com>
...
@@ -2,6 +2,35 @@
 /*
  * Queued spinlock
  *
+ * A 'generic' spinlock implementation that is based on MCS locks. For an
+ * architecture that's looking for a 'generic' spinlock, please first consider
+ * ticket-lock.h and only come looking here when you've considered all the
+ * constraints below and can show your hardware does actually perform better
+ * with qspinlock.
+ *
+ * qspinlock relies on atomic_*_release()/atomic_*_acquire() to be RCsc (or no
+ * weaker than RCtso if you're power), where regular code only expects atomic_t
+ * to be RCpc.
+ *
+ * qspinlock relies on a far greater (compared to asm-generic/spinlock.h) set
+ * of atomic operations to behave well together, please audit them carefully to
+ * ensure they all have forward progress. Many atomic operations may default to
+ * cmpxchg() loops which will not have good forward progress properties on
+ * LL/SC architectures.
+ *
+ * One notable example is atomic_fetch_or_acquire(), which x86 cannot (cheaply)
+ * do. Carefully read the patches that introduced
+ * queued_fetch_set_pending_acquire().
+ *
+ * qspinlock also heavily relies on mixed size atomic operations, in specific
+ * it requires architectures to have xchg16; something which many LL/SC
+ * architectures need to implement as a 32bit and+or in order to satisfy the
+ * forward progress guarantees mentioned above.
+ *
+ * Further reading on mixed size atomics that might be relevant:
+ *
+ *   http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf
+ *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
...
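
To make the xchg16 point above concrete, the following stand-alone sketch (not
part of this series; the helper name and the little-endian, low-halfword layout
are assumptions for illustration) shows how an LL/SC architecture without a
native 16-bit xchg could emulate it with a 32-bit cmpxchg loop on the
containing word; the retry loop is exactly the kind of construct whose
forward-progress behaviour the comment asks you to audit:

#include <linux/atomic.h>

/* Hypothetical helper, for illustration only. */
static inline u16 xchg16_sketch(u32 *word, u16 new)
{
	u32 old = READ_ONCE(*word);
	u32 tmp;

	do {
		/* Rebuild the word with the low halfword replaced. */
		tmp = (old & 0xffff0000u) | new;
	} while (!try_cmpxchg(word, &old, tmp));	/* retries if *word changed */

	return (u16)old;	/* previous low halfword */
}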
 /* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * 'Generic' ticket-lock implementation.
+ *
+ * It relies on atomic_fetch_add() having well defined forward progress
+ * guarantees under contention. If your architecture cannot provide this, stick
+ * to a test-and-set lock.
+ *
+ * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
+ * sub-word of the value. This is generally true for anything LL/SC although
+ * you'd be hard pressed to find anything useful in architecture specifications
+ * about this. If your architecture cannot do this you might be better off with
+ * a test-and-set.
+ *
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
+ * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
+ * a full fence after the spin to upgrade the otherwise-RCpc
+ * atomic_cond_read_acquire().
+ *
+ * The implementation uses smp_cond_load_acquire() to spin, so if the
+ * architecture has WFE like instructions to sleep instead of poll for word
+ * modifications be sure to implement that (see ARM64 for example).
+ *
+ */
+
 #ifndef __ASM_GENERIC_SPINLOCK_H
 #define __ASM_GENERIC_SPINLOCK_H
 
-/*
- * You need to implement asm/spinlock.h for SMP support. The generic
- * version does not handle SMP.
- */
-#ifdef CONFIG_SMP
-#error need an architecture specific asm/spinlock.h
-#endif
+#include <linux/atomic.h>
+#include <asm-generic/spinlock_types.h>
+
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	u32 val = atomic_fetch_add(1<<16, lock);
+	u16 ticket = val >> 16;
+
+	if (ticket == (u16)val)
+		return;
+
+	/*
+	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
+	 * custom cond_read_rcsc() here we just emit a full fence.  We only
+	 * need the prior reads before subsequent writes ordering from
+	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
+	 * have no outstanding writes due to the atomic_fetch_add() the extra
+	 * orderings are free.
+	 */
+	atomic_cond_read_acquire(lock, ticket == (u16)VAL);
+	smp_mb();
+}
+
+static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
+{
+	u32 old = atomic_read(lock);
+
+	if ((old >> 16) != (old & 0xffff))
+		return false;
+
+	return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
+}
+
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+	u32 val = atomic_read(lock);
+
+	smp_store_release(ptr, (u16)val + 1);
+}
+
+static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	u32 val = atomic_read(lock);
+
+	return ((val >> 16) != (val & 0xffff));
+}
+
+static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	u32 val = atomic_read(lock);
+
+	return (s16)((val >> 16) - (val & 0xffff)) > 1;
+}
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return !arch_spin_is_locked(&lock);
+}
+
+#include <asm/qrwlock.h>
 
 #endif /* __ASM_GENERIC_SPINLOCK_H */
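
As a quick sanity check of the encoding used above (a stand-alone user-space
illustration, not kernel code), the word starts with next == owner, an acquire
bumps the "next" half and keeps the old value as its ticket, and an unlock
bumps the "owner" half:

#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint32_t lock = 0;			/* unlocked: next == owner == 0 */

	uint32_t val = lock;			/* value atomic_fetch_add() would return */
	lock += UINT32_C(1) << 16;		/* take a ticket: bump "next" */
	uint16_t ticket = (uint16_t)(val >> 16);

	assert(ticket == (uint16_t)val);	/* uncontended: lock acquired */
	assert((lock >> 16) != (lock & 0xffff));	/* observers see it locked */

	lock += 1;				/* unlock: bump "owner" (low half) */
	assert((lock >> 16) == (lock & 0xffff));	/* unlocked again */

	return 0;
}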
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
#define __ASM_GENERIC_SPINLOCK_TYPES_H

#include <linux/types.h>
typedef atomic_t arch_spinlock_t;

/*
 * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
 * include.
 */
#include <asm/qrwlock_types.h>

#define __ARCH_SPIN_LOCK_UNLOCKED	ATOMIC_INIT(0)

#endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */