Commit c09d6a04 authored by Will Deacon

arm64: atomics: patch in lse instructions when supported by the CPU

On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of atomic_t and atomic64_t
routines so that the call-site for the out-of-line ll/sc sequences is
patched with an LSE atomic instruction when we detect that
the CPU supports it.

If binutils is not recent enough to assemble the LSE instructions, then
the ll/sc sequences are inlined as though CONFIG_ARM64_LSE_ATOMICS=n.
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent c0385b24
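
The call-site pattern is easiest to see with atomic_add() from the new asm/atomic_lse.h below. This is a simplified sketch taken from the patch itself (the __LL_SC_ATOMIC() and ARM64_LSE_ATOMIC_INSN() helpers are defined further down): the default text is a branch-and-link to the out-of-line ll/sc routine, and the LSE instruction is the alternative that gets patched in at boot once the CPU capability is known.

        /*
         * Call-site pattern introduced by this patch (simplified from
         * asm/atomic_lse.h). The first argument to ARM64_LSE_ATOMIC_INSN()
         * is the default sequence, a "bl" to the out-of-line ll/sc routine;
         * the second is the LSE instruction that the alternatives framework
         * patches in when the CPU advertises LSE atomics.
         */
        static inline void atomic_add(int i, atomic_t *v)
        {
                register int w0 asm ("w0") = i;
                register atomic_t *x1 asm ("x1") = v;

                asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
                "       stadd   %w[i], %[v]\n")
                : [i] "+r" (w0), [v] "+Q" (v->counter)
                : "r" (x1)
                : "x30");
        }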
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -17,7 +17,18 @@ GZFLAGS		:=-9
 
 KBUILD_DEFCONFIG := defconfig
 
-KBUILD_CFLAGS	+= -mgeneral-regs-only
+# Check for binutils support for specific extensions
+lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1)
+
+ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y)
+  ifeq ($(lseinstr),)
+$(warning LSE atomics not supported by binutils)
+  endif
+endif
+
+KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
+KBUILD_AFLAGS	+= $(lseinstr)
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 AS		+= -EB
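
The only thing the rest of the tree sees from this probe is the CONFIG_AS_LSE define. A minimal sketch of how it is meant to be consumed (this mirrors the #if test added to asm/atomic.h below):

        /*
         * -DCONFIG_AS_LSE=1 is only added to KBUILD_CFLAGS/KBUILD_AFLAGS when
         * the assembler accepts ".arch_extension lse", so both the Kconfig
         * option and toolchain support must hold before LSE mnemonics are
         * emitted.
         */
        #if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
        /* safe to use LSE instructions such as stadd/casal */
        #else
        /* fall back to the inlined ll/sc sequences */
        #endif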
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -21,11 +21,11 @@
 #define __ASM_ATOMIC_H
 
 #include <linux/compiler.h>
-#include <linux/stringify.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
+#include <asm/lse.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
@@ -33,7 +33,7 @@
 
 #define __ARM64_IN_ATOMIC_IMPL
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
+#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
 #include <asm/atomic_lse.h>
 #else
 #include <asm/atomic_ll_sc.h>
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -37,18 +37,6 @@
  * (the optimize attribute silently ignores these options).
  */
 
-#ifndef __LL_SC_INLINE
-#define __LL_SC_INLINE		static inline
-#endif
-
-#ifndef __LL_SC_PREFIX
-#define __LL_SC_PREFIX(x)	x
-#endif
-
-#ifndef __LL_SC_EXPORT
-#define __LL_SC_EXPORT(x)
-#endif
-
 #define ATOMIC_OP(op, asm_op) \
 __LL_SC_INLINE void \
 __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
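
These defaults do not disappear; they move into the new asm/lse.h further down, where the LSE configuration overrides them so that the ll/sc routines are built as real, exported __ll_sc_* functions instead of being inlined. A rough sketch of what ATOMIC_OP(add, add) from this header becomes in each configuration:

        /*
         * Rough expansion of ATOMIC_OP(add, add) from atomic_ll_sc.h:
         *
         * with CONFIG_ARM64_LSE_ATOMICS && CONFIG_AS_LSE:
         *      void __ll_sc_atomic_add(int i, atomic_t *v) { ... ll/sc ... }
         *      EXPORT_SYMBOL(__ll_sc_atomic_add);
         *      // reached via "bl __ll_sc_atomic_add" from the patched call-sites
         *
         * otherwise:
         *      static inline void atomic_add(int i, atomic_t *v) { ... ll/sc ... }
         *      // the ll/sc sequence is inlined exactly as before
         */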
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -25,60 +25,129 @@
 #error "please don't include this file directly"
 #endif
 
-/* Move the ll/sc atomics out-of-line */
-#define __LL_SC_INLINE
-#define __LL_SC_PREFIX(x)	__ll_sc_##x
-#define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))
-
-/* Macros for constructing calls to out-of-line ll/sc atomics */
-#define __LL_SC_CALL(op) \
-	"bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n"
-#define __LL_SC_CALL64(op) \
-	"bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n"
-
-#define ATOMIC_OP(op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
-	register int w0 asm ("w0") = i; \
-	register atomic_t *x1 asm ("x1") = v; \
- \
-	asm volatile( \
-	__LL_SC_CALL(op) \
-	: "+r" (w0), "+Q" (v->counter) \
-	: "r" (x1) \
-	: "x30"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
-	register int w0 asm ("w0") = i; \
-	register atomic_t *x1 asm ("x1") = v; \
- \
-	asm volatile( \
-	__LL_SC_CALL(op##_return) \
-	: "+r" (w0) \
-	: "r" (x1) \
-	: "x30", "memory"); \
- \
-	return w0; \
-}
-
-#define ATOMIC_OPS(op, asm_op) \
-	ATOMIC_OP(op, asm_op) \
-	ATOMIC_OP_RETURN(op, asm_op)
-
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
-
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, orr)
-ATOMIC_OP(xor, eor)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
+#define __LL_SC_ATOMIC(op)	__LL_SC_CALL(atomic_##op)
+
+static inline void atomic_andnot(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
+	"	stclr	%w[i], %[v]\n")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic_or(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
+	"	stset	%w[i], %[v]\n")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
+	"	steor	%w[i], %[v]\n")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
+	"	stadd	%w[i], %[v]\n")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC(add_return),
+	/* LSE atomics */
+	"	ldaddal	%w[i], w30, %[v]\n"
+	"	add	%w[i], %w[i], w30")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30", "memory");
+
+	return w0;
+}
+
+static inline void atomic_and(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC(and),
+	/* LSE atomics */
+	"	mvn	%w[i], %w[i]\n"
+	"	stclr	%w[i], %[v]")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC(sub),
+	/* LSE atomics */
+	"	neg	%w[i], %w[i]\n"
+	"	stadd	%w[i], %[v]")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	register int w0 asm ("w0") = i;
+	register atomic_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC(sub_return)
+	"	nop",
+	/* LSE atomics */
+	"	neg	%w[i], %w[i]\n"
+	"	ldaddal	%w[i], w30, %[v]\n"
+	"	add	%w[i], %w[i], w30")
+	: [i] "+r" (w0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30", "memory");
+
+	return w0;
+}
 
 static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 {
@@ -86,69 +155,164 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	register int w1 asm ("w1") = old;
 	register int w2 asm ("w2") = new;
 
-	asm volatile(
-	__LL_SC_CALL(cmpxchg)
-	: "+r" (x0)
-	: "r" (w1), "r" (w2)
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC(cmpxchg)
+	"	nop",
+	/* LSE atomics */
+	"	mov	w30, %w[old]\n"
+	"	casal	w30, %w[new], %[v]\n"
+	"	mov	%w[ret], w30")
+	: [ret] "+r" (x0), [v] "+Q" (ptr->counter)
+	: [old] "r" (w1), [new] "r" (w2)
 	: "x30", "cc", "memory");
 
 	return x0;
 }
 
-#define ATOMIC64_OP(op, asm_op) \
-static inline void atomic64_##op(long i, atomic64_t *v) \
-{ \
-	register long x0 asm ("x0") = i; \
-	register atomic64_t *x1 asm ("x1") = v; \
- \
-	asm volatile( \
-	__LL_SC_CALL64(op) \
-	: "+r" (x0), "+Q" (v->counter) \
-	: "r" (x1) \
-	: "x30"); \
-} \
-
-#define ATOMIC64_OP_RETURN(op, asm_op) \
-static inline long atomic64_##op##_return(long i, atomic64_t *v) \
-{ \
-	register long x0 asm ("x0") = i; \
-	register atomic64_t *x1 asm ("x1") = v; \
- \
-	asm volatile( \
-	__LL_SC_CALL64(op##_return) \
-	: "+r" (x0) \
-	: "r" (x1) \
-	: "x30", "memory"); \
- \
-	return x0; \
-}
-
-#define ATOMIC64_OPS(op, asm_op) \
-	ATOMIC64_OP(op, asm_op) \
-	ATOMIC64_OP_RETURN(op, asm_op)
-
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
-
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, orr)
-ATOMIC64_OP(xor, eor)
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
+#undef __LL_SC_ATOMIC
+
+#define __LL_SC_ATOMIC64(op)	__LL_SC_CALL(atomic64_##op)
+
+static inline void atomic64_andnot(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
+	"	stclr	%[i], %[v]\n")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
+	"	stset	%[i], %[v]\n")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
+	"	steor	%[i], %[v]\n")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
+	"	stadd	%[i], %[v]\n")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(add_return),
+	/* LSE atomics */
+	"	ldaddal	%[i], x30, %[v]\n"
+	"	add	%[i], %[i], x30")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30", "memory");
+
+	return x0;
+}
+
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(and),
+	/* LSE atomics */
+	"	mvn	%[i], %[i]\n"
+	"	stclr	%[i], %[v]")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline void atomic64_sub(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(sub),
+	/* LSE atomics */
+	"	neg	%[i], %[i]\n"
+	"	stadd	%[i], %[v]")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30");
+}
+
+static inline long atomic64_sub_return(long i, atomic64_t *v)
+{
+	register long x0 asm ("x0") = i;
+	register atomic64_t *x1 asm ("x1") = v;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(sub_return)
+	"	nop",
+	/* LSE atomics */
+	"	neg	%[i], %[i]\n"
+	"	ldaddal	%[i], x30, %[v]\n"
+	"	add	%[i], %[i], x30")
+	: [i] "+r" (x0), [v] "+Q" (v->counter)
+	: "r" (x1)
+	: "x30", "memory");
+
+	return x0;
+}
 
 static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 {
 	register unsigned long x0 asm ("x0") = (unsigned long)ptr;
 	register long x1 asm ("x1") = old;
 	register long x2 asm ("x2") = new;
 
-	asm volatile(
-	__LL_SC_CALL64(cmpxchg)
-	: "+r" (x0)
-	: "r" (x1), "r" (x2)
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(cmpxchg)
+	"	nop",
+	/* LSE atomics */
+	"	mov	x30, %[old]\n"
+	"	casal	x30, %[new], %[v]\n"
+	"	mov	%[ret], x30")
+	: [ret] "+r" (x0), [v] "+Q" (ptr->counter)
+	: [old] "r" (x1), [new] "r" (x2)
 	: "x30", "cc", "memory");
 
 	return x0;
@@ -156,15 +320,33 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
-	register unsigned long x0 asm ("x0") = (unsigned long)v;
+	register long x0 asm ("x0") = (long)v;
 
-	asm volatile(
-	__LL_SC_CALL64(dec_if_positive)
-	: "+r" (x0)
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	nop\n"
+	__LL_SC_ATOMIC64(dec_if_positive)
+	"	nop\n"
+	"	nop\n"
+	"	nop\n"
+	"	nop\n"
+	"	nop",
+	/* LSE atomics */
+	"1:	ldr	x30, %[v]\n"
+	"	subs	%[ret], x30, #1\n"
+	"	b.mi	2f\n"
+	"	casal	x30, %[ret], %[v]\n"
+	"	sub	x30, x30, #1\n"
+	"	sub	x30, x30, %[ret]\n"
+	"	cbnz	x30, 1b\n"
+	"2:")
+	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
 	:
 	: "x30", "cc", "memory");
 
 	return x0;
 }
 
+#undef __LL_SC_ATOMIC64
+
 #endif	/* __ASM_ATOMIC_LSE_H */
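
Note the nops in the hunks above: the two sequences handed to ARM64_LSE_ATOMIC_INSN() (and hence to ALTERNATIVE()) have to occupy the same number of bytes so that one can be patched over the other in place, which is why the shorter LL/SC side is padded out where the LSE side needs more than one instruction. A sketch of what atomic_add_return() looks like at the call-site under that constraint (assuming the out-of-line __ll_sc_atomic_add_return routine generated from atomic_ll_sc.h):

        /*
         * atomic_add_return() call-site, both alternatives two instructions:
         *
         *   default (LL/SC):                      patched (LSE):
         *      nop                                   ldaddal w0, w30, [x1]
         *      bl      __ll_sc_atomic_add_return     add     w0, w0, w30
         */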
--- /dev/null
+++ b/arch/arm64/include/asm/lse.h
+#ifndef __ASM_LSE_H
+#define __ASM_LSE_H
+
+#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+
+#include <linux/stringify.h>
+
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
+__asm__(".arch_extension lse");
+
+/* Move the ll/sc atomics out-of-line */
+#define __LL_SC_INLINE
+#define __LL_SC_PREFIX(x)	__ll_sc_##x
+#define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))
+
+/* Macro for constructing calls to out-of-line ll/sc atomics */
+#define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+
+/* In-line patching at runtime */
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
+	ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS)
+
+#else
+
+#define __LL_SC_INLINE		static inline
+#define __LL_SC_PREFIX(x)	x
+#define __LL_SC_EXPORT(x)
+
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
+
+#endif	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif	/* __ASM_LSE_H */
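
Putting the pieces together, a hedged sketch of what a patched call-site reduces to in each configuration (simplified; the real expansion also emits the alternatives-section bookkeeping that the boot-time patching relies on):

        /*
         * What atomic_andnot(i, v) boils down to:
         *
         * LSE build (CONFIG_ARM64_LSE_ATOMICS && CONFIG_AS_LSE):
         *      default text:  bl    __ll_sc_atomic_andnot   // out-of-line ll/sc
         *      alternative:   stclr w0, [x1]                // patched in once the
         *                                                   // capability bit is set
         *
         * fallback build:
         *      ARM64_LSE_ATOMIC_INSN(llsc, lse) expands to just "llsc", and the
         *      __LL_SC_* defaults keep the ll/sc routines static inline, so the
         *      old inlined ll/sc sequence is emitted and nothing is patched.
         */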
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -285,6 +285,9 @@ static void __init setup_processor(void)
 	case 2:
 		elf_hwcap |= HWCAP_ATOMICS;
 		cpus_set_cap(ARM64_CPU_FEAT_LSE_ATOMICS);
+		if (IS_ENABLED(CONFIG_AS_LSE) &&
+		    IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS))
+			pr_info("LSE atomics supported\n");
 	case 1:
 		/* RESERVED */
 	case 0: