Commit 34b8ab09 authored by Daniel Borkmann's avatar Daniel Borkmann Committed by Alexei Starovoitov

bpf, arm64: use more scalable stadd over ldxr / stxr loop in xadd

Since ARMv8.1 supplement introduced LSE atomic instructions back in 2016,
lets add support for STADD and use that in favor of LDXR / STXR loop for
the XADD mapping if available. STADD is encoded as an alias for LDADD with
XZR as the destination register, therefore add LDADD to the instruction
encoder along with STADD as special case and use it in the JIT for CPUs
that advertise LSE atomics in CPUID register. If immediate offset in the
BPF XADD insn is 0, then use dst register directly instead of temporary
one.
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Acked-by: default avatarJean-Philippe Brucker <jean-philippe.brucker@arm.com>
Acked-by: default avatarWill Deacon <will.deacon@arm.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 8968c67a
...@@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) ...@@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0xB8200000)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
...@@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, ...@@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state, enum aarch64_insn_register state,
enum aarch64_insn_size_type size, enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type); enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src, enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant, int imm, enum aarch64_insn_variant variant,
......
...@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, ...@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
state); state);
} }
u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size)
{
u32 insn = aarch64_insn_get_ldadd_value();
switch (size) {
case AARCH64_INSN_SIZE_32:
case AARCH64_INSN_SIZE_64:
break;
default:
pr_err("%s: unimplemented size encoding %d\n", __func__, size);
return AARCH64_BREAK_FAULT;
}
insn = aarch64_insn_encode_ldst_size(size, insn);
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
result);
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
address);
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
value);
}
u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size)
{
/*
* STADD is simply encoded as an alias for LDADD with XZR as
* the destination register.
*/
return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
value, size);
}
static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy, enum aarch64_insn_prfm_policy policy,
......
...@@ -100,6 +100,10 @@ ...@@ -100,6 +100,10 @@
#define A64_STXR(sf, Rt, Rn, Rs) \ #define A64_STXR(sf, Rt, Rn, Rs) \
A64_LSX(sf, Rt, Rn, Rs, STORE_EX) A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
/* LSE atomics */
#define A64_STADD(sf, Rn, Rs) \
aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
/* Add/subtract (immediate) */ /* Add/subtract (immediate) */
#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
......
...@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, ...@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const bool is64 = BPF_CLASS(code) == BPF_ALU64 || const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP; BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW; const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond; u8 jmp_cond, reg;
s32 jmp_offset; s32 jmp_offset;
#define check_imm(bits, imm) do { \ #define check_imm(bits, imm) do { \
...@@ -756,18 +756,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, ...@@ -756,18 +756,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break; break;
} }
break; break;
/* STX XADD: lock *(u32 *)(dst + off) += src */ /* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W: case BPF_STX | BPF_XADD | BPF_W:
/* STX XADD: lock *(u64 *)(dst + off) += src */ /* STX XADD: lock *(u64 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_DW: case BPF_STX | BPF_XADD | BPF_DW:
emit_a64_mov_i(1, tmp, off, ctx); if (!off) {
emit(A64_ADD(1, tmp, tmp, dst), ctx); reg = dst;
emit(A64_LDXR(isdw, tmp2, tmp), ctx); } else {
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); emit_a64_mov_i(1, tmp, off, ctx);
emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); emit(A64_ADD(1, tmp, tmp, dst), ctx);
jmp_offset = -3; reg = tmp;
check_imm19(jmp_offset); }
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
emit(A64_STADD(isdw, reg, src), ctx);
} else {
emit(A64_LDXR(isdw, tmp2, reg), ctx);
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
}
break; break;
default: default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment