Commit 66ff4d61 authored by Leon Hwang, committed by Andrii Nakryiko

bpf, arm64: Fix tailcall hierarchy

This patch fixes a tailcall issue caused by abusing the tailcall in
bpf2bpf feature on arm64 like the way of "bpf, x64: Fix tailcall
hierarchy".

On arm64, when a tail call happens, it uses tail_call_cnt_ptr to
increment tail_call_cnt, too.

At the prologue of main prog, it has to initialize tail_call_cnt and
prepare tail_call_cnt_ptr.

At the prologue of subprog, it pushes x26 register twice, and does not
initialize tail_call_cnt.

At the epilogue, it pops x26 twice, no matter whether it is main prog or
subprog.

Fixes: d4609a5d ("bpf, arm64: Keep tail call count across bpf2bpf calls")
Acked-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
Link: https://lore.kernel.org/r/20240714123902.32305-3-hffilwlqm@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parent 116e04ba
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5) #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
...@@ -63,8 +63,8 @@ static const int bpf2a64[] = { ...@@ -63,8 +63,8 @@ static const int bpf2a64[] = {
[TMP_REG_1] = A64_R(10), [TMP_REG_1] = A64_R(10),
[TMP_REG_2] = A64_R(11), [TMP_REG_2] = A64_R(11),
[TMP_REG_3] = A64_R(12), [TMP_REG_3] = A64_R(12),
/* tail_call_cnt */ /* tail_call_cnt_ptr */
[TCALL_CNT] = A64_R(26), [TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */ /* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9), [BPF_REG_AX] = A64_R(9),
[FP_BOTTOM] = A64_R(27), [FP_BOTTOM] = A64_R(27),
...@@ -282,13 +282,35 @@ static bool is_lsi_offset(int offset, int scale) ...@@ -282,13 +282,35 @@ static bool is_lsi_offset(int offset, int scale)
* mov x29, sp * mov x29, sp
* stp x19, x20, [sp, #-16]! * stp x19, x20, [sp, #-16]!
* stp x21, x22, [sp, #-16]! * stp x21, x22, [sp, #-16]!
* stp x25, x26, [sp, #-16]! * stp x26, x25, [sp, #-16]!
* stp x26, x25, [sp, #-16]!
* stp x27, x28, [sp, #-16]! * stp x27, x28, [sp, #-16]!
* mov x25, sp * mov x25, sp
* mov tcc, #0 * mov tcc, #0
* // PROLOGUE_OFFSET * // PROLOGUE_OFFSET
*/ */
static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
const u8 ptr = bpf2a64[TCCNT_PTR];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = ptr;
emit(A64_PUSH(ptr, fp, A64_SP), ctx);
if (is_main_prog) {
/* Initialize tail_call_cnt. */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
emit(A64_PUSH(tcc, fp, A64_SP), ctx);
emit(A64_MOV(1, ptr, A64_SP), ctx);
} else {
emit(A64_PUSH(ptr, fp, A64_SP), ctx);
emit(A64_NOP, ctx);
emit(A64_NOP, ctx);
}
}
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
...@@ -296,7 +318,7 @@ static bool is_lsi_offset(int offset, int scale) ...@@ -296,7 +318,7 @@ static bool is_lsi_offset(int offset, int scale)
#define POKE_OFFSET (BTI_INSNS + 1) #define POKE_OFFSET (BTI_INSNS + 1)
/* Tail call offset to jump into */ /* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 10)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
bool is_exception_cb, u64 arena_vm_start) bool is_exception_cb, u64 arena_vm_start)
...@@ -308,7 +330,6 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, ...@@ -308,7 +330,6 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9]; const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = bpf2a64[TCALL_CNT];
const u8 fpb = bpf2a64[FP_BOTTOM]; const u8 fpb = bpf2a64[FP_BOTTOM];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
const int idx0 = ctx->idx; const int idx0 = ctx->idx;
...@@ -359,7 +380,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, ...@@ -359,7 +380,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
/* Save callee-saved registers */ /* Save callee-saved registers */
emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r6, r7, A64_SP), ctx);
emit(A64_PUSH(r8, r9, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx);
emit(A64_PUSH(fp, tcc, A64_SP), ctx); prepare_bpf_tail_call_cnt(ctx);
emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
} else { } else {
/* /*
...@@ -372,18 +393,15 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, ...@@ -372,18 +393,15 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
* callee-saved registers. The exception callback will not push * callee-saved registers. The exception callback will not push
* anything and re-use the main program's stack. * anything and re-use the main program's stack.
* *
* 10 registers are on the stack * 12 registers are on the stack
*/ */
emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
} }
/* Set up BPF prog stack base register */ /* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx); emit(A64_MOV(1, fp, A64_SP), ctx);
if (!ebpf_from_cbpf && is_main_prog) { if (!ebpf_from_cbpf && is_main_prog) {
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
cur_offset = ctx->idx - idx0; cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) { if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
...@@ -432,7 +450,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) ...@@ -432,7 +450,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp = bpf2a64[TMP_REG_1];
const u8 prg = bpf2a64[TMP_REG_2]; const u8 prg = bpf2a64[TMP_REG_2];
const u8 tcc = bpf2a64[TCALL_CNT]; const u8 tcc = bpf2a64[TMP_REG_3];
const u8 ptr = bpf2a64[TCCNT_PTR];
const int idx0 = ctx->idx; const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0) #define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset)) #define jmp_offset (out_offset - (cur_offset))
...@@ -449,11 +468,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) ...@@ -449,11 +468,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_B_(A64_COND_CS, jmp_offset), ctx); emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
/* /*
* if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
* goto out; * goto out;
* tail_call_cnt++; * (*tail_call_cnt_ptr)++;
*/ */
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_LDR64I(tcc, ptr, 0), ctx);
emit(A64_CMP(1, tcc, tmp), ctx); emit(A64_CMP(1, tcc, tmp), ctx);
emit(A64_B_(A64_COND_CS, jmp_offset), ctx); emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
...@@ -469,6 +489,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) ...@@ -469,6 +489,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_LDR64(prg, tmp, prg), ctx);
emit(A64_CBZ(1, prg, jmp_offset), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx);
/* Update tail_call_cnt if the slot is populated. */
emit(A64_STR64I(tcc, ptr, 0), ctx);
/* goto *(prog->bpf_func + prologue_offset); */ /* goto *(prog->bpf_func + prologue_offset); */
off = offsetof(struct bpf_prog, bpf_func); off = offsetof(struct bpf_prog, bpf_func);
emit_a64_mov_i64(tmp, off, ctx); emit_a64_mov_i64(tmp, off, ctx);
...@@ -721,6 +744,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) ...@@ -721,6 +744,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9]; const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fp = bpf2a64[BPF_REG_FP];
const u8 ptr = bpf2a64[TCCNT_PTR];
const u8 fpb = bpf2a64[FP_BOTTOM]; const u8 fpb = bpf2a64[FP_BOTTOM];
/* We're done with BPF stack */ /* We're done with BPF stack */
...@@ -738,7 +762,8 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) ...@@ -738,7 +762,8 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
/* Restore x27 and x28 */ /* Restore x27 and x28 */
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
/* Restore fs (x25) and x26 */ /* Restore fs (x25) and x26 */
emit(A64_POP(fp, A64_R(26), A64_SP), ctx); emit(A64_POP(ptr, fp, A64_SP), ctx);
emit(A64_POP(ptr, fp, A64_SP), ctx);
/* Restore callee-saved register */ /* Restore callee-saved register */
emit(A64_POP(r8, r9, A64_SP), ctx); emit(A64_POP(r8, r9, A64_SP), ctx);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment