Commit 4c1cd4fd authored by Yang Shi, committed by David S. Miller

bpf: arm64: remove callee-save registers use for tmp registers

In the current implementation of the ARM64 eBPF JIT, R23 and R24 are used as
the temporary registers. They are callee-saved, so the JIT prologue and
epilogue vary in size depending on whether a program actually uses them. The
recent blinding-constants change prefers a constant-size prologue and
epilogue. The AAPCS reserves R9 ~ R15 as temporary registers, which do not
need to be saved/restored across function calls. So replace R23 and R24 with
R10 and R11, and remove the tmp_used flag; this also saves two instructions
for some JITed BPF programs.

CC: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cd9e2e5d
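
[Editor's note: for context, a minimal user-space sketch, not kernel code, of why callee-saved temporaries made the prologue size vary while AAPCS temporaries keep it constant. emit() and the instruction strings here are stand-ins; only the register numbers (x19-x22 for BPF_REG_6..9, x23/x24 for the old temporaries) come from the diff below.]

#include <stdio.h>

static int emit_count;

/* Stand-in for the JIT's emit(): count and print "instructions". */
static void emit(const char *insn)
{
	emit_count++;
	printf("\t%s\n", insn);
}

/* Old scheme: x23/x24 are callee-saved, so they must be pushed, but
 * only when the program used them -> two possible prologue sizes. */
static void old_prologue(int tmp_used)
{
	emit("stp x19, x20, [sp, #-16]!");
	emit("stp x21, x22, [sp, #-16]!");
	if (tmp_used)
		emit("stp x23, x24, [sp, #-16]!");
}

/* New scheme: x10/x11 are AAPCS64 temporaries, never saved/restored,
 * so the prologue is always the same size. */
static void new_prologue(void)
{
	emit("stp x19, x20, [sp, #-16]!");
	emit("stp x21, x22, [sp, #-16]!");
}

int main(void)
{
	emit_count = 0; old_prologue(1); printf("old (tmp used): %d insns\n", emit_count);
	emit_count = 0; old_prologue(0); printf("old (no tmp):   %d insns\n", emit_count);
	emit_count = 0; new_prologue(); printf("new (always):   %d insns\n", emit_count);
	return 0;
}
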
@@ -51,9 +51,9 @@ static const int bpf2a64[] = {
 	[BPF_REG_9] = A64_R(22),
 	/* read-only frame pointer to access stack */
 	[BPF_REG_FP] = A64_R(25),
-	/* temporary register for internal BPF JIT */
-	[TMP_REG_1] = A64_R(23),
-	[TMP_REG_2] = A64_R(24),
+	/* temporary registers for internal BPF JIT */
+	[TMP_REG_1] = A64_R(10),
+	[TMP_REG_2] = A64_R(11),
 	/* temporary register for blinding constants */
 	[BPF_REG_AX] = A64_R(9),
 };
@@ -61,7 +61,6 @@ static const int bpf2a64[] = {
 struct jit_ctx {
 	const struct bpf_prog *prog;
 	int idx;
-	int tmp_used;
 	int epilogue_offset;
 	int *offset;
 	u32 *image;
@@ -154,8 +153,6 @@ static void build_prologue(struct jit_ctx *ctx)
 	const u8 r8 = bpf2a64[BPF_REG_8];
 	const u8 r9 = bpf2a64[BPF_REG_9];
 	const u8 fp = bpf2a64[BPF_REG_FP];
-	const u8 tmp1 = bpf2a64[TMP_REG_1];
-	const u8 tmp2 = bpf2a64[TMP_REG_2];
 
 	/*
 	 * BPF prog stack layout
@@ -167,7 +164,7 @@ static void build_prologue(struct jit_ctx *ctx)
 	 *                        | ... | callee saved registers
 	 *                        +-----+
 	 *                        |     | x25/x26
-	 * BPF fp register => -80:+-----+ <= (BPF_FP)
+	 * BPF fp register => -64:+-----+ <= (BPF_FP)
 	 *                        |     |
 	 *                        | ... | BPF prog stack
 	 *                        |     |
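
[Editor's note: the -80 -> -64 change follows directly from dropping the tmp1/tmp2 push: each A64_PUSH stores one 16-byte register pair, and BPF_FP sits below all the pairs pushed in the prologue. A compile-time check of that arithmetic, assuming C11 static_assert; the pair lists come from the prologue hunks below.]

#include <assert.h>

#define PAIR_BYTES 16	/* one A64_PUSH = one stp of two registers */

/* old prologue pairs: FP/LR, r6/r7, r8/r9, tmp1/tmp2, x25/x26 */
static_assert(5 * PAIR_BYTES == 80, "old BPF_FP offset");
/* new prologue pairs: FP/LR, r6/r7, r8/r9, x25/x26 */
static_assert(4 * PAIR_BYTES == 64, "new BPF_FP offset");
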
@@ -189,8 +186,6 @@ static void build_prologue(struct jit_ctx *ctx)
 	/* Save callee-saved register */
 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
 	emit(A64_PUSH(r8, r9, A64_SP), ctx);
-	if (ctx->tmp_used)
-		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
 
 	/* Save fp (x25) and x26. SP requires 16 bytes alignment */
 	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
@@ -210,8 +205,6 @@ static void build_epilogue(struct jit_ctx *ctx)
 	const u8 r8 = bpf2a64[BPF_REG_8];
 	const u8 r9 = bpf2a64[BPF_REG_9];
 	const u8 fp = bpf2a64[BPF_REG_FP];
-	const u8 tmp1 = bpf2a64[TMP_REG_1];
-	const u8 tmp2 = bpf2a64[TMP_REG_2];
 
 	/* We're done with BPF stack */
 	emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
@@ -220,8 +213,6 @@ static void build_epilogue(struct jit_ctx *ctx)
 	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
 
 	/* Restore callee-saved register */
-	if (ctx->tmp_used)
-		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
 	emit(A64_POP(r8, r9, A64_SP), ctx);
 	emit(A64_POP(r6, r7, A64_SP), ctx);
@@ -317,7 +308,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit(A64_UDIV(is64, dst, dst, src), ctx);
 		break;
 	case BPF_MOD:
-		ctx->tmp_used = 1;
 		emit(A64_UDIV(is64, tmp, dst, src), ctx);
 		emit(A64_MUL(is64, tmp, tmp, src), ctx);
 		emit(A64_SUB(is64, dst, dst, tmp), ctx);
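
[Editor's note: AArch64 has UDIV but no remainder instruction, so the three instructions above compute the modulo the long way. A host-side C analogue, illustrative only:]

#include <assert.h>
#include <stdint.h>

static uint64_t mod_via_udiv(uint64_t dst, uint64_t src)
{
	uint64_t tmp = dst / src;	/* A64_UDIV(is64, tmp, dst, src) */
	tmp = tmp * src;		/* A64_MUL(is64, tmp, tmp, src) */
	return dst - tmp;		/* A64_SUB(is64, dst, dst, tmp) */
}

int main(void)
{
	assert(mod_via_udiv(17, 5) == 17 % 5);
	return 0;
}
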
@@ -390,49 +380,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	/* dst = dst OP imm */
 	case BPF_ALU | BPF_ADD | BPF_K:
 	case BPF_ALU64 | BPF_ADD | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_ADD(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_SUB | BPF_K:
 	case BPF_ALU64 | BPF_SUB | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_SUB(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_AND | BPF_K:
 	case BPF_ALU64 | BPF_AND | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_AND(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_OR | BPF_K:
 	case BPF_ALU64 | BPF_OR | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_ORR(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_XOR | BPF_K:
 	case BPF_ALU64 | BPF_XOR | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_EOR(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_MUL | BPF_K:
 	case BPF_ALU64 | BPF_MUL | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_MUL(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_DIV | BPF_K:
 	case BPF_ALU64 | BPF_DIV | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp, imm, ctx);
 		emit(A64_UDIV(is64, dst, dst, tmp), ctx);
 		break;
 	case BPF_ALU | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(is64, tmp2, imm, ctx);
 		emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
 		emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
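
[Editor's note: every BPF_K case above follows the same pattern: AArch64 data-processing instructions cannot encode an arbitrary 32-bit immediate, so the JIT first materializes imm into a temporary with emit_a64_mov_i() and then uses the register-register form. A host-side analogue of the ADD case, illustrative only:]

#include <stdint.h>

static uint64_t alu_add_k(uint64_t dst, int32_t imm)
{
	uint64_t tmp = (uint64_t)(int64_t)imm;	/* emit_a64_mov_i(is64, tmp, imm, ctx) */
	return dst + tmp;			/* A64_ADD(is64, dst, dst, tmp) */
}
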
@@ -503,12 +485,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP | BPF_JSGT | BPF_K:
 	case BPF_JMP | BPF_JSGE | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(1, tmp, imm, ctx);
 		emit(A64_CMP(1, dst, tmp), ctx);
 		goto emit_cond_jmp;
 	case BPF_JMP | BPF_JSET | BPF_K:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(1, tmp, imm, ctx);
 		emit(A64_TST(1, dst, tmp), ctx);
 		goto emit_cond_jmp;
@@ -518,7 +498,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 r0 = bpf2a64[BPF_REG_0];
 		const u64 func = (u64)__bpf_call_base + imm;
 
-		ctx->tmp_used = 1;
 		emit_a64_mov_i64(tmp, func, ctx);
 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
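
[Editor's note: BPF_CALL resolves the helper as __bpf_call_base + imm, loads that 64-bit address into the temporary register with emit_a64_mov_i64(), and (past this hunk's boundary) branches to it indirectly. A simplified host-side analogue; the function-pointer cast and the five-argument signature are assumptions for this sketch, modelling BPF's R1-R5 argument / R0 return convention:]

#include <stdint.h>

typedef uint64_t (*helper_fn)(uint64_t, uint64_t, uint64_t,
			      uint64_t, uint64_t);

static uint64_t call_helper(uint64_t call_base, int32_t imm,
			    uint64_t r1, uint64_t r2, uint64_t r3,
			    uint64_t r4, uint64_t r5)
{
	/* emit_a64_mov_i64(tmp, func, ctx): load the 64-bit address */
	helper_fn func = (helper_fn)(call_base + (uint64_t)(int64_t)imm);

	/* indirect call through tmp; the result lands in r0 */
	return func(r1, r2, r3, r4, r5);
}
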
@@ -564,7 +543,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_MEM | BPF_B:
 	case BPF_LDX | BPF_MEM | BPF_DW:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(1, tmp, off, ctx);
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
@@ -588,7 +566,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ST | BPF_MEM | BPF_B:
 	case BPF_ST | BPF_MEM | BPF_DW:
 		/* Load imm to a register then store it */
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(1, tmp2, off, ctx);
 		emit_a64_mov_i(1, tmp, imm, ctx);
 		switch (BPF_SIZE(code)) {
@@ -612,7 +589,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_STX | BPF_MEM | BPF_H:
 	case BPF_STX | BPF_MEM | BPF_B:
 	case BPF_STX | BPF_MEM | BPF_DW:
-		ctx->tmp_used = 1;
 		emit_a64_mov_i(1, tmp, off, ctx);
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
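
[Editor's note: the LDX/ST/STX cases share one trick: the BPF 16-bit offset may not fit the immediate field of an AArch64 load/store, so the JIT moves off into a temporary and uses register-offset addressing (e.g. A64_LDR32(dst, src, tmp)). A host-side analogue of the BPF_W load, illustrative only:]

#include <stdint.h>
#include <string.h>

static uint32_t ldx_mem_w(const uint8_t *src, int16_t off)
{
	int64_t tmp = off;	/* emit_a64_mov_i(1, tmp, off, ctx) */
	uint32_t dst;

	/* A64_LDR32(dst, src, tmp): load 32 bits from src + tmp */
	memcpy(&dst, src + tmp, sizeof(dst));
	return dst;
}
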
@@ -798,7 +774,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	/* 1. Initial fake pass to compute ctx->idx. */
 
-	/* Fake pass to fill in ctx->offset and ctx->tmp_used. */
+	/* Fake pass to fill in ctx->offset. */
 	if (build_body(&ctx)) {
 		prog = orig_prog;
 		goto out_off;
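
[Editor's note: the last hunk touches the JIT's two-pass structure: the first (fake) pass runs build_body() with no image buffer just to size the program and fill ctx->offset; it previously also had to latch tmp_used before the prologue could be emitted. A minimal sketch of the two-pass emit idiom, simplified from the real emit() which also converts the opcode to little-endian:]

#include <stdint.h>

struct toy_ctx {
	uint32_t idx;		/* next instruction slot */
	uint32_t *image;	/* NULL during the fake pass */
};

/* Both passes call this for every instruction: the fake pass only
 * advances idx (so sizes and branch offsets can be computed), the
 * real pass also writes the opcode into the image. */
static void toy_emit(uint32_t insn, struct toy_ctx *ctx)
{
	if (ctx->image)
		ctx->image[ctx->idx] = insn;
	ctx->idx++;
}
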