Commit 57a610f1, authored by Jie Meng, committed by Daniel Borkmann

bpf, x64: Save bytes for DIV by reducing reg copies

Instead of unconditionally pushing and popping %rax/%rdx around every
division/modulo, we can save a few bytes when the destination register
is either BPF r0 (%rax) or r3 (%rdx), since the result is written there
anyway.

Also, we do not need to copy the source to %r11 unless the source is either
%rax, %rdx or an immediate.

For example, before the patch:

  22:   push   %rax
  23:   push   %rdx
  24:   mov    %rsi,%r11
  27:   xor    %edx,%edx
  29:   div    %r11
  2c:   mov    %rax,%r11
  2f:   pop    %rdx
  30:   pop    %rax
  31:   mov    %r11,%rax

After:

  22:   push   %rdx
  23:   xor    %edx,%edx
  25:   div    %rsi
  28:   pop    %rdx

Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211002035626.2041910-1-jmeng@fb.com
parent 0640c77c
...@@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: {
EMIT1(0x50); /* push rax */ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
EMIT1(0x52); /* push rdx */
if (dst_reg != BPF_REG_0)
if (BPF_SRC(insn->code) == BPF_X) EMIT1(0x50); /* push rax */
/* mov r11, src_reg */ if (dst_reg != BPF_REG_3)
EMIT_mov(AUX_REG, src_reg); EMIT1(0x52); /* push rdx */
else
if (BPF_SRC(insn->code) == BPF_X) {
if (src_reg == BPF_REG_0 ||
src_reg == BPF_REG_3) {
/* mov r11, src_reg */
EMIT_mov(AUX_REG, src_reg);
src_reg = AUX_REG;
}
} else {
/* mov r11, imm32 */ /* mov r11, imm32 */
EMIT3_off32(0x49, 0xC7, 0xC3, imm32); EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
src_reg = AUX_REG;
}
/* mov rax, dst_reg */ if (dst_reg != BPF_REG_0)
EMIT_mov(BPF_REG_0, dst_reg); /* mov rax, dst_reg */
emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
/* /*
* xor edx, edx * xor edx, edx
...@@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
*/ */
EMIT2(0x31, 0xd2); EMIT2(0x31, 0xd2);
if (BPF_CLASS(insn->code) == BPF_ALU64) if (is64)
/* div r11 */ EMIT1(add_1mod(0x48, src_reg));
EMIT3(0x49, 0xF7, 0xF3); else if (is_ereg(src_reg))
else EMIT1(add_1mod(0x40, src_reg));
/* div r11d */ /* div src_reg */
EMIT3(0x41, 0xF7, 0xF3); EMIT2(0xF7, add_1reg(0xF0, src_reg));
if (BPF_OP(insn->code) == BPF_MOD) if (BPF_OP(insn->code) == BPF_MOD &&
/* mov r11, rdx */ dst_reg != BPF_REG_3)
EMIT3(0x49, 0x89, 0xD3); /* mov dst_reg, rdx */
else emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
/* mov r11, rax */ else if (BPF_OP(insn->code) == BPF_DIV &&
EMIT3(0x49, 0x89, 0xC3); dst_reg != BPF_REG_0)
/* mov dst_reg, rax */
EMIT1(0x5A); /* pop rdx */ emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
EMIT1(0x58); /* pop rax */
if (dst_reg != BPF_REG_3)
/* mov dst_reg, r11 */ EMIT1(0x5A); /* pop rdx */
EMIT_mov(dst_reg, AUX_REG); if (dst_reg != BPF_REG_0)
EMIT1(0x58); /* pop rax */
break; break;
}
case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K:
......
...@@ -102,6 +102,53 @@ ...@@ -102,6 +102,53 @@
.result = ACCEPT, .result = ACCEPT,
.retval = 2, .retval = 2,
}, },
{
/*
 * Exercises the register combinations that the DIV/MOD JIT path
 * special-cases: destination in r0 (%rax) or r3 (%rdx), source in r3,
 * immediate source, and destination equal to source. Every sub-case
 * compares the result with a precomputed value; on a mismatch the
 * program exits with r0 = 1, and only if all checks pass does it
 * reach the final exit with r0 = 2, matching .retval below.
 */
"jit: various div tests",
.insns = {
/* Case 1: 64-bit DIV by register, dst = r0. Expect 0xefeffe (kept in r2). */
BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
/* On equality, jump over the two-instruction failure exit. */
BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* Case 2: 64-bit DIV by immediate, dst = r3. Same operands, same quotient. */
BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/*
 * Case 3: 64-bit MOD by immediate, dst = r1 (neither r0 nor r3).
 * r1 still holds 0xfefeee; 0xfefeee % 0xbeef = 0xaa93.
 */
BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* Case 4: 64-bit MOD by register with src = r3; r2 still expects 0xaa93. */
BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* Case 5: 32-bit DIV by register with src = r3; 0xfefeee / 0x2b = 0x5ee1d. */
BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* Case 6: 32-bit DIV with dst == src (r1 = 0x5ee1d, non-zero); expect 1. */
BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* Case 7: 64-bit MOD with dst == src (r2 = 0x5ee1d); expect 0. */
BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
/* All checks passed: return the success value. */
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
},
{ {
"jit: jsgt, jslt", "jit: jsgt, jslt",
.insns = { .insns = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment