Commit af682b76 authored by Alexei Starovoitov, committed by Daniel Borkmann

bpf: Optimize emit_mov_imm64().

It turns out that bpf prog callback addresses, bpf prog addresses
used in bpf_trampoline, and 64-bit addresses in other cases can
often be represented as sign-extended 32-bit values.

According to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82339
"Skylake has 0.64c throughput for mov r64, imm64, vs. 0.25 for mov r32, imm32."
So use shorter encoding and faster instruction when possible.
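
As a minimal userspace sketch of the size difference (assuming a low
destination register such as rax; r8-r15 add a REX prefix to the
5-byte form), the three encodings the JIT can now choose between
work out as:

#include <stdint.h>
#include <stdio.h>

/* Length in bytes of the shortest 'mov rax, imm' encoding, mirroring
 * the three-way dispatch in emit_mov_imm64() below.
 */
static int mov_imm64_len(uint64_t imm64)
{
	if (imm64 == (uint64_t)(uint32_t)imm64)
		return 5;	/* mov eax, imm32: B8+rd imm32, upper half zeroed by hw */
	if ((int64_t)imm64 == (int64_t)(int32_t)imm64)
		return 7;	/* mov rax, imm32: REX.W C7 /0 imm32, sign-extended */
	return 10;		/* movabs rax, imm64: REX.W B8+rd imm64 */
}

int main(void)
{
	printf("%d\n", mov_imm64_len(0xdeadbeefULL));		/* 5 */
	printf("%d\n", mov_imm64_len(0xffffffff81000000ULL));	/* 7: kernel-text-like address */
	printf("%d\n", mov_imm64_len(0x123456789abcdef0ULL));	/* 10 */
	return 0;
}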

Special care is needed in jit_subprogs(), since a bpf_pseudo_func()
instruction cannot change its size during the last step of JIT.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/CAADnVQKFfpY-QZBrOU2CG8v2du8Lgyb7MNVmOZVK_yTyOdNbBA@mail.gmail.com
Link: https://lore.kernel.org/bpf/20240401233800.42737-1-alexei.starovoitov@gmail.com
parent 1e9e0b85
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -816,9 +816,10 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
 static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 			   const u32 imm32_hi, const u32 imm32_lo)
 {
+	u64 imm64 = ((u64)imm32_hi << 32) | (u32)imm32_lo;
 	u8 *prog = *pprog;
 
-	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+	if (is_uimm32(imm64)) {
 		/*
 		 * For emitting plain u32, where sign bit must not be
 		 * propagated LLVM tends to load imm64 over mov32
@@ -826,6 +827,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 		 * 'mov %eax, imm32' instead.
 		 */
 		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+	} else if (is_simm32(imm64)) {
+		emit_mov_imm32(&prog, true, dst_reg, imm32_lo);
 	} else {
 		/* movabsq rax, imm64 */
 		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
...
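
The is_uimm32()/is_simm32() predicates driving this dispatch are
defined earlier in bpf_jit_comp.c; a kernel-style sketch of their
semantics (types as in linux/types.h):

static bool is_simm32(s64 value)
{
	/* value survives a round trip through s32, i.e. sign-extending
	 * the low 32 bits reproduces the full 64-bit value
	 */
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	/* value fits entirely in the low 32 bits */
	return value == (u64)(u32)value;
}

A value that passes is_uimm32() takes the 5-byte zero-extending
mov32; one that only passes is_simm32() takes the new 7-byte
sign-extending form; anything else still needs the 10-byte movabs.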
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19147,12 +19147,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		env->insn_aux_data[i].call_imm = insn->imm;
 		/* point imm to __bpf_call_base+1 from JITs point of view */
 		insn->imm = 1;
-		if (bpf_pseudo_func(insn))
+		if (bpf_pseudo_func(insn)) {
+#if defined(MODULES_VADDR)
+			u64 addr = MODULES_VADDR;
+#else
+			u64 addr = VMALLOC_START;
+#endif
 			/* jit (e.g. x86_64) may emit fewer instructions
 			 * if it learns a u32 imm is the same as a u64 imm.
-			 * Force a non zero here.
+			 * Set close enough to possible prog address.
 			 */
-			insn[1].imm = 1;
+			insn[0].imm = (u32)addr;
+			insn[1].imm = addr >> 32;
+		}
 	}
 
 	err = bpf_prog_alloc_jited_linfo(prog);
...
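
Why the placeholder value matters: the first JIT pass in
jit_subprogs() runs before the subprog's real address is known, and
the final pass must not change instruction sizes when the real
address is patched in. The old placeholder (imm of 1 in both halves,
i.e. 0x100000001) always forced the 10-byte movabs, while a real prog
address on x86_64 typically fits the new 7-byte sign-extended form,
so the encoding would have shrunk between passes. Seeding the
ld_imm64 with MODULES_VADDR/VMALLOC_START keeps the placeholder in
the same encoding class as any plausible final address. A standalone
sketch of that invariant (the concrete addresses below are
hypothetical examples, not taken from the commit):

#include <assert.h>
#include <stdint.h>

enum mov_kind { MOV_U32, MOV_S32, MOV_ABS64 };

/* Same classification emit_mov_imm64() uses to pick an encoding. */
static enum mov_kind classify(uint64_t imm64)
{
	if (imm64 == (uint64_t)(uint32_t)imm64)
		return MOV_U32;
	if ((int64_t)imm64 == (int64_t)(int32_t)imm64)
		return MOV_S32;
	return MOV_ABS64;
}

int main(void)
{
	uint64_t old_placeholder = 0x100000001ULL;	/* insn[0].imm = insn[1].imm = 1 */
	uint64_t new_placeholder = 0xffffffffa0000000ULL; /* hypothetical MODULES_VADDR */
	uint64_t final_addr      = 0xffffffffa0012345ULL; /* hypothetical prog address */

	/* the old placeholder encodes differently from the final address... */
	assert(classify(old_placeholder) != classify(final_addr));
	/* ...the new one encodes the same way, so the size stays stable */
	assert(classify(new_placeholder) == classify(final_addr));
	return 0;
}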