Commit 9fb410a8 authored by Jiong Wang, committed by Daniel Borkmann

nfp: bpf: migrate to advanced reciprocal divide in reciprocal_div.h

As we are doing JIT, we want to use the advanced version of the reciprocal
divide (reciprocal_value_adv): the more expensive reciprocal calculation is
done once on the host at JIT time, in exchange for cheaper emitted code.

This reduces the required ALU instructions from 4 to 2 or 1.
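
For illustration (not part of this commit): with a divisor such as 5,
reciprocal_value_adv(5, 32) yields a narrow multiplier m = 0xCCCCCCCD with
sh = 2, so the JITed sequence is just one multiply-high plus one shift. A
minimal host-side sketch of that fast path, assuming reciprocal_value_adv()
from include/linux/reciprocal_div.h (div5 is a hypothetical example helper):

    #include <linux/reciprocal_div.h>

    /* Narrow-multiplier fast path: quotient = (a * m) >> (32 + sh).
     * The plain reciprocal_value() always needed the longer
     * mul/sub/shift/add/shift sequence.
     */
    static u32 div5(u32 a)
    {
            struct reciprocal_value_adv rv = reciprocal_value_adv(5, 32);

            return (u32)(((u64)a * rv.m) >> 32) >> rv.sh;
    }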
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 2a952b03
@@ -1497,8 +1497,8 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 {
 	swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
-	struct reciprocal_value rvalue;
-	swreg tmp_b = imm_b(nfp_prog);
+	struct reciprocal_value_adv rvalue;
+	u8 pre_shift, exp;
 	swreg magic;
 
 	if (imm > U32_MAX) {
@@ -1506,16 +1506,58 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 		return 0;
 	}
 
-	rvalue = reciprocal_value(imm);
+	/* NOTE: because we are using "reciprocal_value_adv", which doesn't
+	 * support "divisor > (1u << 31)", we need to JIT a separate NFP
+	 * sequence to handle such a case. Its result equals that of the
+	 * unsigned comparison "dst >= imm", which can be calculated using
+	 * the following NFP sequence:
+	 *
+	 *  alu[--, dst, -, imm]
+	 *  immed[imm, 0]
+	 *  alu[dst, imm, +carry, 0]
+	 */
+	if (imm > 1U << 31) {
+		swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+
+		emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
+		wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
+		emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
+			 reg_imm(0));
+		return 0;
+	}
+
+	rvalue = reciprocal_value_adv(imm, 32);
+	exp = rvalue.exp;
+	if (rvalue.is_wide_m && !(imm & 1)) {
+		pre_shift = fls(imm & -imm) - 1;
+		rvalue = reciprocal_value_adv(imm >> pre_shift,
+					      32 - pre_shift);
+	} else {
+		pre_shift = 0;
+	}
 	magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
-	wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic,
-		    true);
-	emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
-	emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-		 SHF_SC_R_SHF, rvalue.sh1);
-	emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
-	emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-		 SHF_SC_R_SHF, rvalue.sh2);
+	if (imm == 1U << exp) {
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, exp);
+	} else if (rvalue.is_wide_m) {
+		wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
+			    magic, true);
+		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
+			 imm_b(nfp_prog));
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, 1);
+		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
+			 imm_b(nfp_prog));
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, rvalue.sh - 1);
+	} else {
+		if (pre_shift)
+			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+				 dst_b, SHF_SC_R_SHF, pre_shift);
+		wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+			 dst_b, SHF_SC_R_SHF, rvalue.sh);
+	}
 
 	return 0;
 }
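
For reference, a host-side C sketch (illustrative only, not code from this
patch; div_by_const() is a hypothetical helper) of the quotient computation
that the three emitted sequences implement, assuming reciprocal_value_adv()
from include/linux/reciprocal_div.h:

    #include <linux/bitops.h>
    #include <linux/reciprocal_div.h>

    /* Mirrors the branch selection in wrp_div_imm() above. */
    static u32 div_by_const(u32 a, u32 imm)
    {
            struct reciprocal_value_adv rv;
            u8 pre_shift = 0, exp;
            u32 t;

            /* reciprocal_value_adv() doesn't support divisors above
             * 2^31; there the quotient equals the unsigned comparison
             * a >= imm.
             */
            if (imm > 1U << 31)
                    return a >= imm;

            rv = reciprocal_value_adv(imm, 32);
            exp = rv.exp;
            if (rv.is_wide_m && !(imm & 1)) {
                    /* strip trailing zero bits to narrow the multiplier */
                    pre_shift = fls(imm & -imm) - 1;
                    rv = reciprocal_value_adv(imm >> pre_shift,
                                              32 - pre_shift);
            }

            if (imm == 1U << exp)           /* power of two: one shift */
                    return a >> exp;

            if (rv.is_wide_m) {             /* 33-bit multiplier 2^32 + m */
                    t = ((u64)a * rv.m) >> 32;
                    return (((a - t) >> 1) + t) >> (rv.sh - 1);
            }

            /* narrow multiplier: ((a >> pre_shift) * m) >> (32 + sh) */
            return (u32)(((u64)(a >> pre_shift) * rv.m) >> 32) >> rv.sh;
    }

The power-of-two branch costs a single shift and the narrow-multiplier branch
a multiply plus one or two shifts, which is where the "4 to 2 or 1" ALU
reduction in the commit message comes from; only the wide-multiplier branch
still needs the full sub/shift/add/shift sequence.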