Commit 3a70418b authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-inline-lookups'

Alexei Starovoitov says:

====================
bpf: inline bpf_map_lookup_elem()

bpf_map_lookup_elem() is one of the most frequently used helper functions.
Improve JITed program performance by inlining this helper.

bpf_map_type	before  after
hash		58M	74M
array		174M	280M

The values are number of lookups per second in ideal conditions
measured by micro-benchmark in patch 6.

The 'perf report' for HASH map type:
before:
    54.23%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    14.24%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     8.84%  map_perf_test  [kernel.kallsyms]  [k] htab_map_lookup_elem
     5.93%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     2.30%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.49%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    60.03%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    18.07%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     2.91%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.94%  map_perf_test  [kernel.kallsyms]  [k] _einittext
     1.90%  map_perf_test  [kernel.kallsyms]  [k] __audit_syscall_exit
     1.72%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

so the cost of htab_map_lookup_elem() and bpf_map_lookup_elem()
is gone after inlining.

'per-cpu' and 'lru' map types can be optimized similarly in the future.

Note the sparse will complain that bpf is addictive ;)
kernel/bpf/hashtab.c:438:19: sparse: subtraction of functions? Share your drugs
kernel/bpf/verifier.c:3342:38: sparse: subtraction of functions? Share your drugs
it's not a new warning, just in new places.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4396e461 95ff141e
......@@ -35,6 +35,7 @@ struct bpf_map_ops {
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
};
struct bpf_map {
......
......@@ -66,7 +66,10 @@ struct bpf_verifier_state_list {
};
struct bpf_insn_aux_data {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
};
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
......
......@@ -693,6 +693,11 @@ static inline bool bpf_jit_is_ebpf(void)
# endif
}
static inline bool ebpf_jit_enabled(void)
{
return bpf_jit_enable && bpf_jit_is_ebpf();
}
static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
{
return fp->jited && bpf_jit_is_ebpf();
......@@ -753,6 +758,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp);
#else /* CONFIG_BPF_JIT */
static inline bool ebpf_jit_enabled(void)
{
return false;
}
static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
{
return false;
......
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016,2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
......@@ -113,6 +114,33 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + array->elem_size * index;
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = array->elem_size;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, array->map.max_entries,
elem_size == 1 ? 2 : 3);
if (elem_size == 1) {
/* nop */
} else if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
} else {
*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
}
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*insn++ = BPF_MOV64_IMM(ret, 0);
return insn - insn_buf;
}
/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
......@@ -267,6 +295,7 @@ static const struct bpf_map_ops array_ops = {
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
};
static struct bpf_map_type_list array_type __ro_after_init = {
......
......@@ -419,7 +419,11 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
return NULL;
}
/* Called from syscall or from eBPF program */
/* Called from syscall or from eBPF program directly, so
* arguments have to match bpf_map_lookup_elem() exactly.
* The return value is adjusted by BPF instructions
* in htab_map_gen_lookup().
*/
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
......@@ -451,6 +455,30 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
/* inline bpf_map_lookup_elem() call.
* Instead of:
* bpf_prog
* bpf_map_lookup_elem
* map->ops->map_lookup_elem
* htab_map_lookup_elem
* __htab_map_lookup_elem
* do:
* bpf_prog
* __htab_map_lookup_elem
*/
static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
round_up(map->key_size, 8));
return insn - insn_buf;
}
static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
struct htab_elem *l = __htab_map_lookup_elem(map, key);
......@@ -1062,6 +1090,7 @@ static const struct bpf_map_ops htab_ops = {
.map_lookup_elem = htab_map_lookup_elem,
.map_update_elem = htab_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
};
static struct bpf_map_type_list htab_type __ro_after_init = {
......
......@@ -586,59 +586,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
list_add(&tl->list_node, &bpf_prog_types);
}
/* fixup insn->imm field of bpf_call instructions:
* if (insn->imm == BPF_FUNC_map_lookup_elem)
* insn->imm = bpf_map_lookup_elem - __bpf_call_base;
* else if (insn->imm == BPF_FUNC_map_update_elem)
* insn->imm = bpf_map_update_elem - __bpf_call_base;
* else ...
*
* this function is called after eBPF program passed verification
*/
static void fixup_bpf_calls(struct bpf_prog *prog)
{
const struct bpf_func_proto *fn;
int i;
for (i = 0; i < prog->len; i++) {
struct bpf_insn *insn = &prog->insnsi[i];
if (insn->code == (BPF_JMP | BPF_CALL)) {
/* we reach here when program has bpf_call instructions
* and it passed bpf_check(), means that
* ops->get_func_proto must have been supplied, check it
*/
BUG_ON(!prog->aux->ops->get_func_proto);
if (insn->imm == BPF_FUNC_get_route_realm)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
* interpeter for every normal call
* and to prevent accidental JITing by
* JIT compiler that doesn't support
* bpf_tail_call yet
*/
insn->imm = 0;
insn->code |= BPF_X;
continue;
}
fn = prog->aux->ops->get_func_proto(insn->imm);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
BUG_ON(!fn->func);
insn->imm = fn->func - __bpf_call_base;
}
}
}
/* drop refcnt on maps used by eBPF program and free auxilary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
......@@ -892,9 +839,6 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
/* fixup BPF_CALL->imm field */
fixup_bpf_calls(prog);
/* eBPF program is ready to be JITed */
prog = bpf_prog_select_runtime(prog, &err);
if (err < 0)
......
......@@ -1273,7 +1273,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}
}
static int check_call(struct bpf_verifier_env *env, int func_id)
static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
......@@ -1369,6 +1369,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].id = ++env->id_gen;
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
} else {
verbose("unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
......@@ -2940,7 +2941,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
err = check_call(env, insn->imm);
err = check_call(env, insn->imm, insn_idx);
if (err)
return err;
......@@ -3162,6 +3163,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
insn->src_reg = 0;
}
/* single env->prog->insni[off] instruction was replaced with the range
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
u32 off, u32 cnt)
{
struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
if (cnt == 1)
return 0;
new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
if (!new_data)
return -ENOMEM;
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
env->insn_aux_data = new_data;
vfree(old_data);
return 0;
}
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
struct bpf_prog *new_prog;
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
if (!new_prog)
return NULL;
if (adjust_insn_aux_data(env, new_prog->len, off, len))
return NULL;
return new_prog;
}
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
......@@ -3181,10 +3217,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
new_prog = bpf_patch_insn_single(env->prog, 0,
insn_buf, cnt);
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
env->prog = new_prog;
delta += cnt - 1;
}
......@@ -3209,7 +3245,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
else
continue;
if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
......@@ -3218,8 +3254,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return -EINVAL;
}
new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
cnt);
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
......@@ -3233,6 +3268,83 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return 0;
}
/* fixup insn->imm field of bpf_call instructions
* and inline eligible helpers as explicit sequence of BPF instructions
*
* this function is called after eBPF program passed verification
*/
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
int i, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
if (insn->imm == BPF_FUNC_get_route_realm)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
* call and to prevent accidental JITing by JIT compiler
* that doesn't support bpf_tail_call yet
*/
insn->imm = 0;
insn->code |= BPF_X;
continue;
}
if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (!map_ptr->ops->map_gen_lookup)
goto patch_call_imm;
cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
/* keep walking new program and skip insns we just inserted */
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}
patch_call_imm:
fn = prog->aux->ops->get_func_proto(insn->imm);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
if (!fn->func) {
verbose("kernel subsystem misconfigured func %s#%d\n",
func_id_name(insn->imm), insn->imm);
return -EFAULT;
}
insn->imm = fn->func - __bpf_call_base;
}
return 0;
}
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
......@@ -3328,6 +3440,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
if (ret == 0)
ret = fixup_bpf_calls(env);
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
......
......@@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
.map_flags = BPF_F_NO_PREALLOC,
};
struct bpf_map_def SEC("maps") array_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
};
SEC("kprobe/sys_getuid")
int stress_hmap(struct pt_regs *ctx)
{
......@@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
return 0;
}
SEC("kprobe/sys_getpgid")
int stress_hash_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
long *value;
#pragma clang loop unroll(full)
for (i = 0; i < 64; ++i)
value = bpf_map_lookup_elem(&hash_map, &key);
return 0;
}
SEC("kprobe/sys_getpgrp")
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
long *value;
#pragma clang loop unroll(full)
for (i = 0; i < 64; ++i)
value = bpf_map_lookup_elem(&array_map, &key);
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
......@@ -38,6 +38,8 @@ static __u64 time_get_ns(void)
#define LRU_HASH_PREALLOC (1 << 4)
#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
#define LPM_KMALLOC (1 << 6)
#define HASH_LOOKUP (1 << 7)
#define ARRAY_LOOKUP (1 << 8)
static int test_flags = ~0;
......@@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu)
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
}
static void test_hash_lookup(int cpu)
{
__u64 start_time;
int i;
start_time = time_get_ns();
for (i = 0; i < MAX_CNT; i++)
syscall(__NR_getpgid, 0);
printf("%d:hash_lookup %lld lookups per sec\n",
cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
}
static void test_array_lookup(int cpu)
{
__u64 start_time;
int i;
start_time = time_get_ns();
for (i = 0; i < MAX_CNT; i++)
syscall(__NR_getpgrp, 0);
printf("%d:array_lookup %lld lookups per sec\n",
cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
}
static void loop(int cpu)
{
cpu_set_t cpuset;
......@@ -153,6 +179,12 @@ static void loop(int cpu)
if (test_flags & LPM_KMALLOC)
test_lpm_kmalloc(cpu);
if (test_flags & HASH_LOOKUP)
test_hash_lookup(cpu);
if (test_flags & ARRAY_LOOKUP)
test_array_lookup(cpu);
}
static void run_perf_test(int tasks)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment