Commit cc441a69 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-verifier-scalability'

Alexei Starovoitov says:

====================
v1->v2:
- fixed typo in patch 1
- added a patch to convert kcalloc to kvcalloc
- added a patch to verbose 16-bit jump offset check
- added a test with 1m insns

This patch set is the first step to be able to accept large programs.
The verifier still suffers from its brute force algorithm and
large programs can easily hit 1M insn_processed limit.
A lot more work is necessary to be able to verify large programs.

v1:
Realize two key ideas to speed up verification speed by ~20 times
1. every 'branching' instructions records all verifier states.
   not all of them are useful for search pruning.
   add a simple heuristic to keep states that were successful in search pruning
   and remove those that were not
2. mark_reg_read walks parentage chain of registers to mark parents as LIVE_READ.
   Once the register is marked there is no need to remark it again in the future.
   Hence stop walking the chain once first LIVE_READ is seen.

1st optimization gives 10x speed up on large programs
and 2nd optimization reduces the cost of mark_reg_read from ~40% of cpu to <1%.
Combined the deliver ~20x speedup on large programs.

Faster and bounded verification time allows to increase insn_processed
limit to 1 million from 130k.
Worst case it takes 1/10 of a second to process that many instructions
and peak memory consumption is peak_states * sizeof(struct bpf_verifier_state)
which is around ~5Mbyte.

Increase insn_per_program limit for root to insn_processed limit.

Add verification stats and stress tests for verifier scalability.
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents e83b9f55 8aa2d4b4
......@@ -421,6 +421,7 @@ struct bpf_array {
};
};
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32
struct bpf_event_entry {
......
......@@ -207,6 +207,7 @@ struct bpf_verifier_state {
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
int miss_cnt, hit_cnt;
};
/* Possible states for alu_state member. */
......@@ -248,6 +249,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
return log->len_used >= log->len_total - 1;
}
#define BPF_LOG_LEVEL1 1
#define BPF_LOG_LEVEL2 2
#define BPF_LOG_STATS 4
#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return log->level && log->ubuf && !bpf_verifier_log_full(log);
......@@ -274,6 +281,7 @@ struct bpf_verifier_env {
bool strict_alignment; /* perform strict pointer alignment checks */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_verifier_state_list *free_list;
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
......@@ -284,6 +292,21 @@ struct bpf_verifier_env {
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
u32 insn_processed;
/* total verification time */
u64 verification_time;
/* maximum number of verifier states kept in 'branching' instructions */
u32 max_states_per_insn;
/* total number of allocated verifier states */
u32 total_states;
/* some states are freed during program analysis.
* this is peak number of states. this number dominates kernel
* memory consumption during verification
*/
u32 peak_states;
/* longest register parentage chain walked for liveness marking */
u32 longest_mark_read_walk;
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
......
......@@ -438,6 +438,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
const u32 cnt_max = S16_MAX;
struct bpf_prog *prog_adj;
int err;
/* Since our patchlet doesn't expand the image, we're done. */
if (insn_delta == 0) {
......@@ -453,8 +454,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* we afterwards may not fail anymore.
*/
if (insn_adj_cnt > cnt_max &&
bpf_adj_branches(prog, off, off + 1, off + len, true))
return NULL;
(err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
return ERR_PTR(err);
/* Several new instructions need to be inserted. Make room
* for them. Likely, there's no need for a new allocation as
......@@ -463,7 +464,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
GFP_USER);
if (!prog_adj)
return NULL;
return ERR_PTR(-ENOMEM);
prog_adj->len = insn_adj_cnt;
......@@ -1096,13 +1097,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
continue;
tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
if (!tmp) {
if (IS_ERR(tmp)) {
/* Patching may have repointed aux->prog during
* realloc from the original one, so we need to
* fix it up here on error.
*/
bpf_jit_prog_release_other(prog, clone);
return ERR_PTR(-ENOMEM);
return tmp;
}
clone = tmp;
......
......@@ -1557,7 +1557,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
is_gpl = license_is_gpl_compatible(license);
if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
if (attr->insn_cnt == 0 ||
attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
......
This diff is collapsed.
......@@ -223,7 +223,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
return -EINVAL;
log_level = load_attr->log_level;
if (log_level > 2 || (log_level && !log_buf))
if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
return -EINVAL;
name_len = load_attr->name ? strlen(load_attr->name) : 0;
......
......@@ -92,7 +92,7 @@ struct bpf_load_program_attr {
#define MAPS_RELAX_COMPAT 0x01
/* Recommend log buffer size */
#define BPF_LOG_BUF_SIZE (256 * 1024)
#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */
LIBBPF_API int
bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz);
......
......@@ -152,6 +152,7 @@ struct bpf_program {
};
} *reloc_desc;
int nr_reloc;
int log_level;
struct {
int nr;
......@@ -1494,6 +1495,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
int log_buf_size = BPF_LOG_BUF_SIZE;
char *log_buf;
int ret;
......@@ -1514,21 +1516,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info = prog->line_info;
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
load_attr.log_level = prog->log_level;
if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
log_buf = malloc(BPF_LOG_BUF_SIZE);
retry_load:
log_buf = malloc(log_buf_size);
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
if (ret >= 0) {
if (load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
*pfd = ret;
ret = 0;
goto out;
}
if (errno == ENOSPC) {
log_buf_size <<= 1;
free(log_buf);
goto retry_load;
}
ret = -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
......@@ -2938,6 +2949,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
bpf_program__set_expected_attach_type(prog,
expected_attach_type);
prog->log_level = attr->log_level;
if (!first_prog)
first_prog = prog;
}
......
......@@ -314,6 +314,7 @@ struct bpf_prog_load_attr {
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
int ifindex;
int log_level;
};
LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <test_progs.h>
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
if (level != LIBBPF_DEBUG)
return 0;
if (!strstr(format, "verifier log"))
return 0;
return vfprintf(stderr, "%s", args);
}
static int check_load(const char *file)
{
struct bpf_prog_load_attr attr;
struct bpf_object *obj;
int err, prog_fd;
memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
attr.file = file;
attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
attr.log_level = 4;
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
bpf_object__close(obj);
if (err)
error_cnt++;
return err;
}
void test_bpf_verif_scale(void)
{
const char *file1 = "./test_verif_scale1.o";
const char *file2 = "./test_verif_scale2.o";
const char *file3 = "./test_verif_scale3.o";
int err;
if (verifier_stats)
libbpf_set_print(libbpf_debug_print);
err = check_load(file1);
err |= check_load(file2);
err |= check_load(file3);
if (!err)
printf("test_verif_scale:OK\n");
else
printf("test_verif_scale:FAIL\n");
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
typedef unsigned int u32;
static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
#define __jhash_mix(a, b, c) \
{ \
a -= c; a ^= rol32(c, 4); c += b; \
b -= a; b ^= rol32(a, 6); a += c; \
c -= b; c ^= rol32(b, 8); b += a; \
a -= c; a ^= rol32(c, 16); c += b; \
b -= a; b ^= rol32(a, 19); a += c; \
c -= b; c ^= rol32(b, 4); b += a; \
}
#define __jhash_final(a, b, c) \
{ \
c ^= b; c -= rol32(b, 14); \
a ^= c; a -= rol32(c, 11); \
b ^= a; b -= rol32(a, 25); \
c ^= b; c -= rol32(b, 16); \
a ^= c; a -= rol32(c, 4); \
b ^= a; b -= rol32(a, 14); \
c ^= b; c -= rol32(b, 24); \
}
#define JHASH_INITVAL 0xdeadbeef
static ATTR
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
a = b = c = JHASH_INITVAL + length + initval;
while (length > 12) {
a += *(volatile u32 *)(k);
b += *(volatile u32 *)(k + 4);
c += *(volatile u32 *)(k + 8);
__jhash_mix(a, b, c);
length -= 12;
k += 12;
}
switch (length) {
case 12: c += (u32)k[11]<<24;
case 11: c += (u32)k[10]<<16;
case 10: c += (u32)k[9]<<8;
case 9: c += k[8];
case 8: b += (u32)k[7]<<24;
case 7: b += (u32)k[6]<<16;
case 6: b += (u32)k[5]<<8;
case 5: b += k[4];
case 4: a += (u32)k[3]<<24;
case 3: a += (u32)k[2]<<16;
case 2: a += (u32)k[1]<<8;
case 1: a += k[0];
c ^= a;
__jhash_final(a, b, c);
case 0: /* Nothing left to add */
break;
}
return c;
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
SEC("scale90_noinline")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
int ret = 0, nh_off, i = 0;
nh_off = 14;
/* pragma unroll doesn't work on large loops */
#define C do { \
ptr = data + i; \
if (ptr + nh_off > data_end) \
break; \
ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
} while (0);
#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
C30;C30;C30; /* 90 calls */
return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define ATTR __attribute__((always_inline))
#include "test_jhash.h"
SEC("scale90_inline")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
int ret = 0, nh_off, i = 0;
nh_off = 14;
/* pragma unroll doesn't work on large loops */
#define C do { \
ptr = data + i; \
if (ptr + nh_off > data_end) \
break; \
ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
} while (0);
#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
C30;C30;C30; /* 90 calls */
return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
SEC("scale90_noinline32")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
int ret = 0, nh_off, i = 0;
nh_off = 32;
/* pragma unroll doesn't work on large loops */
#define C do { \
ptr = data + i; \
if (ptr + nh_off > data_end) \
break; \
ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
} while (0);
#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
C30;C30;C30; /* 90 calls */
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -9,6 +9,7 @@
int error_cnt, pass_cnt;
bool jit_enabled;
bool verifier_stats = false;
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
......@@ -162,12 +163,15 @@ void *spin_lock_thread(void *arg)
#include <prog_tests/tests.h>
#undef DECLARE
int main(void)
int main(int ac, char **av)
{
srand(time(NULL));
jit_enabled = is_jit_enabled();
if (ac == 2 && strcmp(av[1], "-s") == 0)
verifier_stats = true;
#define CALL
#include <prog_tests/tests.h>
#undef CALL
......
......@@ -40,6 +40,7 @@ typedef __u16 __sum16;
extern int error_cnt, pass_cnt;
extern bool jit_enabled;
extern bool verifier_stats;
#define MAGIC_BYTES 123
......
......@@ -50,6 +50,7 @@
#include "../../../include/linux/filter.h"
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
#define MAX_NR_MAPS 14
#define MAX_TEST_RUNS 8
......@@ -66,6 +67,7 @@ static int skips;
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
struct bpf_insn *fill_insns;
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
......@@ -83,6 +85,7 @@ struct bpf_test {
const char *errstr;
const char *errstr_unpriv;
uint32_t retval, retval_unpriv, insn_processed;
int prog_len;
enum {
UNDEF,
ACCEPT,
......@@ -119,10 +122,11 @@ struct other_val {
static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
{
/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
/* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */
#define PUSH_CNT 51
unsigned int len = BPF_MAXINSNS;
struct bpf_insn *insn = self->insns;
/* jump range is limited to 16 bit. PUSH_CNT of ld_abs needs room */
unsigned int len = (1 << 15) - PUSH_CNT * 2 * 5 * 6;
struct bpf_insn *insn = self->fill_insns;
int i = 0, j, k = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
......@@ -156,12 +160,14 @@ static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
for (; i < len - 1; i++)
insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
insn[len - 1] = BPF_EXIT_INSN();
self->prog_len = len;
}
static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
{
struct bpf_insn *insn = self->insns;
unsigned int len = BPF_MAXINSNS;
struct bpf_insn *insn = self->fill_insns;
/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */
unsigned int len = (1 << 15) / 6;
int i = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
......@@ -171,11 +177,12 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
while (i < len - 1)
insn[i++] = BPF_LD_ABS(BPF_B, 1);
insn[i] = BPF_EXIT_INSN();
self->prog_len = i + 1;
}
static void bpf_fill_rand_ld_dw(struct bpf_test *self)
{
struct bpf_insn *insn = self->insns;
struct bpf_insn *insn = self->fill_insns;
uint64_t res = 0;
int i = 0;
......@@ -193,6 +200,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
insn[i] = BPF_EXIT_INSN();
self->prog_len = i + 1;
res ^= (res >> 32);
self->retval = (uint32_t)res;
}
......@@ -520,8 +528,10 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
int *fixup_map_spin_lock = test->fixup_map_spin_lock;
if (test->fill_helper)
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
test->fill_helper(test);
}
/* Allocating HTs with 1 elem is fine here, since we only test
* for verifier and not do a runtime lookup, so the only thing
......@@ -718,12 +728,17 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
do_test_fixup(test, prog_type, prog, map_fds);
if (test->fill_insns) {
prog = test->fill_insns;
prog_len = test->prog_len;
} else {
prog_len = probe_filter_length(prog);
}
/* If there were some map skips during fixup due to missing bpf
* features, skip this test.
*/
if (fixup_skips != skips)
return;
prog_len = probe_filter_length(prog);
pflags = 0;
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
......@@ -731,7 +746,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
pflags |= BPF_F_ANY_ALIGNMENT;
fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
"GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
"GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
......@@ -830,6 +845,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto fail_log;
}
close_fds:
if (test->fill_insns)
free(test->fill_insns);
close(fd_prog);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
......
......@@ -34,3 +34,12 @@
.result = ACCEPT,
.retval = 5,
},
{
"ld_dw: xor semi-random 64 bit imms, test 5",
.insns = { },
.data = { },
.fill_helper = bpf_fill_rand_ld_dw,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 1000000 - 6,
},
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment