Commit 30cfe3b4 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-multi-prog-improvements'

Sandipan Das says:

====================
[1] Support for bpf-to-bpf function calls in the powerpc64 JIT compiler.

[2] Provide a way for resolving function calls because of the way JITed
    images are allocated in powerpc64.

[3] Fix to get JITed instruction dumps for multi-function programs from
    the bpf system call.

[4] Fix for bpftool to show delimited multi-function JITed image dumps.

v4:
 - Incorporate review comments from Jakub.
 - Fix JSON output for bpftool.

v3:
 - Change base tree tag to bpf-next.
 - Incorporate review comments from Alexei, Daniel and Jakub.
 - Make sure that the JITed image does not grow or shrink after
   the last pass due to the way the instruction sequence used
   to load a callee's address maybe optimized.
 - Make additional changes to the bpf system call and bpftool to
   make multi-function JITed dumps easier to correlate.

v2:
 - Incorporate review comments from Jakub.
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents a2889a4c f7f62c71
......@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
unsigned int i, ctx_idx = ctx->idx;
/* Load function address into r12 */
PPC_LI64(12, func);
/* For bpf-to-bpf function calls, the callee's address is unknown
* until the last extra pass. As seen above, we use PPC_LI64() to
* load the callee's address, but this may optimize the number of
* instructions required based on the nature of the address.
*
* Since we don't want the number of instructions emitted to change,
* we pad the optimized PPC_LI64() call with NOPs to guarantee that
* we always have a five-instruction sequence, which is the maximum
* that PPC_LI64() can emit.
*/
for (i = ctx->idx - ctx_idx; i < 5; i++)
PPC_NOP();
#ifdef PPC64_ELF_ABI_v1
/* func points to the function descriptor */
PPC_LI64(b2p[TMP_REG_2], func);
/* Load actual entry point from function descriptor */
PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
/* ... and move it to LR */
PPC_MTLR(b2p[TMP_REG_1]);
/*
* Load TOC from function descriptor at offset 8.
* We can clobber r2 since we get called through a
* function pointer (so caller will save/restore r2)
* and since we don't use a TOC ourself.
*/
PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
/* We can clobber r12 */
PPC_FUNC_ADDR(12, func);
PPC_MTLR(12);
PPC_BPF_LL(2, 12, 8);
/* Load actual entry point from function descriptor */
PPC_BPF_LL(12, 12, 0);
#endif
PPC_MTLR(12);
PPC_BLRL();
}
......@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
u32 *addrs)
u32 *addrs, bool extra_pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
......@@ -712,11 +724,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
/*
* Call kernel helper
* Call kernel helper or bpf function
*/
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
func = (u8 *) __bpf_call_base + imm;
/* bpf function call */
if (insn[i].src_reg == BPF_PSEUDO_CALL)
if (!extra_pass)
func = NULL;
else if (fp->aux->func && off < fp->aux->func_cnt)
/* use the subprog id from the off
* field to lookup the callee address
*/
func = (u8 *) fp->aux->func[off]->bpf_func;
else
return -EINVAL;
/* kernel helper call */
else
func = (u8 *) __bpf_call_base + imm;
bpf_jit_emit_func_call(image, ctx, (u64)func);
......@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
return 0;
}
struct powerpc64_jit_data {
struct bpf_binary_header *header;
u32 *addrs;
u8 *image;
u32 proglen;
struct codegen_context ctx;
};
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
......@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL;
u32 *code_base;
u32 *addrs;
struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx;
int pass;
int flen;
......@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
bool extra_pass = false;
if (!fp->jit_requested)
return org_fp;
......@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp;
}
jit_data = fp->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
fp = org_fp;
goto out;
}
fp->aux->jit_data = jit_data;
}
flen = fp->len;
addrs = jit_data->addrs;
if (addrs) {
cgctx = jit_data->ctx;
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) {
fp = org_fp;
goto out;
goto out_addrs;
}
memset(&cgctx, 0, sizeof(struct codegen_context));
......@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out;
goto out_addrs;
}
/*
......@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
fp = org_fp;
goto out;
goto out_addrs;
}
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */
......@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
bpf_jit_build_body(fp, code_base, &cgctx, addrs);
bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
......@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
} else {
jit_data->addrs = addrs;
jit_data->ctx = cgctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = bpf_hdr;
}
out:
kfree(addrs);
if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
......
......@@ -2205,6 +2205,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
......
......@@ -1933,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
goto done;
}
......@@ -1969,18 +1970,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload.
*/
ulen = info.jited_prog_len;
info.jited_prog_len = prog->jited_len;
if (prog->aux->func_cnt) {
u32 i;
info.jited_prog_len = 0;
for (i = 0; i < prog->aux->func_cnt; i++)
info.jited_prog_len += prog->aux->func[i]->jited_len;
} else {
info.jited_prog_len = prog->jited_len;
}
if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
/* for multi-function programs, copy the JITed
* instructions for all the functions
*/
if (prog->aux->func_cnt) {
u32 len, free, i;
u8 *img;
free = ulen;
for (i = 0; i < prog->aux->func_cnt; i++) {
len = prog->aux->func[i]->jited_len;
len = min_t(u32, len, free);
img = (u8 *) prog->aux->func[i]->bpf_func;
if (copy_to_user(uinsns, img, len))
return -EFAULT;
uinsns += len;
free -= len;
if (!free)
break;
}
} else {
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
}
} else {
info.jited_prog_insns = 0;
}
}
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt;
if (info.nr_jited_ksyms && ulen) {
if (bpf_dump_raw_ok()) {
u64 __user *user_ksyms;
ulong ksym_addr;
u32 i;
/* copy the address of the kernel symbol
* corresponding to each function
*/
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
for (i = 0; i < ulen; i++) {
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
ksym_addr &= PAGE_MASK;
if (put_user((u64) ksym_addr, &user_ksyms[i]))
return -EFAULT;
}
} else {
info.jited_ksyms = 0;
}
}
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt;
if (info.nr_jited_func_lens && ulen) {
if (bpf_dump_raw_ok()) {
u32 __user *user_lens;
u32 func_len, i;
/* copy the JITed image lengths for each function */
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
user_lens = u64_to_user_ptr(info.jited_func_lens);
for (i = 0; i < ulen; i++) {
func_len = prog->aux->func[i]->jited_len;
if (put_user(func_len, &user_lens[i]))
return -EFAULT;
}
} else {
info.jited_func_lens = 0;
}
}
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
......
......@@ -5383,11 +5383,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL)
continue;
subprog = insn->off;
insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func -
__bpf_call_base;
}
/* we use the aux data to keep a list of the start addresses
* of the JITed images for each function in the program
*
* for some architectures, such as powerpc64, the imm field
* might not be large enough to hold the offset of the start
* address of the callee's JITed image from __bpf_call_base
*
* in such cases, we can lookup the start address of a callee
* by using its subprog id, available from the off field of
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
func[i]->aux->func_cnt = env->subprog_cnt;
}
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
......@@ -5413,17 +5426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
unsigned long addr;
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog]->bpf_func;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
insn->imm = subprog;
}
prog->jited = 1;
......
......@@ -420,7 +420,11 @@ static int do_show(int argc, char **argv)
static int do_dump(int argc, char **argv)
{
unsigned long *func_ksyms = NULL;
struct bpf_prog_info info = {};
unsigned int *func_lens = NULL;
unsigned int nr_func_ksyms;
unsigned int nr_func_lens;
struct dump_data dd = {};
__u32 len = sizeof(info);
unsigned int buf_size;
......@@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv)
return -1;
}
nr_func_ksyms = info.nr_jited_ksyms;
if (nr_func_ksyms) {
func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
if (!func_ksyms) {
p_err("mem alloc failed");
close(fd);
goto err_free;
}
}
nr_func_lens = info.nr_jited_func_lens;
if (nr_func_lens) {
func_lens = malloc(nr_func_lens * sizeof(__u32));
if (!func_lens) {
p_err("mem alloc failed");
close(fd);
goto err_free;
}
}
memset(&info, 0, sizeof(info));
*member_ptr = ptr_to_u64(buf);
*member_len = buf_size;
info.jited_ksyms = ptr_to_u64(func_ksyms);
info.nr_jited_ksyms = nr_func_ksyms;
info.jited_func_lens = ptr_to_u64(func_lens);
info.nr_jited_func_lens = nr_func_lens;
err = bpf_obj_get_info_by_fd(fd, &info, &len);
close(fd);
......@@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
if (info.nr_jited_ksyms > nr_func_ksyms) {
p_err("too many addresses returned");
goto err_free;
}
if (info.nr_jited_func_lens > nr_func_lens) {
p_err("too many values returned");
goto err_free;
}
if ((member_len == &info.jited_prog_len &&
info.jited_prog_insns == 0) ||
(member_len == &info.xlated_prog_len &&
......@@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
disasm_print_insn(buf, *member_len, opcodes, name);
if (info.nr_jited_func_lens && info.jited_func_lens) {
struct kernel_sym *sym = NULL;
char sym_name[SYM_MAX_NAME];
unsigned char *img = buf;
__u64 *ksyms = NULL;
__u32 *lens;
__u32 i;
if (info.nr_jited_ksyms) {
kernel_syms_load(&dd);
ksyms = (__u64 *) info.jited_ksyms;
}
if (json_output)
jsonw_start_array(json_wtr);
lens = (__u32 *) info.jited_func_lens;
for (i = 0; i < info.nr_jited_func_lens; i++) {
if (ksyms) {
sym = kernel_syms_search(&dd, ksyms[i]);
if (sym)
sprintf(sym_name, "%s", sym->name);
else
sprintf(sym_name, "0x%016llx", ksyms[i]);
} else {
strcpy(sym_name, "unknown");
}
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_name(json_wtr, "name");
jsonw_string(json_wtr, sym_name);
jsonw_name(json_wtr, "insns");
} else {
printf("%s:\n", sym_name);
}
disasm_print_insn(img, lens[i], opcodes, name);
img += lens[i];
if (json_output)
jsonw_end_object(json_wtr);
else
printf("\n");
}
if (json_output)
jsonw_end_array(json_wtr);
} else {
disasm_print_insn(buf, *member_len, opcodes, name);
}
} else if (visual) {
if (json_output)
jsonw_null(json_wtr);
......@@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv)
dump_xlated_cfg(buf, *member_len);
} else {
kernel_syms_load(&dd);
dd.nr_jited_ksyms = info.nr_jited_ksyms;
dd.jited_ksyms = (__u64 *) info.jited_ksyms;
if (json_output)
dump_xlated_json(&dd, buf, *member_len, opcodes);
else
......@@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv)
}
free(buf);
free(func_ksyms);
free(func_lens);
return 0;
err_free:
free(buf);
free(func_ksyms);
free(func_lens);
return -1;
}
......
......@@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
free(dd->sym_mapping);
}
static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
unsigned long key)
struct kernel_sym *kernel_syms_search(struct dump_data *dd,
unsigned long key)
{
struct kernel_sym sym = {
.address = key,
......@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
unsigned long address,
const struct bpf_insn *insn)
{
if (sym)
if (!dd->nr_jited_ksyms)
/* Do not show address for interpreted programs */
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"%+d", insn->off);
else if (sym)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"%+d#%s", insn->off, sym->name);
else
......@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
unsigned long address = dd->address_call_base + insn->imm;
struct kernel_sym *sym;
if (insn->src_reg == BPF_PSEUDO_CALL &&
(__u32) insn->imm < dd->nr_jited_ksyms)
address = dd->jited_ksyms[insn->imm];
sym = kernel_syms_search(dd, address);
if (insn->src_reg == BPF_PSEUDO_CALL)
return print_call_pcrel(dd, sym, address, insn);
......
......@@ -49,11 +49,14 @@ struct dump_data {
unsigned long address_call_base;
struct kernel_sym *sym_mapping;
__u32 sym_count;
__u64 *jited_ksyms;
__u32 nr_jited_ksyms;
char scratch_buff[SYM_MAX_NAME + 8];
};
void kernel_syms_load(struct dump_data *dd);
void kernel_syms_destroy(struct dump_data *dd);
struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
bool opcodes);
void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
......
......@@ -2205,6 +2205,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment