Commit 5d52c906 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2021-07-09

The following pull-request contains BPF updates for your *net* tree.

We've added 9 non-merge commits during the last 9 day(s) which contain
a total of 13 files changed, 118 insertions(+), 62 deletions(-).

The main changes are:

1) Fix runqslower task->state access from BPF, from SanjayKumar Jeyakumar.

2) Fix subprog poke descriptor tracking use-after-free, from John Fastabend.

3) Fix sparse complaint from prior devmap RCU conversion, from Toke Høiland-Jørgensen.

4) Fix missing va_end in bpftool JIT json dump's error path, from Gu Shengxian.

5) Fix tools/bpf install target failing on the missing runqslower install target, from Wei Li.

6) Fix xdpsock BPF sample to unload program on shared umem option, from Wang Hai.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 67a9c943 1fb5ba29
...@@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) ...@@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
for (i = 0; i < prog->aux->size_poke_tab; i++) { for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i]; poke = &prog->aux->poke_tab[i];
if (poke->aux && poke->aux != prog->aux)
continue;
WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
if (poke->reason != BPF_POKE_REASON_TAIL_CALL) if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
......
...@@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor { ...@@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor {
void *tailcall_target; void *tailcall_target;
void *tailcall_bypass; void *tailcall_bypass;
void *bypass_addr; void *bypass_addr;
void *aux;
union { union {
struct { struct {
struct bpf_map *map; struct bpf_map *map;
......
...@@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work) ...@@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif #endif
if (aux->dst_trampoline) if (aux->dst_trampoline)
bpf_trampoline_put(aux->dst_trampoline); bpf_trampoline_put(aux->dst_trampoline);
for (i = 0; i < aux->func_cnt; i++) for (i = 0; i < aux->func_cnt; i++) {
/* We can just unlink the subprog poke descriptor table as
* it was originally linked to the main program and is also
* released along with it.
*/
aux->func[i]->aux->poke_tab = NULL;
bpf_jit_free(aux->func[i]); bpf_jit_free(aux->func[i]);
}
if (aux->func_cnt) { if (aux->func_cnt) {
kfree(aux->func); kfree(aux->func);
bpf_prog_unlock_free(aux->prog); bpf_prog_unlock_free(aux->prog);
......
...@@ -558,7 +558,8 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, ...@@ -558,7 +558,8 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
if (map->map_type == BPF_MAP_TYPE_DEVMAP) { if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) { for (i = 0; i < map->max_entries; i++) {
dst = READ_ONCE(dtab->netdev_map[i]); dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!is_valid_dst(dst, xdp, exclude_ifindex)) if (!is_valid_dst(dst, xdp, exclude_ifindex))
continue; continue;
...@@ -654,7 +655,8 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, ...@@ -654,7 +655,8 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
if (map->map_type == BPF_MAP_TYPE_DEVMAP) { if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) { for (i = 0; i < map->max_entries; i++) {
dst = READ_ONCE(dtab->netdev_map[i]); dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!dst || dst->dev->ifindex == exclude_ifindex) if (!dst || dst->dev->ifindex == exclude_ifindex)
continue; continue;
......
...@@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
goto out_free; goto out_free;
func[i]->is_func = 1; func[i]->is_func = 1;
func[i]->aux->func_idx = i; func[i]->aux->func_idx = i;
/* the btf and func_info will be freed only at prog->aux */ /* Below members will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf; func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info; func[i]->aux->func_info = prog->aux->func_info;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
for (j = 0; j < prog->aux->size_poke_tab; j++) { for (j = 0; j < prog->aux->size_poke_tab; j++) {
u32 insn_idx = prog->aux->poke_tab[j].insn_idx; struct bpf_jit_poke_descriptor *poke;
int ret;
if (!(insn_idx >= subprog_start &&
insn_idx <= subprog_end))
continue;
ret = bpf_jit_add_poke_descriptor(func[i],
&prog->aux->poke_tab[j]);
if (ret < 0) {
verbose(env, "adding tail call poke descriptor failed\n");
goto out_free;
}
func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; poke = &prog->aux->poke_tab[j];
ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); if (poke->insn_idx < subprog_end &&
if (ret < 0) { poke->insn_idx >= subprog_start)
verbose(env, "tracking tail call prog failed\n"); poke->aux = func[i]->aux;
goto out_free;
}
} }
/* Use bpf_prog_F_tag to indicate functions in stack traces. /* Use bpf_prog_F_tag to indicate functions in stack traces.
...@@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
cond_resched(); cond_resched();
} }
/* Untrack main program's aux structs so that during map_poke_run()
* we will not stumble upon the unfilled poke descriptors; each
* of the main program's poke descs got distributed across subprogs
* and got tracked onto map, so we are sure that none of them will
* be missed after the operation below
*/
for (i = 0; i < prog->aux->size_poke_tab; i++) {
map_ptr = prog->aux->poke_tab[i].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
}
/* at this point all bpf functions were successfully JITed /* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and * now populate all bpf_calls with correct addresses and
* run last pass of JIT * run last pass of JIT
...@@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_jit_attempt_done(prog); bpf_prog_jit_attempt_done(prog);
return 0; return 0;
out_free: out_free:
/* We failed JIT'ing, so at this point we need to unregister poke
* descriptors from subprogs, so that kernel is not attempting to
* patch it anymore as we're freeing the subprog JIT memory.
*/
for (i = 0; i < prog->aux->size_poke_tab; i++) {
map_ptr = prog->aux->poke_tab[i].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
}
/* At this point we're guaranteed that poke descriptors are not
* live anymore. We can just unlink its descriptor table as it's
* released with the main prog.
*/
for (i = 0; i < env->subprog_cnt; i++) { for (i = 0; i < env->subprog_cnt; i++) {
if (!func[i]) if (!func[i])
continue; continue;
func[i]->aux->poke_tab = NULL;
for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
}
bpf_jit_free(func[i]); bpf_jit_free(func[i]);
} }
kfree(func); kfree(func);
......
...@@ -331,6 +331,7 @@ $(obj)/%.o: $(src)/%.c ...@@ -331,6 +331,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \ -Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-fno-asynchronous-unwind-tables \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
......
...@@ -96,6 +96,7 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; ...@@ -96,6 +96,7 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000; static int opt_timeout = 1000;
static bool opt_need_wakeup = true; static bool opt_need_wakeup = true;
static u32 opt_num_xsks = 1; static u32 opt_num_xsks = 1;
static u32 prog_id;
static bool opt_busy_poll; static bool opt_busy_poll;
static bool opt_reduced_cap; static bool opt_reduced_cap;
...@@ -461,6 +462,23 @@ static void *poller(void *arg) ...@@ -461,6 +462,23 @@ static void *poller(void *arg)
return NULL; return NULL;
} }
/* Remove the XDP program from opt_ifindex, but only when the program id
 * currently reported for the interface still equals the recorded prog_id.
 * Exits the process with EXIT_FAILURE if the current id cannot be queried.
 */
static void remove_xdp_program(void)
{
u32 curr_prog_id = 0;
/* Query the id of whatever program is attached to the interface now. */
if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(EXIT_FAILURE);
}
/* Only touch the interface if it still carries our program. The return
 * value of bpf_set_link_xdp_fd() is not checked here.
 * NOTE(review): fd -1 is presumably libbpf's "detach" request - confirm.
 */
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
printf("program on interface changed, not removing\n");
}
static void int_exit(int sig) static void int_exit(int sig)
{ {
benchmark_done = true; benchmark_done = true;
...@@ -471,6 +489,9 @@ static void __exit_with_error(int error, const char *file, const char *func, ...@@ -471,6 +489,9 @@ static void __exit_with_error(int error, const char *file, const char *func,
{ {
fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
line, error, strerror(error)); line, error, strerror(error));
if (opt_num_xsks > 1)
remove_xdp_program();
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
...@@ -490,6 +511,9 @@ static void xdpsock_cleanup(void) ...@@ -490,6 +511,9 @@ static void xdpsock_cleanup(void)
if (write(sock, &cmd, sizeof(int)) < 0) if (write(sock, &cmd, sizeof(int)) < 0)
exit_with_error(errno); exit_with_error(errno);
} }
if (opt_num_xsks > 1)
remove_xdp_program();
} }
static void swap_mac_addresses(void *data) static void swap_mac_addresses(void *data)
...@@ -857,6 +881,10 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, ...@@ -857,6 +881,10 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
if (ret) if (ret)
exit_with_error(-ret); exit_with_error(-ret);
ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
if (ret)
exit_with_error(-ret);
xsk->app_stats.rx_empty_polls = 0; xsk->app_stats.rx_empty_polls = 0;
xsk->app_stats.fill_fail_polls = 0; xsk->app_stats.fill_fail_polls = 0;
xsk->app_stats.copy_tx_sendtos = 0; xsk->app_stats.copy_tx_sendtos = 0;
......
...@@ -97,7 +97,7 @@ clean: bpftool_clean runqslower_clean resolve_btfids_clean ...@@ -97,7 +97,7 @@ clean: bpftool_clean runqslower_clean resolve_btfids_clean
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
$(Q)$(RM) -r -- $(OUTPUT)feature $(Q)$(RM) -r -- $(OUTPUT)feature
install: $(PROGS) bpftool_install runqslower_install install: $(PROGS) bpftool_install
$(call QUIET_INSTALL, bpf_jit_disasm) $(call QUIET_INSTALL, bpf_jit_disasm)
$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
$(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
...@@ -118,9 +118,6 @@ bpftool_clean: ...@@ -118,9 +118,6 @@ bpftool_clean:
runqslower: runqslower:
$(call descend,runqslower) $(call descend,runqslower)
runqslower_install:
$(call descend,runqslower,install)
runqslower_clean: runqslower_clean:
$(call descend,runqslower,clean) $(call descend,runqslower,clean)
...@@ -131,5 +128,5 @@ resolve_btfids_clean: ...@@ -131,5 +128,5 @@ resolve_btfids_clean:
$(call descend,resolve_btfids,clean) $(call descend,resolve_btfids,clean)
.PHONY: all install clean bpftool bpftool_install bpftool_clean \ .PHONY: all install clean bpftool bpftool_install bpftool_clean \
runqslower runqslower_install runqslower_clean \ runqslower runqslower_clean \
resolve_btfids resolve_btfids_clean resolve_btfids resolve_btfids_clean
...@@ -43,11 +43,13 @@ static int fprintf_json(void *out, const char *fmt, ...) ...@@ -43,11 +43,13 @@ static int fprintf_json(void *out, const char *fmt, ...)
{ {
va_list ap; va_list ap;
char *s; char *s;
int err;
va_start(ap, fmt); va_start(ap, fmt);
if (vasprintf(&s, fmt, ap) < 0) err = vasprintf(&s, fmt, ap);
return -1;
va_end(ap); va_end(ap);
if (err < 0)
return -1;
if (!oper_count) { if (!oper_count) {
int i; int i;
......
...@@ -74,7 +74,7 @@ int handle__sched_switch(u64 *ctx) ...@@ -74,7 +74,7 @@ int handle__sched_switch(u64 *ctx)
u32 pid; u32 pid;
/* ivcsw: treat like an enqueue event and store timestamp */ /* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING) if (prev->__state == TASK_RUNNING)
trace_enqueue(prev); trace_enqueue(prev);
pid = next->pid; pid = next->pid;
......
...@@ -10136,7 +10136,7 @@ int bpf_link__unpin(struct bpf_link *link) ...@@ -10136,7 +10136,7 @@ int bpf_link__unpin(struct bpf_link *link)
err = unlink(link->pin_path); err = unlink(link->pin_path);
if (err != 0) if (err != 0)
return libbpf_err_errno(err); return -errno;
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
zfree(&link->pin_path); zfree(&link->pin_path);
...@@ -11197,7 +11197,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) ...@@ -11197,7 +11197,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
if (cnt < 0) if (cnt < 0)
return libbpf_err_errno(cnt); return -errno;
for (i = 0; i < cnt; i++) { for (i = 0; i < cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
......
...@@ -715,6 +715,8 @@ static void test_tailcall_bpf2bpf_3(void) ...@@ -715,6 +715,8 @@ static void test_tailcall_bpf2bpf_3(void)
bpf_object__close(obj); bpf_object__close(obj);
} }
#include "tailcall_bpf2bpf4.skel.h"
/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved /* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
* across tailcalls combined with bpf2bpf calls. for making sure that tailcall * across tailcalls combined with bpf2bpf calls. for making sure that tailcall
* counter behaves correctly, bpf program will go through following flow: * counter behaves correctly, bpf program will go through following flow:
...@@ -727,10 +729,15 @@ static void test_tailcall_bpf2bpf_3(void) ...@@ -727,10 +729,15 @@ static void test_tailcall_bpf2bpf_3(void)
* the loop begins. At the end of the test make sure that the global counter is * the loop begins. At the end of the test make sure that the global counter is
* equal to 31, because tailcall counter includes the first two tailcalls * equal to 31, because tailcall counter includes the first two tailcalls
* whereas global counter is incremented only on loop presented on flow above. * whereas global counter is incremented only on loop presented on flow above.
*
* The noise parameter is used to insert bpf_map_lookup_elem calls into the
* logic to force the verifier to patch instructions. This allows us to ensure
* jump logic remains correct with instruction movement.
*/ */
static void test_tailcall_bpf2bpf_4(void) static void test_tailcall_bpf2bpf_4(bool noise)
{ {
int err, map_fd, prog_fd, main_fd, data_fd, i, val; int err, map_fd, prog_fd, main_fd, data_fd, i;
struct tailcall_bpf2bpf4__bss val;
struct bpf_map *prog_array, *data_map; struct bpf_map *prog_array, *data_map;
struct bpf_program *prog; struct bpf_program *prog;
struct bpf_object *obj; struct bpf_object *obj;
...@@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void) ...@@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void)
goto out; goto out;
} }
err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
&duration, &retval, NULL);
CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
return; return;
...@@ -787,10 +789,22 @@ static void test_tailcall_bpf2bpf_4(void) ...@@ -787,10 +789,22 @@ static void test_tailcall_bpf2bpf_4(void)
if (CHECK_FAIL(map_fd < 0)) if (CHECK_FAIL(map_fd < 0))
return; return;
i = 0;
val.noise = noise;
val.count = 0;
err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
&duration, &retval, NULL);
CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);
i = 0; i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val); err = bpf_map_lookup_elem(data_fd, &i, &val);
CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n",
err, errno, val); err, errno, val.count);
out: out:
bpf_object__close(obj); bpf_object__close(obj);
...@@ -815,5 +829,7 @@ void test_tailcalls(void) ...@@ -815,5 +829,7 @@ void test_tailcalls(void)
if (test__start_subtest("tailcall_bpf2bpf_3")) if (test__start_subtest("tailcall_bpf2bpf_3"))
test_tailcall_bpf2bpf_3(); test_tailcall_bpf2bpf_3();
if (test__start_subtest("tailcall_bpf2bpf_4")) if (test__start_subtest("tailcall_bpf2bpf_4"))
test_tailcall_bpf2bpf_4(); test_tailcall_bpf2bpf_4(false);
if (test__start_subtest("tailcall_bpf2bpf_5"))
test_tailcall_bpf2bpf_4(true);
} }
...@@ -2,6 +2,13 @@ ...@@ -2,6 +2,13 @@
#include <linux/bpf.h> #include <linux/bpf.h>
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
/* Single-entry array map whose key and value are both __u32-sized. In the
 * code shown here it is only used as the lookup target in subprog_noise().
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} nop_table SEC(".maps");
struct { struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY); __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, 3); __uint(max_entries, 3);
...@@ -10,10 +17,21 @@ struct { ...@@ -10,10 +17,21 @@ struct {
} jmp_table SEC(".maps"); } jmp_table SEC(".maps");
int count = 0; int count = 0;
int noise = 0;
/* Issue one lookup on nop_table with key 0 and discard the result; always
 * returns 0. Called from subprog_tail_2() when the global `noise` is set.
 */
__always_inline int subprog_noise(void)
{
__u32 key = 0;
/* The looked-up value is intentionally unused. */
bpf_map_lookup_elem(&nop_table, &key);
return 0;
}
__noinline __noinline
int subprog_tail_2(struct __sk_buff *skb) int subprog_tail_2(struct __sk_buff *skb)
{ {
if (noise)
subprog_noise();
bpf_tail_call_static(skb, &jmp_table, 2); bpf_tail_call_static(skb, &jmp_table, 2);
return skb->len * 3; return skb->len * 3;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment