Commit fdf48dc2 authored by Dave Marchevsky's avatar Dave Marchevsky Committed by Alexei Starovoitov

selftests/bpf: Add rbtree test exercising race which 'owner' field prevents

This patch adds a runnable version of one of the races described by
Kumar in [0]. Specifically, this interleaving:

(rbtree1 and list head protected by lock1, rbtree2 protected by lock2)

Prog A                          Prog B
======================================
n = bpf_obj_new(...)
m = bpf_refcount_acquire(n)
kptr_xchg(map, m)

                                m = kptr_xchg(map, NULL)
                                lock(lock2)
				bpf_rbtree_add(rbtree2, m->r, less)
				unlock(lock2)

lock(lock1)
bpf_list_push_back(head, n->l)
/* make n non-owning ref */
bpf_rbtree_remove(rbtree1, n->r)
unlock(lock1)

The above interleaving, the node's struct bpf_rb_node *r can be used to
add it to either rbtree1 or rbtree2, which are protected by different
locks. If the node has been added to rbtree2, we should not be allowed
to remove it while holding rbtree1's lock.

Before changes in the previous patch in this series, the rbtree_remove
in the second part of Prog A would succeed as the verifier has no way of
knowing which tree owns a particular node at verification time. The
addition of 'owner' field results in bpf_rbtree_remove correctly
failing.

The test added in this patch splits "Prog A" above into two separate BPF
programs - A1 and A2 - and uses a second mapval + kptr_xchg to pass n
from A1 to A2 similarly to the pass from A1 to B. If the test is run
without the fix applied, the remove will succeed.

Kumar's example had the two programs running on separate CPUs. This
patch doesn't do this as it's not necessary to exercise the broken
behavior / validate fixed behavior.

  [0]: https://lore.kernel.org/bpf/d7hyspcow5wtjcmw4fugdgyp3fwhljwuscp3xyut5qnwivyeru@ysdq543otzv2Signed-off-by: default avatarDave Marchevsky <davemarchevsky@fb.com>
Suggested-by: default avatarKumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230718083813.3416104-5-davemarchevsky@fb.comSigned-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent c3c510ce
...@@ -14,3 +14,31 @@ void test_refcounted_kptr(void) ...@@ -14,3 +14,31 @@ void test_refcounted_kptr(void)
void test_refcounted_kptr_fail(void) void test_refcounted_kptr_fail(void)
{ {
} }
void test_refcounted_kptr_wrong_owner(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
);
struct refcounted_kptr *skel;
int ret;
skel = refcounted_kptr__open_and_load();
if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load"))
return;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts);
ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1");
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval");
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts);
ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b");
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval");
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts);
ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2");
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
refcounted_kptr__destroy(skel);
}
...@@ -24,7 +24,7 @@ struct { ...@@ -24,7 +24,7 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY); __uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int); __type(key, int);
__type(value, struct map_value); __type(value, struct map_value);
__uint(max_entries, 1); __uint(max_entries, 2);
} stashed_nodes SEC(".maps"); } stashed_nodes SEC(".maps");
struct node_acquire { struct node_acquire {
...@@ -42,6 +42,9 @@ private(A) struct bpf_list_head head __contains(node_data, l); ...@@ -42,6 +42,9 @@ private(A) struct bpf_list_head head __contains(node_data, l);
private(B) struct bpf_spin_lock alock; private(B) struct bpf_spin_lock alock;
private(B) struct bpf_rb_root aroot __contains(node_acquire, node); private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
private(C) struct bpf_spin_lock block;
private(C) struct bpf_rb_root broot __contains(node_data, r);
static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b) static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
{ {
struct node_data *a; struct node_data *a;
...@@ -405,4 +408,93 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) ...@@ -405,4 +408,93 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
return 0; return 0;
} }
static long __stash_map_empty_xchg(struct node_data *n, int idx)
{
struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval) {
bpf_obj_drop(n);
return 1;
}
n = bpf_kptr_xchg(&mapval->node, n);
if (n) {
bpf_obj_drop(n);
return 2;
}
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_a1(void *ctx)
{
struct node_data *n, *m;
n = bpf_obj_new(typeof(*n));
if (!n)
return 1;
m = bpf_refcount_acquire(n);
if (__stash_map_empty_xchg(n, 0)) {
bpf_obj_drop(m);
return 2;
}
if (__stash_map_empty_xchg(m, 1))
return 3;
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_b(void *ctx)
{
struct map_value *mapval;
struct node_data *n;
int idx = 0;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return 1;
n = bpf_kptr_xchg(&mapval->node, NULL);
if (!n)
return 2;
bpf_spin_lock(&block);
bpf_rbtree_add(&broot, &n->r, less);
bpf_spin_unlock(&block);
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_a2(void *ctx)
{
struct map_value *mapval;
struct bpf_rb_node *res;
struct node_data *m;
int idx = 1;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return 1;
m = bpf_kptr_xchg(&mapval->node, NULL);
if (!m)
return 2;
bpf_spin_lock(&lock);
/* make m non-owning ref */
bpf_list_push_back(&head, &m->l);
res = bpf_rbtree_remove(&root, &m->r);
bpf_spin_unlock(&lock);
if (res) {
bpf_obj_drop(container_of(res, struct node_data, r));
return 3;
}
return 0;
}
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment