Commit 4b3ccca5 authored by Alexei Starovoitov

Merge branch 'bpf-refcount-followups-2-owner-field'

Dave Marchevsky says:

====================
BPF Refcount followups 2: owner field

This series adds an 'owner' field to bpf_{list,rb}_node structs, to be
used by the runtime to determine whether insertion or removal operations
are valid in shared ownership scenarios. Both the races which the series
fixes and the fix itself are inspired by Kumar's suggestions in [0].

Aside from insertion and removal having more reasons to fail, there are
no user-facing changes as a result of this series.
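
To make the mechanism concrete, below is a minimal userspace sketch of the
ownership protocol (an illustration, not the kernel code): GCC atomic
builtins stand in for the kernel's cmpxchg()/READ_ONCE()/WRITE_ONCE(), and
the 'graph'/'node' structs are simplified stand-ins for bpf_{list,rb}_root
and bpf_{list,rb}_node. Insertion atomically claims a free node by moving
owner from NULL to a poison value before linking it, then publishes the
owning graph; removal proceeds only if the node's published owner is the
graph being removed from.

#include <stdio.h>

#define PTR_POISON ((void *)1UL)	/* stand-in for BPF_PTR_POISON */

struct graph { int id; };	/* stand-in for bpf_{list,rb}_root */
struct node { void *owner; };	/* stand-in for bpf_{list,rb}_node */

/* Insert: claim a free node (owner NULL -> poison), link, publish owner */
static int graph_insert(struct graph *g, struct node *n)
{
	void *expect = NULL;

	/* kernel: cmpxchg(&node->owner, NULL, BPF_PTR_POISON) */
	if (!__atomic_compare_exchange_n(&n->owner, &expect, PTR_POISON, 0,
					 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		return -1;	/* already owned or mid-insert elsewhere */

	/* ... link n into g's list/rbtree under g's lock ... */

	__atomic_store_n(&n->owner, (void *)g, __ATOMIC_RELEASE); /* WRITE_ONCE */
	return 0;
}

/* Remove: legal only if this graph is the node's published owner */
static int graph_remove(struct graph *g, struct node *n)
{
	/* kernel: READ_ONCE(node->owner) != head (or != root) */
	if (__atomic_load_n(&n->owner, __ATOMIC_ACQUIRE) != (void *)g)
		return -1;	/* free, mid-insert, or owned by another graph */

	/* ... unlink n from g under g's lock ... */

	__atomic_store_n(&n->owner, NULL, __ATOMIC_RELEASE);
	return 0;
}

int main(void)
{
	struct graph a = { 1 }, b = { 2 };
	struct node n = { .owner = NULL };

	printf("insert a: %d\n", graph_insert(&a, &n));	/* 0: n now owned by a */
	printf("insert b: %d\n", graph_insert(&b, &n));	/* -1: already owned */
	printf("remove b: %d\n", graph_remove(&b, &n));	/* -1: wrong owner */
	printf("remove a: %d\n", graph_remove(&a, &n));	/* 0: n free again */
	return 0;
}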

* Patch 1 reverts disabling of bpf_refcount_acquire so that the fixed
logic can be exercised by CI. It should _not_ be applied.
* Patch 2 adds internal definitions of bpf_{rb,list}_node so that
their fields are easier to access.
* Patch 3 is the meat of the series - it adds 'owner' field and
enforcement of correct owner to insertion and removal helpers.
* Patch 4 adds a test based on Kumar's examples (a condensed sketch of the
  failure mode follows this list).
* Patch 5 disables the test until bpf_refcount_acquire is re-enabled.
* Patch 6 reverts disabling of the test added in this series so that the
  fixed logic can be exercised by CI. It should _not_ be applied.

  [0]: https://lore.kernel.org/bpf/d7hyspcow5wtjcmw4fugdgyp3fwhljwuscp3xyut5qnwivyeru@ysdq543otzv2/
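
To illustrate the bug class the owner field closes: with
bpf_refcount_acquire a program can hold two owning references to one
object, insert the object into one data structure, then pass the second
reference to a remove operation on a *different* structure, which would
previously rb_erase/list_del against the wrong root or head. Below is a
hypothetical single-program condensation of patch 4's three-program test
(rbtree_wrong_owner_remove_fail_{a1,b,a2}), reusing the selftest's
head/lock/root and block/broot definitions. Whether the verifier accepts
this exact combined form is an assumption; the actual test moves the node
between programs via a kptr stash.

SEC("tc")
long wrong_owner_remove_sketch(void *ctx)
{
	struct bpf_rb_node *res;
	struct node_data *n, *m;

	n = bpf_obj_new(typeof(*n));
	if (!n)
		return 1;
	/* second owning reference to the same object */
	m = bpf_refcount_acquire(n);

	bpf_spin_lock(&block);
	/* object's bpf_rb_node is now owned by broot */
	bpf_rbtree_add(&broot, &n->r, less);
	bpf_spin_unlock(&block);

	bpf_spin_lock(&lock);
	/* make m a non-owning reference */
	bpf_list_push_back(&head, &m->l);
	/* wrong owner: with this series, returns NULL instead of
	 * corrupting broot
	 */
	res = bpf_rbtree_remove(&root, &m->r);
	bpf_spin_unlock(&lock);

	if (res) {
		bpf_obj_drop(container_of(res, struct node_data, r));
		return 2;
	}
	return 0;
}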

Changelog:

v1 -> v2: lore.kernel.org/bpf/20230711175945.3298231-1-davemarchevsky@fb.com/

Patch 2 ("Introduce internal definitions for UAPI-opaque bpf_{rb,list}_node")
  * Rename bpf_{rb,list}_node_internal -> bpf_{list,rb}_node_kern (Alexei)

Patch 3 ("bpf: Add 'owner' field to bpf_{list,rb}_node")
  * WARN_ON_ONCE in __bpf_list_del when node has wrong owner. This shouldn't
    happen, but worth checking regardless (Alexei, offline convo)
  * Continue previous patch's renaming changes
====================

Link: https://lore.kernel.org/r/20230718083813.3416104-1-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 60cc1f7d f3514a5d
@@ -228,6 +228,18 @@ struct btf_record {
 	struct btf_field fields[];
 };
 
+/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
+struct bpf_rb_node_kern {
+	struct rb_node rb_node;
+	void *owner;
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
+struct bpf_list_node_kern {
+	struct list_head list_head;
+	void *owner;
+} __attribute__((aligned(8)));
+
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
......
@@ -7052,6 +7052,7 @@ struct bpf_list_head {
 struct bpf_list_node {
 	__u64 :64;
 	__u64 :64;
+	__u64 :64;
 } __attribute__((aligned(8)));
 
 struct bpf_rb_root {
@@ -7063,6 +7064,7 @@ struct bpf_rb_node {
 	__u64 :64;
 	__u64 :64;
 	__u64 :64;
+	__u64 :64;
 } __attribute__((aligned(8)));
 
 struct bpf_refcount {
......
@@ -1942,23 +1942,29 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
 	return (void *)p__refcounted_kptr;
 }
 
-static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
+static int __bpf_list_add(struct bpf_list_node_kern *node,
+			  struct bpf_list_head *head,
 			  bool tail, struct btf_record *rec, u64 off)
 {
-	struct list_head *n = (void *)node, *h = (void *)head;
+	struct list_head *n = &node->list_head, *h = (void *)head;
 
 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
 	 * called on its fields, so init here
 	 */
 	if (unlikely(!h->next))
 		INIT_LIST_HEAD(h);
-	if (!list_empty(n)) {
+
+	/* node->owner != NULL implies !list_empty(n), no need to separately
+	 * check the latter
+	 */
+	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
 		/* Only called from BPF prog, no need to migrate_disable */
 		__bpf_obj_drop_impl((void *)n - off, rec);
 		return -EINVAL;
 	}
 
 	tail ? list_add_tail(n, h) : list_add(n, h);
+	WRITE_ONCE(node->owner, head);
 
 	return 0;
 }
@@ -1967,25 +1973,26 @@ __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
 					 struct bpf_list_node *node,
 					 void *meta__ign, u64 off)
 {
+	struct bpf_list_node_kern *n = (void *)node;
 	struct btf_struct_meta *meta = meta__ign;
 
-	return __bpf_list_add(node, head, false,
-			      meta ? meta->record : NULL, off);
+	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
 }
 
 __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
 					struct bpf_list_node *node,
 					void *meta__ign, u64 off)
 {
+	struct bpf_list_node_kern *n = (void *)node;
 	struct btf_struct_meta *meta = meta__ign;
 
-	return __bpf_list_add(node, head, true,
-			      meta ? meta->record : NULL, off);
+	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
 }
 
 static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
 {
 	struct list_head *n, *h = (void *)head;
+	struct bpf_list_node_kern *node;
 
 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
 	 * called on its fields, so init here
@@ -1994,8 +2001,14 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
 		INIT_LIST_HEAD(h);
 	if (list_empty(h))
 		return NULL;
+
 	n = tail ? h->prev : h->next;
+	node = container_of(n, struct bpf_list_node_kern, list_head);
+	if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
+		return NULL;
+
 	list_del_init(n);
+	WRITE_ONCE(node->owner, NULL);
 	return (struct bpf_list_node *)n;
 }
 
@@ -2012,29 +2025,38 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
 						  struct bpf_rb_node *node)
 {
+	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
 	struct rb_root_cached *r = (struct rb_root_cached *)root;
-	struct rb_node *n = (struct rb_node *)node;
+	struct rb_node *n = &node_internal->rb_node;
 
-	if (RB_EMPTY_NODE(n))
+	/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
+	 * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
+	 */
+	if (READ_ONCE(node_internal->owner) != root)
 		return NULL;
 
 	rb_erase_cached(n, r);
 	RB_CLEAR_NODE(n);
+	WRITE_ONCE(node_internal->owner, NULL);
 	return (struct bpf_rb_node *)n;
 }
 
 /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
  * program
  */
-static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+static int __bpf_rbtree_add(struct bpf_rb_root *root,
+			    struct bpf_rb_node_kern *node,
 			    void *less, struct btf_record *rec, u64 off)
 {
 	struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
-	struct rb_node *parent = NULL, *n = (struct rb_node *)node;
+	struct rb_node *parent = NULL, *n = &node->rb_node;
 	bpf_callback_t cb = (bpf_callback_t)less;
 	bool leftmost = true;
 
-	if (!RB_EMPTY_NODE(n)) {
+	/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
+	 * check the latter
+	 */
+	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
 		/* Only called from BPF prog, no need to migrate_disable */
 		__bpf_obj_drop_impl((void *)n - off, rec);
 		return -EINVAL;
@@ -2052,6 +2074,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
 
 	rb_link_node(n, parent, link);
 	rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
+	WRITE_ONCE(node->owner, root);
 	return 0;
 }
@@ -2060,8 +2083,9 @@ __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
 			   void *meta__ign, u64 off)
 {
 	struct btf_struct_meta *meta = meta__ign;
+	struct bpf_rb_node_kern *n = (void *)node;
 
-	return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off);
+	return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
 }
 
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
......
@@ -14,3 +14,7 @@ void test_refcounted_kptr(void)
 void test_refcounted_kptr_fail(void)
 {
 }
+
+void test_refcounted_kptr_wrong_owner(void)
+{
+}
@@ -24,7 +24,7 @@ struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__type(key, int);
 	__type(value, struct map_value);
-	__uint(max_entries, 1);
+	__uint(max_entries, 2);
 } stashed_nodes SEC(".maps");
 
 struct node_acquire {
@@ -42,6 +42,9 @@ private(A) struct bpf_list_head head __contains(node_data, l);
 private(B) struct bpf_spin_lock alock;
 private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
 
+private(C) struct bpf_spin_lock block;
+private(C) struct bpf_rb_root broot __contains(node_data, r);
+
 static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
 {
 	struct node_data *a;
@@ -405,4 +408,93 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
 	return 0;
 }
 
+static long __stash_map_empty_xchg(struct node_data *n, int idx)
+{
+	struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+
+	if (!mapval) {
+		bpf_obj_drop(n);
+		return 1;
+	}
+	n = bpf_kptr_xchg(&mapval->node, n);
+	if (n) {
+		bpf_obj_drop(n);
+		return 2;
+	}
+
+	return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a1(void *ctx)
+{
+	struct node_data *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+	m = bpf_refcount_acquire(n);
+
+	if (__stash_map_empty_xchg(n, 0)) {
+		bpf_obj_drop(m);
+		return 2;
+	}
+
+	if (__stash_map_empty_xchg(m, 1))
+		return 3;
+
+	return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_b(void *ctx)
+{
+	struct map_value *mapval;
+	struct node_data *n;
+	int idx = 0;
+
+	mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+	if (!mapval)
+		return 1;
+
+	n = bpf_kptr_xchg(&mapval->node, NULL);
+	if (!n)
+		return 2;
+
+	bpf_spin_lock(&block);
+
+	bpf_rbtree_add(&broot, &n->r, less);
+
+	bpf_spin_unlock(&block);
+	return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a2(void *ctx)
+{
+	struct map_value *mapval;
+	struct bpf_rb_node *res;
+	struct node_data *m;
+	int idx = 1;
+
+	mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+	if (!mapval)
+		return 1;
+
+	m = bpf_kptr_xchg(&mapval->node, NULL);
+	if (!m)
+		return 2;
+
+	bpf_spin_lock(&lock);
+
+	/* make m non-owning ref */
+	bpf_list_push_back(&head, &m->l);
+	res = bpf_rbtree_remove(&root, &m->r);
+
+	bpf_spin_unlock(&lock);
+
+	if (res) {
+		bpf_obj_drop(container_of(res, struct node_data, r));
+		return 3;
+	}
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";