Commit d54730b5 authored by Dave Marchevsky, committed by Alexei Starovoitov

bpf: Introduce opaque bpf_refcount struct and add btf_record plumbing

A 'struct bpf_refcount' is added to the set of opaque uapi/bpf.h types
meant for use in BPF programs. As with other opaque types like
bpf_spin_lock and bpf_rb_node, the verifier needs to know where in a
user-defined struct type a bpf_refcount can be located, so the
necessary btf_record plumbing is added to enable this. bpf_refcount is
sized to hold a refcount_t.
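
For illustration, a refcounted local kptr type on the BPF program side
would embed the new field alongside the other special fields; the
struct and field names below are hypothetical, and the kfuncs that act
on the refcount arrive in later patches of this series:

	/* Hypothetical program-side type: the bpf_refcount field is
	 * found via BTF type matching, not by field name.
	 */
	struct node_data {
		long key;
		struct bpf_refcount ref;
		struct bpf_rb_node node;
	};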

As with bpf_spin_lock, the offset of a bpf_refcount is cached in
btf_record as refcount_off, in addition to being in the field array.
Caching the offset is worthwhile because later patches in the series
modify functions that take local kptrs (e.g. bpf_obj_drop) to behave
differently when the type they operate on is refcounted, so a fast
"is this type refcounted?" check is desirable.
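
A minimal sketch of the fast check this caching enables, assuming a
helper of this shape (not added by this patch; later patches in the
series test rec->refcount_off directly):

	static inline bool btf_record_refcounted(const struct btf_record *rec)
	{
		/* refcount_off is initialized to -EINVAL and only set
		 * when a bpf_refcount field is found, so a sign test
		 * suffices.
		 */
		return rec && rec->refcount_off >= 0;
	}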

No such verifier behavior changes are introduced in this patch, just
logic to recognize 'struct bpf_refcount' in btf_record.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-3-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent cd2a8079

include/linux/bpf.h

@@ -187,6 +187,7 @@ enum btf_field_type {
 	BPF_RB_NODE = (1 << 7),
 	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
 				 BPF_RB_NODE | BPF_RB_ROOT,
+	BPF_REFCOUNT = (1 << 8),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -223,6 +224,7 @@ struct btf_record {
 	u32 field_mask;
 	int spin_lock_off;
 	int timer_off;
+	int refcount_off;
 	struct btf_field fields[];
 };
@@ -293,6 +295,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_rb_root";
 	case BPF_RB_NODE:
 		return "bpf_rb_node";
+	case BPF_REFCOUNT:
+		return "bpf_refcount";
 	default:
 		WARN_ON_ONCE(1);
 		return "unknown";
@@ -317,6 +321,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_rb_root);
 	case BPF_RB_NODE:
 		return sizeof(struct bpf_rb_node);
+	case BPF_REFCOUNT:
+		return sizeof(struct bpf_refcount);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -341,6 +347,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_rb_root);
 	case BPF_RB_NODE:
 		return __alignof__(struct bpf_rb_node);
+	case BPF_REFCOUNT:
+		return __alignof__(struct bpf_refcount);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;

include/uapi/linux/bpf.h

@@ -6985,6 +6985,10 @@ struct bpf_rb_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+	__u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
 	__u32 write;	/* Sysctl is being read (= 0) or written (= 1).
 			 * Allows 1,2,4-byte read, but no write.

kernel/bpf/btf.c

@@ -3391,6 +3391,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
 	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
 	field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
 	field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
+	field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
 
 	/* Only return BPF_KPTR when all other types with matchable names fail */
 	if (field_mask & BPF_KPTR) {
@@ -3439,6 +3440,7 @@ static int btf_find_struct_field(const struct btf *btf,
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			ret = btf_find_struct(btf, member_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3504,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			ret = btf_find_struct(btf, var_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3734,6 +3737,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 	rec->spin_lock_off = -EINVAL;
 	rec->timer_off = -EINVAL;
+	rec->refcount_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
 		if (info_arr[i].off + field_type_size > value_size) {
@@ -3763,6 +3767,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->timer_off = rec->fields[i].offset;
 			break;
+		case BPF_REFCOUNT:
+			WARN_ON_ONCE(rec->refcount_off >= 0);
+			/* Cache offset for faster lookup at runtime */
+			rec->refcount_off = rec->fields[i].offset;
+			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
 			ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
@@ -5308,6 +5317,7 @@ static const char *alloc_obj_fields[] = {
 	"bpf_list_node",
 	"bpf_rb_root",
 	"bpf_rb_node",
+	"bpf_refcount",
 };
 
 static struct btf_struct_metas *
@@ -5381,7 +5391,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
 		type = &tab->types[tab->cnt];
 		type->btf_id = i;
 		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
-					  BPF_RB_ROOT | BPF_RB_NODE, t->size);
+					  BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
 		/* The record cannot be unset, treat it as an error if so */
 		if (IS_ERR_OR_NULL(record)) {
 			ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;

kernel/bpf/syscall.c

@@ -552,6 +552,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_REFCOUNT:
 			/* Nothing to release */
 			break;
 		default:
@@ -599,6 +600,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_REFCOUNT:
 			/* Nothing to acquire */
 			break;
 		default:
@@ -705,6 +707,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 			break;
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			break;
 		default:
 			WARN_ON_ONCE(1);
@@ -1032,7 +1035,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	map->record = btf_parse_fields(btf, value_type,
 				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-				       BPF_RB_ROOT,
+				       BPF_RB_ROOT | BPF_REFCOUNT,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1071,6 +1074,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_REFCOUNT:
 			if (map->map_type != BPF_MAP_TYPE_HASH &&
 			    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
 			    map->map_type != BPF_MAP_TYPE_LRU_HASH &&

tools/include/uapi/linux/bpf.h

@@ -6985,6 +6985,10 @@ struct bpf_rb_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+	__u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
 	__u32 write;	/* Sysctl is being read (= 0) or written (= 1).
 			 * Allows 1,2,4-byte read, but no write.