Commit f0362eab authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-fd-array-release'

Daniel Borkmann says:

====================
bpf: improve fd array release

This set improves BPF perf fd array map release wrt to purging
entries, first two extend the API as needed. Please see individual
patches for more details.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b478af0c 3b1efb19
......@@ -13,13 +13,15 @@
#include <linux/percpu.h>
#include <linux/err.h>
struct perf_event;
struct bpf_map;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_free)(struct bpf_map *);
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
/* funcs callable from userspace and from eBPF programs */
......@@ -28,8 +30,9 @@ struct bpf_map_ops {
int (*map_delete_elem)(struct bpf_map *map, void *key);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr) (struct bpf_map *map, int fd);
void (*map_fd_put_ptr) (void *ptr);
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
};
struct bpf_map {
......@@ -164,11 +167,19 @@ struct bpf_array {
void __percpu *pptrs[0] __aligned(8);
};
};
#define MAX_TAIL_CALL_CNT 32
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
struct file *map_file;
struct rcu_head rcu;
};
u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
void bpf_fd_array_map_clear(struct bpf_map *map);
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
......@@ -206,8 +217,13 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
void bpf_fd_array_map_clear(struct bpf_map *map);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
* Best-effort only. No barriers here, since it _will_ race with concurrent
......
......@@ -328,8 +328,8 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
}
/* only called from syscall */
static int fd_array_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *new_ptr, *old_ptr;
......@@ -342,7 +342,7 @@ static int fd_array_map_update_elem(struct bpf_map *map, void *key,
return -E2BIG;
ufd = *(u32 *)value;
new_ptr = map->ops->map_fd_get_ptr(map, ufd);
new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
if (IS_ERR(new_ptr))
return PTR_ERR(new_ptr);
......@@ -371,10 +371,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
}
static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
if (IS_ERR(prog))
return prog;
......@@ -382,6 +384,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
return prog;
}
......@@ -407,7 +410,6 @@ static const struct bpf_map_ops prog_array_ops = {
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
......@@ -425,59 +427,105 @@ static int __init register_prog_array_map(void)
}
late_initcall(register_prog_array_map);
static void perf_event_array_map_free(struct bpf_map *map)
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
struct file *map_file)
{
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
struct bpf_event_entry *ee;
ee = kzalloc(sizeof(*ee), GFP_KERNEL);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
ee->map_file = map_file;
}
return ee;
}
static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
static void __bpf_event_entry_free(struct rcu_head *rcu)
{
struct perf_event *event;
const struct perf_event_attr *attr;
struct file *file;
struct bpf_event_entry *ee;
file = perf_event_get(fd);
if (IS_ERR(file))
return file;
ee = container_of(rcu, struct bpf_event_entry, rcu);
fput(ee->perf_file);
kfree(ee);
}
event = file->private_data;
static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
call_rcu(&ee->rcu, __bpf_event_entry_free);
}
attr = perf_event_attrs(event);
if (IS_ERR(attr))
goto err;
static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
const struct perf_event_attr *attr;
struct bpf_event_entry *ee;
struct perf_event *event;
struct file *perf_file;
if (attr->inherit)
goto err;
perf_file = perf_event_get(fd);
if (IS_ERR(perf_file))
return perf_file;
if (attr->type == PERF_TYPE_RAW)
return file;
event = perf_file->private_data;
ee = ERR_PTR(-EINVAL);
if (attr->type == PERF_TYPE_HARDWARE)
return file;
attr = perf_event_attrs(event);
if (IS_ERR(attr) || attr->inherit)
goto err_out;
switch (attr->type) {
case PERF_TYPE_SOFTWARE:
if (attr->config != PERF_COUNT_SW_BPF_OUTPUT)
goto err_out;
/* fall-through */
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
ee = bpf_event_entry_gen(perf_file, map_file);
if (ee)
return ee;
ee = ERR_PTR(-ENOMEM);
/* fall-through */
default:
break;
}
if (attr->type == PERF_TYPE_SOFTWARE &&
attr->config == PERF_COUNT_SW_BPF_OUTPUT)
return file;
err:
fput(file);
return ERR_PTR(-EINVAL);
err_out:
fput(perf_file);
return ee;
}
static void perf_event_fd_array_put_ptr(void *ptr)
{
fput((struct file *)ptr);
bpf_event_entry_free_rcu(ptr);
}
static void perf_event_fd_array_release(struct bpf_map *map,
struct file *map_file)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_event_entry *ee;
int i;
rcu_read_lock();
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
fd_array_map_delete_elem(map, &i);
}
rcu_read_unlock();
}
static const struct bpf_map_ops perf_event_array_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = perf_event_array_map_free,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
.map_release = perf_event_fd_array_release,
};
static struct bpf_map_type_list perf_event_array_type __read_mostly = {
......
......@@ -124,7 +124,12 @@ void bpf_map_put_with_uref(struct bpf_map *map)
static int bpf_map_release(struct inode *inode, struct file *filp)
{
bpf_map_put_with_uref(filp->private_data);
struct bpf_map *map = filp->private_data;
if (map->ops->map_release)
map->ops->map_release(map, filp);
bpf_map_put_with_uref(map);
return 0;
}
......@@ -387,6 +392,12 @@ static int map_update_elem(union bpf_attr *attr)
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
rcu_read_unlock();
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
......
......@@ -192,18 +192,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
{
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_event_entry *ee;
struct perf_event *event;
struct file *file;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
file = READ_ONCE(array->ptrs[index]);
if (unlikely(!file))
ee = READ_ONCE(array->ptrs[index]);
if (unlikely(!ee))
return -ENOENT;
event = file->private_data;
event = ee->event;
/* make sure event is local and doesn't have pmu::count */
if (event->oncpu != smp_processor_id() ||
event->pmu->count)
......@@ -233,8 +232,8 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
u64 index = flags & BPF_F_INDEX_MASK;
void *data = (void *) (long) r4;
struct perf_sample_data sample_data;
struct bpf_event_entry *ee;
struct perf_event *event;
struct file *file;
struct perf_raw_record raw = {
.size = size,
.data = data,
......@@ -247,12 +246,11 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
file = READ_ONCE(array->ptrs[index]);
if (unlikely(!file))
ee = READ_ONCE(array->ptrs[index]);
if (unlikely(!ee))
return -ENOENT;
event = file->private_data;
event = ee->event;
if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment