Commit af66bfd3 authored by Hou Tao's avatar Hou Tao Committed by Alexei Starovoitov

bpf: Optimize the free of inner map

When removing the inner map from the outer map, the inner map will be
freed after one RCU grace period and one RCU tasks trace grace
period, so it is certain that the bpf program, which may access the
inner map, has exited before the inner map is freed.

However there is no need to wait for one RCU tasks trace grace period if
the outer map is only accessed by non-sleepable program. So adding
sleepable_refcnt in bpf_map and increasing sleepable_refcnt when adding
the outer map into env->used_maps for sleepable program. Although the
max number of bpf program is INT_MAX - 1, the number of bpf programs
which are being loaded may be greater than INT_MAX, so using atomic64_t
instead of atomic_t for sleepable_refcnt. When removing the inner map
from the outer map, using sleepable_refcnt to decide whether or not a
RCU tasks trace grace period is needed before freeing the inner map.
Signed-off-by: default avatarHou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231204140425.1480317-6-houtao@huaweicloud.comSigned-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 87667336
...@@ -297,6 +297,8 @@ struct bpf_map { ...@@ -297,6 +297,8 @@ struct bpf_map {
bool bypass_spec_v1; bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */ bool frozen; /* write-once; write-protected by freeze_mutex */
bool free_after_mult_rcu_gp; bool free_after_mult_rcu_gp;
bool free_after_rcu_gp;
atomic64_t sleepable_refcnt;
s64 __percpu *elem_count; s64 __percpu *elem_count;
}; };
......
...@@ -2664,12 +2664,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, ...@@ -2664,12 +2664,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len) struct bpf_map **used_maps, u32 len)
{ {
struct bpf_map *map; struct bpf_map *map;
bool sleepable;
u32 i; u32 i;
sleepable = aux->sleepable;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
map = used_maps[i]; map = used_maps[i];
if (map->ops->map_poke_untrack) if (map->ops->map_poke_untrack)
map->ops->map_poke_untrack(map, aux); map->ops->map_poke_untrack(map, aux);
if (sleepable)
atomic64_dec(&map->sleepable_refcnt);
bpf_map_put(map); bpf_map_put(map);
} }
} }
......
...@@ -131,12 +131,16 @@ void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) ...@@ -131,12 +131,16 @@ void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{ {
struct bpf_map *inner_map = ptr; struct bpf_map *inner_map = ptr;
/* The inner map may still be used by both non-sleepable and sleepable /* Defer the freeing of inner map according to the sleepable attribute
* bpf program, so free it after one RCU grace period and one tasks * of bpf program which owns the outer map, so unnecessary waiting for
* trace RCU grace period. * RCU tasks trace grace period can be avoided.
*/ */
if (need_defer) if (need_defer) {
if (atomic64_read(&map->sleepable_refcnt))
WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
else
WRITE_ONCE(inner_map->free_after_rcu_gp, true);
}
bpf_map_put(inner_map); bpf_map_put(inner_map);
} }
......
...@@ -751,8 +751,11 @@ void bpf_map_put(struct bpf_map *map) ...@@ -751,8 +751,11 @@ void bpf_map_put(struct bpf_map *map)
bpf_map_free_id(map); bpf_map_free_id(map);
btf_put(map->btf); btf_put(map->btf);
WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
if (READ_ONCE(map->free_after_mult_rcu_gp)) if (READ_ONCE(map->free_after_mult_rcu_gp))
call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
else if (READ_ONCE(map->free_after_rcu_gp))
call_rcu(&map->rcu, bpf_map_free_rcu_gp);
else else
bpf_map_free_in_work(map); bpf_map_free_in_work(map);
} }
...@@ -5345,6 +5348,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr) ...@@ -5345,6 +5348,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
goto out_unlock; goto out_unlock;
} }
/* The bpf program will not access the bpf map, but for the sake of
* simplicity, increase sleepable_refcnt for sleepable program as well.
*/
if (prog->aux->sleepable)
atomic64_inc(&map->sleepable_refcnt);
memcpy(used_maps_new, used_maps_old, memcpy(used_maps_new, used_maps_old,
sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
used_maps_new[prog->aux->used_map_cnt] = map; used_maps_new[prog->aux->used_map_cnt] = map;
......
...@@ -17889,10 +17889,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) ...@@ -17889,10 +17889,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -E2BIG; return -E2BIG;
} }
if (env->prog->aux->sleepable)
atomic64_inc(&map->sleepable_refcnt);
/* hold the map. If the program is rejected by verifier, /* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it * the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded * will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps() * and all maps are released in bpf_free_used_maps()
*/ */
bpf_map_inc(map); bpf_map_inc(map);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment