Commit ee4ed53c authored by Alexei Starovoitov, committed by Daniel Borkmann

bpf: Convert percpu hash map to per-cpu bpf_mem_alloc.

Convert dynamic allocations in the percpu hash map from alloc_percpu() to
bpf_mem_cache_alloc() from a per-cpu bpf_mem_alloc. Since bpf_mem_alloc frees
objects after an RCU grace period, the call_rcu() is removed. pcpu_init_value()
now needs to zero-fill per-cpu allocations, because dynamically allocated map
elements now behave like fully preallocated ones: alloc_percpu() is no longer
called inline and the elements are reused from the freelist.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220902211058.60789-12-alexei.starovoitov@gmail.com
parent 4ab67149
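
For orientation before the diff: the per-cpu allocator path adds one level of indirection to each dynamically allocated element. The sketch below condenses the alloc/free pairing introduced by the hunks that follow; the helper names (htab_pcpu_alloc_sketch, htab_pcpu_free_sketch) are hypothetical and exist only for illustration, not in the patch.

	/* Illustrative sketch only, condensed from the hunks below. For a per-cpu
	 * bpf_mem_alloc, bpf_mem_cache_alloc() returns a pointer whose first word
	 * holds the real __percpu pointer, so the element records the outer
	 * pointer for freeing and dereferences it once to get the per-cpu data.
	 */
	static void *htab_pcpu_alloc_sketch(struct bpf_htab *htab, struct htab_elem *l)
	{
		void *pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);

		if (!pptr)
			return NULL;
		l->ptr_to_pptr = pptr;	/* what bpf_mem_cache_free() expects later */
		return *(void **)pptr;	/* the actual per-cpu data pointer */
	}

	static void htab_pcpu_free_sketch(struct bpf_htab *htab, struct htab_elem *l)
	{
		/* No call_rcu() needed: bpf_mem_alloc defers reuse past an RCU grace period. */
		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
	}
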
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -94,6 +94,7 @@ struct bucket {
 struct bpf_htab {
 	struct bpf_map map;
 	struct bpf_mem_alloc ma;
+	struct bpf_mem_alloc pcpu_ma;
 	struct bucket *buckets;
 	void *elems;
 	union {
@@ -121,14 +122,14 @@ struct htab_elem {
 		struct {
 			void *padding;
 			union {
-				struct bpf_htab *htab;
 				struct pcpu_freelist_node fnode;
 				struct htab_elem *batch_flink;
 			};
 		};
 	};
 	union {
-		struct rcu_head rcu;
+		/* pointer to per-cpu pointer */
+		void *ptr_to_pptr;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -448,8 +449,6 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
 	int numa_node = bpf_map_attr_numa_node(attr);
 
-	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
-		     offsetof(struct htab_elem, hash_node.pprev));
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
 
@@ -610,6 +609,12 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false);
 		if (err)
 			goto free_map_locked;
+		if (percpu) {
+			err = bpf_mem_alloc_init(&htab->pcpu_ma,
+						 round_up(htab->map.value_size, 8), true);
+			if (err)
+				goto free_map_locked;
+		}
 	}
 
 	return &htab->map;
@@ -620,6 +625,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
 		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
+	bpf_mem_alloc_destroy(&htab->pcpu_ma);
 	bpf_mem_alloc_destroy(&htab->ma);
 free_htab:
 	lockdep_unregister_key(&htab->lockdep_key);
@@ -895,19 +901,11 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 {
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
-		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
+		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
 	check_and_free_fields(htab, l);
 	bpf_mem_cache_free(&htab->ma, l);
 }
 
-static void htab_elem_free_rcu(struct rcu_head *head)
-{
-	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
-	struct bpf_htab *htab = l->htab;
-
-	htab_elem_free(htab, l);
-}
-
 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
 {
 	struct bpf_map *map = &htab->map;
@@ -953,12 +951,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 		__pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		dec_elem_count(htab);
-		if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
-			l->htab = htab;
-			call_rcu(&l->rcu, htab_elem_free_rcu);
-		} else {
-			htab_elem_free(htab, l);
-		}
+		htab_elem_free(htab, l);
 	}
 }
 
@@ -983,13 +976,12 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 			    void *value, bool onallcpus)
 {
-	/* When using prealloc and not setting the initial value on all cpus,
-	 * zero-fill element values for other cpus (just as what happens when
-	 * not using prealloc). Otherwise, bpf program has no way to ensure
+	/* When not setting the initial value on all cpus, zero-fill element
+	 * values for other cpus. Otherwise, bpf program has no way to ensure
 	 * known initial values for cpus other than current one
 	 * (onallcpus=false always when coming from bpf prog).
 	 */
-	if (htab_is_prealloc(htab) && !onallcpus) {
+	if (!onallcpus) {
 		u32 size = round_up(htab->map.value_size, 8);
 		int current_cpu = raw_smp_processor_id();
 		int cpu;
@@ -1060,18 +1052,18 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 
 	memcpy(l_new->key, key, key_size);
 	if (percpu) {
-		size = round_up(size, 8);
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
 			/* alloc_percpu zero-fills */
-			pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
-						    GFP_NOWAIT | __GFP_NOWARN);
+			pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
 			if (!pptr) {
 				bpf_mem_cache_free(&htab->ma, l_new);
 				l_new = ERR_PTR(-ENOMEM);
 				goto dec_count;
 			}
+			l_new->ptr_to_pptr = pptr;
+			pptr = *(void **)pptr;
 		}
 
 		pcpu_init_value(htab, pptr, value, onallcpus);
@@ -1568,6 +1560,7 @@ static void htab_map_free(struct bpf_map *map)
 	bpf_map_free_kptr_off_tab(map);
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
+	bpf_mem_alloc_destroy(&htab->pcpu_ma);
 	bpf_mem_alloc_destroy(&htab->ma);
 	if (htab->use_percpu_counter)
 		percpu_counter_destroy(&htab->pcount);
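
A minimal sketch, from the BPF program side, of why the unconditional zero-fill in pcpu_init_value() matters: when a program creates an element in a BPF_MAP_TYPE_PERCPU_HASH map, only the current CPU's slot receives the supplied value, and with this change the slots of all other CPUs are guaranteed to read as zero even though the element may be recycled from the bpf_mem_alloc freelist instead of coming from a fresh alloc_percpu(). The map, program, and attach point below are illustrative, not part of the patch.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
		__uint(max_entries, 1024);
		__type(key, __u32);
		__type(value, __u64);
	} counters SEC(".maps");

	SEC("tracepoint/syscalls/sys_enter_write")
	int count_writes(void *ctx)
	{
		__u32 key = bpf_get_current_pid_tgid() >> 32;
		__u64 init = 1, *val;

		val = bpf_map_lookup_elem(&counters, &key);
		if (val) {
			/* Lookup returns this CPU's slot only. */
			__sync_fetch_and_add(val, 1);
			return 0;
		}
		/* Creates the element with this CPU's slot set to 1; the slots of
		 * all other CPUs are zero-filled by pcpu_init_value(), so a later
		 * per-CPU dump from user space sees known initial values.
		 */
		bpf_map_update_elem(&counters, &key, &init, BPF_NOEXIST);
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";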