Commit 5db17c96 authored by Alexei Starovoitov

Merge branch 'lru-map-fix'

Daniel Borkmann says:

====================
This set fixes LRU map eviction in combination with map lookups from
the system call side, i.e. from user space. The main patch is the
second one; test cases are adapted and added in the last one. Thanks!
====================
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 2474c628 d2baab62
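
To make the affected scenario concrete, here is a minimal user-space sketch, assuming an already loaded LRU hash map whose fd is passed in (the function name, key/value types, and fd handling are illustrative, not part of the patch). It walks the map via the libbpf syscall wrappers; before this set, every lookup in such a walk set the element's LRU reference bit and skewed eviction, whereas with map_lookup_elem_sys_only the same walk leaves the reference state untouched.

#include <stdio.h>
#include <stdint.h>
#include <bpf/bpf.h>

/* Walk all elements of an LRU hash map from user space. Each
 * bpf_map_lookup_elem() call below goes through the bpf(2) syscall;
 * with this fix it is routed to map_lookup_elem_sys_only and no
 * longer marks the looked-up element as recently used.
 */
static void dump_lru_map(int map_fd)
{
        uint32_t key, next_key, *prev = NULL;   /* NULL key: start of walk */
        uint64_t value;

        while (bpf_map_get_next_key(map_fd, prev, &next_key) == 0) {
                if (bpf_map_lookup_elem(map_fd, &next_key, &value) == 0)
                        printf("key %u -> value %llu\n", next_key,
                               (unsigned long long)value);
                key = next_key;
                prev = &key;
        }
}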
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 	void (*map_free)(struct bpf_map *map);
 	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
 	void (*map_release_uref)(struct bpf_map *map);
+	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }
 
-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
+							void *key, const bool mark)
 {
 	struct htab_elem *l = __htab_map_lookup_elem(map, key);
 
 	if (l) {
-		bpf_lru_node_set_ref(&l->lru_node);
+		if (mark)
+			bpf_lru_node_set_ref(&l->lru_node);
 		return l->key + round_up(map->key_size, 8);
 	}
 
 	return NULL;
 }
 
+static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return __htab_lru_map_lookup_elem(map, key, true);
+}
+
+static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
+{
+	return __htab_lru_map_lookup_elem(map, key, false);
+}
+
 static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
@@ -1250,6 +1262,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
+	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
 	.map_update_elem = htab_lru_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
@@ -1281,7 +1294,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 {
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l;
 	void __percpu *pptr;
 	int ret = -ENOENT;
@@ -1297,8 +1309,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	l = __htab_map_lookup_elem(map, key);
 	if (!l)
 		goto out;
-	if (htab_is_lru(htab))
-		bpf_lru_node_set_ref(&l->lru_node);
+	/* We do not mark LRU map element here in order to not mess up
+	 * eviction heuristics when user space does a map walk.
+	 */
 	pptr = htab_elem_get_ptr(l, map->key_size);
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off,
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -808,7 +808,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = map->ops->map_peek_elem(map, value);
 	} else {
 		rcu_read_lock();
-		ptr = map->ops->map_lookup_elem(map, key);
+		if (map->ops->map_lookup_elem_sys_only)
+			ptr = map->ops->map_lookup_elem_sys_only(map, key);
+		else
+			ptr = map->ops->map_lookup_elem(map, key);
 		if (IS_ERR(ptr)) {
 			err = PTR_ERR(ptr);
 		} else if (!ptr) {
(The diff adapting and adding the LRU map test cases is collapsed.)
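
For contrast, lookups from BPF program context still go through map_lookup_elem (and its inlined gen_lookup variant) and therefore continue to mark elements as recently used; only the syscall path changes. A minimal program-side sketch, assuming the era's struct bpf_map_def definition style and an arbitrary tracepoint hook (map name, section, and counter logic are illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* LRU hash map; lookups from program context below still set the
 * LRU reference bit, unlike syscall-side lookups after this fix.
 */
struct bpf_map_def SEC("maps") lru_map = {
        .type           = BPF_MAP_TYPE_LRU_HASH,
        .key_size       = sizeof(__u32),
        .value_size     = sizeof(__u64),
        .max_entries    = 128,
};

SEC("tracepoint/syscalls/sys_enter_getpid")
int count_getpid(void *ctx)
{
        __u32 key = 0;
        __u64 *val;

        val = bpf_map_lookup_elem(&lru_map, &key);      /* marks element */
        if (val)
                __sync_fetch_and_add(val, 1);
        return 0;
}

char _license[] SEC("license") = "GPL";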