Commit 6710e112 authored by Jesper Dangaard Brouer's avatar Jesper Dangaard Brouer Committed by David S. Miller

bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP

The 'cpumap' is primarily used as a backend map for XDP BPF helper
call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'.

This patch implement the main part of the map.  It is not connected to
the XDP redirect system yet, and no SKB allocation are done yet.

The main concern in this patch is to ensure the datapath can run
without any locking.  This adds complexity to the setup and tear-down
procedure, which assumptions are extra carefully documented in the
code comments.

V2:
 - make sure array isn't larger than NR_CPUS
 - make sure CPUs added is a valid possible CPU

V3: fix nitpicks from Jakub Kicinski <kubakici@wp.pl>

V5:
 - Restrict map allocation to root / CAP_SYS_ADMIN
 - WARN_ON_ONCE if queue is not empty on tear-down
 - Return -EPERM on memlock limit instead of -ENOMEM
 - Error code in __cpu_map_entry_alloc() also handle ptr_ring_cleanup()
 - Moved cpu_map_enqueue() to next patch

V6: all notice by Daniel Borkmann
 - Fix err return code in cpu_map_alloc() introduced in V5
 - Move cpu_possible() check after max_entries boundary check
 - Forbid usage initially in check_map_func_compatibility()

V7:
 - Fix alloc error path spotted by Daniel Borkmann
 - Did stress test adding+removing CPUs from the map concurrently
 - Fixed refcnt issue on cpu_map_entry, kthread started too soon
 - Make sure packets are flushed during tear-down, involved use of
   rcu_barrier() and kthread_run only exit after queue is empty
 - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring

V8:
 - Nitpicking comments and gramma by Edward Cree
 - Fix missing semi-colon introduced in V7 due to rebasing
 - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch
Signed-off-by: default avatarJesper Dangaard Brouer <brouer@redhat.com>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 4b70c62b
...@@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) ...@@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
#ifdef CONFIG_STREAM_PARSER #ifdef CONFIG_STREAM_PARSER
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif #endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#endif #endif
...@@ -112,6 +112,7 @@ enum bpf_map_type { ...@@ -112,6 +112,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
}; };
enum bpf_prog_type { enum bpf_prog_type {
......
...@@ -5,6 +5,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list ...@@ -5,6 +5,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o
ifeq ($(CONFIG_NET),y) ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
ifeq ($(CONFIG_STREAM_PARSER),y) ifeq ($(CONFIG_STREAM_PARSER),y)
obj-$(CONFIG_BPF_SYSCALL) += sockmap.o obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
endif endif
......
This diff is collapsed.
...@@ -592,6 +592,12 @@ static int map_update_elem(union bpf_attr *attr) ...@@ -592,6 +592,12 @@ static int map_update_elem(union bpf_attr *attr)
if (copy_from_user(value, uvalue, value_size) != 0) if (copy_from_user(value, uvalue, value_size) != 0)
goto free_value; goto free_value;
/* Need to create a kthread, thus must support schedule */
if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
}
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
* inside bpf map update or delete otherwise deadlocks are possible * inside bpf map update or delete otherwise deadlocks are possible
*/ */
...@@ -622,7 +628,7 @@ static int map_update_elem(union bpf_attr *attr) ...@@ -622,7 +628,7 @@ static int map_update_elem(union bpf_attr *attr)
} }
__this_cpu_dec(bpf_prog_active); __this_cpu_dec(bpf_prog_active);
preempt_enable(); preempt_enable();
out:
if (!err) if (!err)
trace_bpf_map_update_elem(map, ufd, key, value); trace_bpf_map_update_elem(map, ufd, key, value);
free_value: free_value:
......
...@@ -1444,6 +1444,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, ...@@ -1444,6 +1444,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_redirect_map) if (func_id != BPF_FUNC_redirect_map)
goto error; goto error;
break; break;
/* Restrict bpf side of cpumap, open when use-cases appear */
case BPF_MAP_TYPE_CPUMAP:
if (func_id != BPF_FUNC_redirect_map)
goto error;
break;
case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS:
if (func_id != BPF_FUNC_map_lookup_elem) if (func_id != BPF_FUNC_map_lookup_elem)
......
...@@ -112,6 +112,7 @@ enum bpf_map_type { ...@@ -112,6 +112,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_CPUMAP,
}; };
enum bpf_prog_type { enum bpf_prog_type {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment