Commit 15e541ed authored by Daniel Borkmann

Merge branch 'bpf-array-map-offload-and-tests'

Jakub Kicinski says:

====================
This set brings in the rest of map offload code held up by urgent
fixes and improvements to the BPF arrays.

The first 3 patches take care of array map offload. Similarly to
hash maps, the attribute validation is split out to a separate map
op and used for both the offloaded and non-offloaded cases (allocation
only happens if the map is on the host).  Offload support comes down
to allowing this map type through the offload check in the core.
The NFP driver also rejects the delete operation for array maps.

Subsequent patches add reporting of the target device, in much the
same way as the target device of programs is reported (ifindex +
netns dev/ino).  Netdevsim is extended with a trivial map
implementation, allowing us to test the offload in test_offload.py.

The last patch adds a small busy wait to NFP map IO; this improves
response times, which is especially useful for map dumps.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 0c91c423 ca027a1c
......@@ -157,7 +157,14 @@ nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
int tag)
{
struct sk_buff *skb;
int err;
int i, err;
for (i = 0; i < 50; i++) {
udelay(4);
skb = nfp_bpf_reply(bpf, tag);
if (skb)
return skb;
}
err = wait_event_interruptible_timeout(bpf->cmsg_wq,
skb = nfp_bpf_reply(bpf, tag),
......
......@@ -176,6 +176,8 @@ nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
/*
 * Delete @key from an offloaded map.
 *
 * Array maps do not support deletion and get -EINVAL; every other map type
 * is forwarded to nfp_bpf_ctrl_del_entry() and its result is returned.
 */
static int
nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
{
	if (offmap->map.map_type != BPF_MAP_TYPE_ARRAY)
		return nfp_bpf_ctrl_del_entry(offmap, key);

	return -EINVAL;
}
......
......@@ -17,6 +17,7 @@
#include <linux/bpf_verifier.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/rtnetlink.h>
#include <net/pkt_cls.h>
......@@ -31,6 +32,19 @@ struct nsim_bpf_bound_prog {
struct list_head l;
};
/* Number of entry slots in each netdevsim offloaded map. */
#define NSIM_BPF_MAX_KEYS 2
/*
 * netdevsim-side state of one offloaded BPF map, stored in the offloaded
 * map's dev_priv pointer.
 */
struct nsim_bpf_bound_map {
/* netdevsim instance the map was allocated on */
struct netdevsim *ns;
/* back-pointer to the offloaded map this state belongs to */
struct bpf_offloaded_map *map;
/* held by the map ops while they read or modify entry[] */
struct mutex mutex;
/* fixed table of slots; a slot whose key is NULL is unused */
struct nsim_map_entry {
void *key;
void *value;
} entry[NSIM_BPF_MAX_KEYS];
/* node on the owning netdevsim's bpf_bound_maps list */
struct list_head l;
};
static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data)
{
const char **str = file->private;
......@@ -284,6 +298,224 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
return 0;
}
/*
 * Tell whether slot @e holds @key.
 *
 * An unused slot (NULL key) never matches; otherwise the first
 * map->key_size bytes of both keys are compared.
 */
static bool
nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key)
{
	const void *stored = e->key;

	return stored && memcmp(key, stored, map->key_size) == 0;
}
/*
 * Linear search of the map's slot table for @key.
 *
 * Returns the index of the first matching slot, or -ENOENT when no slot
 * holds the key.
 */
static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key)
{
	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
	unsigned int slot = 0;

	while (slot < ARRAY_SIZE(nmap->entry)) {
		if (nsim_map_key_match(&offmap->map, &nmap->entry[slot], key))
			return slot;
		slot++;
	}

	return -ENOENT;
}
static int
nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx)
{
struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER);
if (!nmap->entry[idx].key)
return -ENOMEM;
nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER);
if (!nmap->entry[idx].value) {
kfree(nmap->entry[idx].key);
nmap->entry[idx].key = NULL;
return -ENOMEM;
}
return 0;
}
/*
 * Copy the key of the next used slot into @next_key.
 *
 * Iteration starts at slot 0 when @key is NULL or is not present in the
 * map, otherwise at the slot following the one holding @key.
 *
 * Returns 0 when a key was copied, -ENOENT when no used slot remains.
 */
static int
nsim_map_get_next_key(struct bpf_offloaded_map *offmap,
		      void *key, void *next_key)
{
	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
	int idx = -ENOENT;

	mutex_lock(&nmap->mutex);

	if (key)
		idx = nsim_map_key_find(offmap, key);
	idx = (idx == -ENOENT) ? 0 : idx + 1;

	/* Skip over unused slots. */
	while (idx < ARRAY_SIZE(nmap->entry) && !nmap->entry[idx].key)
		idx++;

	if (idx < ARRAY_SIZE(nmap->entry))
		memcpy(next_key, nmap->entry[idx].key,
		       offmap->map.key_size);

	mutex_unlock(&nmap->mutex);

	return idx == ARRAY_SIZE(nmap->entry) ? -ENOENT : 0;
}
/*
 * Copy the value stored under @key into @value (map value_size bytes).
 *
 * Returns 0 on success, or the negative error from nsim_map_key_find()
 * when the key is not present.
 */
static int
nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value)
{
	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
	int slot, ret = 0;

	mutex_lock(&nmap->mutex);

	slot = nsim_map_key_find(offmap, key);
	if (slot < 0)
		ret = slot;
	else
		memcpy(value, nmap->entry[slot].value,
		       offmap->map.value_size);

	mutex_unlock(&nmap->mutex);

	return ret;
}
/*
 * Insert or overwrite the entry for @key.
 *
 * With flags == BPF_EXIST the key must already be present (otherwise the
 * lookup error is returned); with flags == BPF_NOEXIST it must be absent
 * (otherwise -EEXIST).  A new key takes the first unused slot; -E2BIG is
 * returned when every slot is in use, and an allocation failure from
 * nsim_map_alloc_elem() is passed through.
 *
 * Returns 0 on success or a negative errno.
 */
static int
nsim_map_update_elem(struct bpf_offloaded_map *offmap,
		     void *key, void *value, u64 flags)
{
	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
	int slot, ret = 0;

	mutex_lock(&nmap->mutex);

	slot = nsim_map_key_find(offmap, key);
	if (slot >= 0) {
		if (flags == BPF_NOEXIST) {
			ret = -EEXIST;
			goto out_unlock;
		}
	} else {
		if (flags == BPF_EXIST) {
			ret = slot;
			goto out_unlock;
		}

		/* Key is new: claim the first unused slot. */
		slot = 0;
		while (slot < ARRAY_SIZE(nmap->entry) && nmap->entry[slot].key)
			slot++;
		if (slot == ARRAY_SIZE(nmap->entry)) {
			ret = -E2BIG;
			goto out_unlock;
		}

		ret = nsim_map_alloc_elem(offmap, slot);
		if (ret)
			goto out_unlock;
	}

	memcpy(nmap->entry[slot].key, key, offmap->map.key_size);
	memcpy(nmap->entry[slot].value, value, offmap->map.value_size);

out_unlock:
	mutex_unlock(&nmap->mutex);
	return ret;
}
static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
{
struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
int idx;
if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY)
return -EINVAL;
mutex_lock(&nmap->mutex);
idx = nsim_map_key_find(offmap, key);
if (idx >= 0) {
kfree(nmap->entry[idx].key);
kfree(nmap->entry[idx].value);
memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx]));
}
mutex_unlock(&nmap->mutex);
return idx < 0 ? idx : 0;
}
/*
 * Device-side map operations; installed as offmap->dev_ops by
 * nsim_bpf_map_alloc().
 */
static const struct bpf_map_dev_ops nsim_bpf_map_ops = {
.map_get_next_key = nsim_map_get_next_key,
.map_lookup_elem = nsim_map_lookup_elem,
.map_update_elem = nsim_map_update_elem,
.map_delete_elem = nsim_map_delete_elem,
};
/*
 * Create the netdevsim-side backing state for an offloaded map.
 *
 * Only array and hash maps with no map_flags and at most NSIM_BPF_MAX_KEYS
 * entries are accepted.  For array maps every slot is pre-allocated and its
 * key is set to the slot index; hash map slots are left unused here and are
 * allocated by the update op.  On success the state is stored in
 * offmap->dev_priv, the netdevsim map ops are installed and the map is
 * added to ns->bpf_bound_maps.
 *
 * Returns 0 on success, -EINVAL for an unsupported map type or non-zero
 * map_flags, or -ENOMEM when max_entries exceeds NSIM_BPF_MAX_KEYS or an
 * allocation fails.
 */
static int
nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
{
	struct nsim_bpf_bound_map *nmap;
	unsigned int i;
	int err;

	if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY &&
		    offmap->map.map_type != BPF_MAP_TYPE_HASH))
		return -EINVAL;
	if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS)
		return -ENOMEM;
	if (offmap->map.map_flags)
		return -EINVAL;

	nmap = kzalloc(sizeof(*nmap), GFP_USER);
	if (!nmap)
		return -ENOMEM;

	/* nsim_map_alloc_elem() reaches nmap through dev_priv, so set it first. */
	offmap->dev_priv = nmap;
	nmap->ns = ns;
	nmap->map = offmap;
	mutex_init(&nmap->mutex);

	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) {
		for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) {
			u32 *key;

			err = nsim_map_alloc_elem(offmap, i);
			if (err)
				goto err_free;
			key = nmap->entry[i].key;
			*key = i;
		}
	}

	offmap->dev_ops = &nsim_bpf_map_ops;
	list_add_tail(&nmap->l, &ns->bpf_bound_maps);

	return 0;

err_free:
	/*
	 * Slot i is the one that failed, and nsim_map_alloc_elem() leaves a
	 * failed slot unused, so only slots [0, i) hold buffers.  The
	 * post-decrement is deliberate: i is unsigned, so "while (--i)"
	 * would wrap around and free out of bounds when slot 0 fails, and
	 * would never free slot 0 when a later slot fails.
	 */
	while (i--) {
		kfree(nmap->entry[i].key);
		kfree(nmap->entry[i].value);
	}
	kfree(nmap);
	return err;
}
/*
 * Tear down the netdevsim state of an offloaded map: free every slot's key
 * and value buffers, unlink the map from the bound-maps list, destroy the
 * mutex and free the state itself.
 */
static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
{
	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
	unsigned int slot = 0;

	/* Unused slots hold NULL pointers, which kfree() accepts. */
	while (slot < ARRAY_SIZE(nmap->entry)) {
		kfree(nmap->entry[slot].key);
		kfree(nmap->entry[slot].value);
		slot++;
	}

	list_del_init(&nmap->l);
	mutex_destroy(&nmap->mutex);
	kfree(nmap);
}
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
struct netdevsim *ns = netdev_priv(dev);
......@@ -328,6 +560,14 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
return err;
return nsim_xdp_set_prog(ns, bpf);
case BPF_OFFLOAD_MAP_ALLOC:
if (!ns->bpf_map_accept)
return -EOPNOTSUPP;
return nsim_bpf_map_alloc(ns, bpf->offmap);
case BPF_OFFLOAD_MAP_FREE:
nsim_bpf_map_free(bpf->offmap);
return 0;
default:
return -EINVAL;
}
......@@ -336,6 +576,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
int nsim_bpf_init(struct netdevsim *ns)
{
INIT_LIST_HEAD(&ns->bpf_bound_progs);
INIT_LIST_HEAD(&ns->bpf_bound_maps);
debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
&ns->bpf_offloaded_id);
......@@ -362,12 +603,17 @@ int nsim_bpf_init(struct netdevsim *ns)
debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
&ns->bpf_xdpoffload_accept);
ns->bpf_map_accept = true;
debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
&ns->bpf_map_accept);
return 0;
}
/*
 * Sanity-check BPF state at netdevsim teardown.
 *
 * Frees nothing; it only warns if the bound-program or bound-map list is
 * non-empty, or if ns->xdp_prog or ns->bpf_offloaded is still set.
 */
void nsim_bpf_uninit(struct netdevsim *ns)
{
WARN_ON(!list_empty(&ns->bpf_bound_progs));
WARN_ON(!list_empty(&ns->bpf_bound_maps));
WARN_ON(ns->xdp_prog);
WARN_ON(ns->bpf_offloaded);
}
......@@ -61,6 +61,9 @@ struct netdevsim {
bool bpf_tc_non_bound_accept;
bool bpf_xdpdrv_accept;
bool bpf_xdpoffload_accept;
bool bpf_map_accept;
struct list_head bpf_bound_maps;
};
extern struct dentry *nsim_ddir;
......
......@@ -586,6 +586,8 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog);
int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map);
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
int bpf_map_offload_update_elem(struct bpf_map *map,
void *key, void *value, u64 flags);
......
......@@ -938,6 +938,9 @@ struct bpf_map_info {
__u32 max_entries;
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
......
......@@ -49,27 +49,35 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
}
/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
static int array_map_alloc_check(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size, mask64;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
(percpu && numa_node != NUMA_NO_NODE))
return ERR_PTR(-EINVAL);
return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
return ERR_PTR(-E2BIG);
return -E2BIG;
return 0;
}
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
......@@ -112,12 +120,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
array->map.key_size = attr->key_size;
array->map.value_size = attr->value_size;
array->map.max_entries = attr->max_entries;
array->map.map_flags = attr->map_flags;
array->map.numa_node = numa_node;
bpf_map_init_from_attr(&array->map, attr);
array->elem_size = elem_size;
if (!percpu)
......@@ -327,6 +330,7 @@ static void array_map_free(struct bpf_map *map)
}
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
......@@ -337,6 +341,7 @@ const struct bpf_map_ops array_map_ops = {
};
const struct bpf_map_ops percpu_array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
......@@ -345,12 +350,12 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem,
};
static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
static int fd_array_map_alloc_check(union bpf_attr *attr)
{
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return ERR_PTR(-EINVAL);
return array_map_alloc(attr);
return -EINVAL;
return array_map_alloc_check(attr);
}
static void fd_array_map_free(struct bpf_map *map)
......@@ -474,7 +479,8 @@ void bpf_fd_array_map_clear(struct bpf_map *map)
}
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
......@@ -561,7 +567,8 @@ static void perf_event_fd_array_release(struct bpf_map *map,
}
const struct bpf_map_ops perf_event_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
......@@ -592,7 +599,8 @@ static void cgroup_fd_array_free(struct bpf_map *map)
}
const struct bpf_map_ops cgroup_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
......@@ -610,7 +618,7 @@ static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
if (IS_ERR(inner_map_meta))
return inner_map_meta;
map = fd_array_map_alloc(attr);
map = array_map_alloc(attr);
if (IS_ERR(map)) {
bpf_map_meta_free(inner_map_meta);
return map;
......@@ -673,6 +681,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
}
const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
.map_get_next_key = array_map_get_next_key,
......
......@@ -299,7 +299,8 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (attr->map_type != BPF_MAP_TYPE_HASH)
if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
attr->map_type != BPF_MAP_TYPE_HASH)
return ERR_PTR(-EINVAL);
offmap = kzalloc(sizeof(*offmap), GFP_USER);
......@@ -412,6 +413,61 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
return ret;
}
/*
 * Arguments handed to bpf_map_offload_info_fill_ns() as its private_data
 * by bpf_map_offload_info_fill().
 */
struct ns_get_path_bpf_map_args {
/* offloaded map whose netdev is inspected */
struct bpf_offloaded_map *offmap;
/* info structure that receives the ifindex */
struct bpf_map_info *info;
};
/*
 * Callback for ns_get_path_cb(): record the ifindex of the map's netdev in
 * the info structure and return the netdev's network namespace.
 *
 * Runs under rtnl_lock and a read lock on bpf_devs_lock.  When the map
 * still has a netdev, a reference on its netns is taken with get_net() and
 * the namespace's ns_common is returned; otherwise ifindex is set to 0 and
 * NULL is returned.
 */
static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data)
{
	struct ns_get_path_bpf_map_args *args = private_data;
	struct ns_common *ns;
	struct net *net;

	rtnl_lock();
	down_read(&bpf_devs_lock);

	if (!args->offmap->netdev) {
		/* No netdev behind the map: report no device. */
		args->info->ifindex = 0;
		ns = NULL;
	} else {
		args->info->ifindex = args->offmap->netdev->ifindex;
		net = dev_net(args->offmap->netdev);
		get_net(net);
		ns = &net->ns;
	}

	up_read(&bpf_devs_lock);
	rtnl_unlock();

	return ns;
}
/*
 * Fill the target-device fields of @info for an offloaded map: ifindex
 * (set by the namespace callback), netns_dev and netns_ino.
 *
 * Returns 0 on success.  When ns_get_path_cb() fails, returns -ENODEV if
 * no ifindex was recorded, otherwise the error carried by its result.
 */
int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
	struct ns_get_path_bpf_map_args args = {
		.offmap	= map_to_offmap(map),
		.info	= info,
	};
	struct inode *ns_inode;
	struct path ns_path;
	void *res;

	res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args);
	if (!IS_ERR(res)) {
		ns_inode = ns_path.dentry->d_inode;
		info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
		info->netns_ino = ns_inode->i_ino;
		path_put(&ns_path);

		return 0;
	}

	/* A zero ifindex means the callback found no netdev for the map. */
	return info->ifindex ? PTR_ERR(res) : -ENODEV;
}
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
{
struct bpf_offloaded_map *offmap;
......
......@@ -1801,6 +1801,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
info.map_flags = map->map_flags;
memcpy(info.name, map->name, sizeof(map->name));
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_info_fill(&info, map);
if (err)
return err;
}
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
return -EFAULT;
......
......@@ -428,6 +428,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_name(json_wtr, "flags");
jsonw_printf(json_wtr, "%#x", info->map_flags);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
......@@ -469,7 +472,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (*info->name)
printf("name %s ", info->name);
printf("flags 0x%x\n", info->map_flags);
printf("flags 0x%x", info->map_flags);
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("\n");
printf("\tkey %uB value %uB max_entries %u",
info->key_size, info->value_size, info->max_entries);
......
......@@ -938,6 +938,9 @@ struct bpf_map_info {
__u32 max_entries;
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
......
......@@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
......
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
#include <linux/bpf.h>
#include "bpf_helpers.h"
/* Hash map with a __u32 key, a long value and room for two entries. */
struct bpf_map_def SEC("maps") htab = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__u32),
.value_size = sizeof(long),
.max_entries = 2,
};
/* Array map with a __u32 key (index), a long value and two entries. */
struct bpf_map_def SEC("maps") array = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(long),
.max_entries = 2,
};
/* Sample program which should always load for testing control paths.
 *
 * Looks up key 0 in the hash map and then in the array map; returns 1 as
 * soon as a lookup yields no value, and 0 when both lookups succeed.
 */
SEC(".text") int func()
{
__u64 key64 = 0;
__u32 key = 0;
long *value;
value = bpf_map_lookup_elem(&htab, &key);
if (!value)
return 1;
/* NOTE(review): the array lookup passes a __u64 key although the map's
 * key_size is sizeof(__u32) - looks deliberate, confirm before changing.
 */
value = bpf_map_lookup_elem(&array, &key64);
if (!value)
return 1;
return 0;
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment