Commit 906312ca authored by Alexei Starovoitov

Merge branch 'xdp_devmap'

David Ahern says:

====================
Implementation of Daniel's proposal for allowing each DEVMAP entry to be
a (device index, program fd) pair.

Programs are run after XDP_REDIRECT and have access to both Rx device
and Tx device.

v4
- moved struct bpf_devmap_val from uapi to devmap.c, named the union
  and dropped the prefix from the elements - Jesper
- fixed 2 bugs in selftests

v3
- renamed struct to bpf_devmap_val
- used offsetofend to check for expected map size, modification of
  Toke's comment
- check for explicit value sizes
- adjusted switch statement in dev_map_run_prog per Andrii's comment
- changed SEC shortcut to xdp_devmap
- changed selftests to use skeleton and new map declaration

v2
- moved dev_map_ext_val definition to uapi to formalize the API for devmap
  extensions; add bpf_ prefix to the prog_fd and prog_id entries
- changed devmap code to handle struct in a way that it can support future
  extensions
- fixed subject in libbpf patch

v1
- fixed prog put on invalid program - Toke
- changed write value from id to fd per Toke's comments about capabilities
- add test cases
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents b36e62eb d39aec79
......@@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
......@@ -1363,6 +1364,10 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
{
return NULL;
}
/* Fallback variant of dev_map_can_have_prog(): unconditionally reports
 * that @map cannot hold per-entry programs.
 * NOTE(review): presumably selected when devmap support is configured
 * out; the guarding #ifdef is not visible in this hunk - confirm.
 */
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
	(void)map;	/* argument intentionally ignored */

	return false;
}
static inline void __dev_flush(void)
{
......
......@@ -61,12 +61,17 @@ struct xdp_rxq_info {
struct xdp_mem_info mem;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
/* Tx-side counterpart of xdp_rxq_info: names the device a frame is about
 * to be transmitted on. struct xdp_buff refers to one through its txq
 * pointer.
 */
struct xdp_txq_info {
struct net_device *dev;
};
/* Packet context passed to an XDP program when it is run. */
struct xdp_buff {
void *data;
void *data_end;
void *data_meta;
void *data_hard_start;
/* Rx-side info (receiving device and queue) */
struct xdp_rxq_info *rxq;
/* Tx-side info (egress device).
 * NOTE(review): presumably only meaningful while a program that is
 * allowed to read the egress device is running - confirm with callers.
 */
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
};
......
......@@ -225,6 +225,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETPEERNAME,
BPF_CGROUP_INET4_GETSOCKNAME,
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
__MAX_BPF_ATTACH_TYPE
};
......@@ -3706,6 +3707,8 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
__u32 egress_ifindex; /* txq->dev->ifindex */
};
enum sk_action {
......
......@@ -60,12 +60,23 @@ struct xdp_dev_bulk_queue {
unsigned int count;
};
/* DEVMAP values
 *
 * Layout of a DEVMAP / DEVMAP_HASH entry value as exchanged with map
 * users. A value may be just the ifindex, or ifindex followed by the
 * bpf_prog union. The union is interpreted by direction: a program fd is
 * passed in when an entry is written, and the id of that program is what
 * a lookup reports back.
 */
struct bpf_devmap_val {
u32 ifindex; /* device index */
union {
int fd; /* prog fd on map write */
u32 id; /* prog id on map read */
} bpf_prog;
};
/* Kernel-side state of a single DEVMAP / DEVMAP_HASH entry. */
struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist;
struct bpf_dtab *dtab;
/* optional program run on frames sent to this entry; NULL when the
 * entry was created without one. A reference is held and dropped
 * with bpf_prog_put() when the entry is freed.
 */
struct bpf_prog *xdp_prog;
struct rcu_head rcu;
unsigned int idx;
/* value handed back by map lookups: ifindex plus the attached
 * program's id (0 when there is no program)
 */
struct bpf_devmap_val val;
};
struct bpf_dtab {
......@@ -105,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
u32 valsize = attr->value_size;
u64 cost = 0;
int err;
/* check sanity of attributes */
/* check sanity of attributes. 2 value sizes supported:
* 4 bytes: ifindex
* 8 bytes: ifindex + prog fd
*/
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
(valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return -EINVAL;
/* Lookup returns a pointer straight to dev->ifindex, so make sure the
......@@ -217,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)
hlist_for_each_entry_safe(dev, next, head, index_hlist) {
hlist_del_rcu(&dev->index_hlist);
if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
......@@ -231,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
if (!dev)
continue;
if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
......@@ -317,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
/* Tell whether entries of @map may carry a program.
 *
 * True only for DEVMAP / DEVMAP_HASH maps whose value size is anything
 * other than the ifindex-only layout; false for every other map.
 */
bool dev_map_can_have_prog(struct bpf_map *map)
{
	bool is_devmap;

	switch (map->map_type) {
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
		is_devmap = true;
		break;
	default:
		is_devmap = false;
		break;
	}

	return is_devmap &&
	       map->value_size != offsetofend(struct bpf_devmap_val, ifindex);
}
static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
struct net_device *dev = bq->dev;
......@@ -441,6 +472,33 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
return bq_enqueue(dev, xdpf, dev_rx);
}
static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct xdp_txq_info txq = { .dev = dev };
u32 act;
xdp->txq = &txq;
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
return xdp;
case XDP_DROP:
break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dev, xdp_prog, act);
break;
}
xdp_return_buff(xdp);
return NULL;
}
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
......@@ -452,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
{
struct net_device *dev = dst->dev;
if (dst->xdp_prog) {
xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
if (!xdp)
return 0;
}
return __xdp_enqueue(dev, xdp, dev_rx);
}
......@@ -472,18 +535,15 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
return obj ? &obj->val : NULL;
}
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
*(u32 *)key);
struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
return obj ? &obj->val : NULL;
}
static void __dev_map_entry_free(struct rcu_head *rcu)
......@@ -491,6 +551,8 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
struct bpf_dtab_netdev *dev;
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
......@@ -541,9 +603,10 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab *dtab,
u32 ifindex,
struct bpf_devmap_val *val,
unsigned int idx)
{
struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
......@@ -551,24 +614,46 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
if (!dev)
return ERR_PTR(-ENOMEM);
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
kfree(dev);
return ERR_PTR(-EINVAL);
dev->dev = dev_get_by_index(net, val->ifindex);
if (!dev->dev)
goto err_out;
if (val->bpf_prog.fd >= 0) {
prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
goto err_put_dev;
if (prog->expected_attach_type != BPF_XDP_DEVMAP)
goto err_put_prog;
}
dev->idx = idx;
dev->dtab = dtab;
if (prog) {
dev->xdp_prog = prog;
dev->val.bpf_prog.id = prog->aux->id;
} else {
dev->xdp_prog = NULL;
dev->val.bpf_prog.id = 0;
}
dev->val.ifindex = val->ifindex;
return dev;
err_put_prog:
bpf_prog_put(prog);
err_put_dev:
dev_put(dev->dev);
err_out:
kfree(dev);
return ERR_PTR(-EINVAL);
}
static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
u32 ifindex = *(u32 *)value;
u32 i = *(u32 *)key;
if (unlikely(map_flags > BPF_EXIST))
......@@ -578,10 +663,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
if (!ifindex) {
/* already verified value_size <= sizeof val */
memcpy(&val, value, map->value_size);
if (!val.ifindex) {
dev = NULL;
/* can not specify fd if ifindex is 0 */
if (val.bpf_prog.fd != -1)
return -EINVAL;
} else {
dev = __dev_map_alloc_node(net, dtab, ifindex, i);
dev = __dev_map_alloc_node(net, dtab, &val, i);
if (IS_ERR(dev))
return PTR_ERR(dev);
}
......@@ -608,13 +699,16 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
u32 ifindex = *(u32 *)value;
u32 idx = *(u32 *)key;
unsigned long flags;
int err = -EEXIST;
if (unlikely(map_flags > BPF_EXIST || !ifindex))
/* already verified value_size <= sizeof val */
memcpy(&val, value, map->value_size);
if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
return -EINVAL;
spin_lock_irqsave(&dtab->index_lock, flags);
......@@ -623,7 +717,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
if (old_dev && (map_flags & BPF_NOEXIST))
goto out_err;
dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
dev = __dev_map_alloc_node(net, dtab, &val, idx);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out_err;
......
......@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
if (new) {
u32 i;
/* generic XDP does not work with DEVMAPs that can
* have a bpf_prog installed on an entry
*/
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
}
}
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
......@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
bpf_prog_put(prog);
return -EINVAL;
}
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
......
......@@ -7015,6 +7015,13 @@ static bool xdp_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
switch (off) {
case offsetof(struct xdp_md, egress_ifindex):
return false;
}
}
if (type == BPF_WRITE) {
if (bpf_prog_is_dev_bound(prog->aux)) {
switch (off) {
......@@ -7985,6 +7992,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(struct xdp_rxq_info,
queue_index));
break;
case offsetof(struct xdp_md, egress_ifindex):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, txq));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
si->dst_reg, si->dst_reg,
offsetof(struct xdp_txq_info, dev));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
}
return insn - insn_buf;
......
......@@ -225,6 +225,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETPEERNAME,
BPF_CGROUP_INET4_GETSOCKNAME,
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
__MAX_BPF_ATTACH_TYPE
};
......@@ -3705,6 +3706,8 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
__u32 egress_ifindex; /* txq->dev->ifindex */
};
enum sk_action {
......
......@@ -6657,6 +6657,8 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
.attach_fn = attach_iter),
BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
......
// SPDX-License-Identifier: GPL-2.0
#include <uapi/linux/bpf.h>
#include <linux/if_link.h>
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
#define IFINDEX_LO 1
/* Local definition of the 8-byte DEVMAP value used by this test when
 * writing and reading map entries: fd is filled in before an update, id
 * is what is inspected after a lookup.
 */
struct bpf_devmap_val {
u32 ifindex; /* device index */
union {
int fd; /* prog fd on map write */
u32 id; /* prog id on map read */
} bpf_prog;
};
/* Exercise DEVMAP entries that carry a program:
 *  - a program referencing such a DEVMAP must be refused for generic
 *    (SKB mode) XDP attach
 *  - a BPF_XDP_DEVMAP program can be stored in an entry, and reading the
 *    entry back reports that program's id
 *  - a BPF_XDP_DEVMAP program must be refused when attached to a device
 *  - a program without the BPF_XDP_DEVMAP attach type must be refused as
 *    a DEVMAP entry
 *
 * Attach attempts that are expected to fail are undone if they succeed,
 * so a kernel regression does not leave a program on lo for later tests.
 */
void test_xdp_with_devmap_helpers(void)
{
	struct test_xdp_with_devmap_helpers *skel;
	struct bpf_prog_info info = {};
	struct bpf_devmap_val val = {
		.ifindex = IFINDEX_LO,
	};
	__u32 len = sizeof(info);
	__u32 duration = 0, idx = 0;
	int err, redir_fd, dm_fd, map_fd;

	skel = test_xdp_with_devmap_helpers__open_and_load();
	if (CHECK_FAIL(!skel)) {
		perror("test_xdp_with_devmap_helpers__open_and_load");
		return;
	}

	/* can not attach program with DEVMAPs that allow programs
	 * as xdp generic
	 */
	redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
	err = bpf_set_link_xdp_fd(IFINDEX_LO, redir_fd, XDP_FLAGS_SKB_MODE);
	if (CHECK(err == 0, "Generic attach of program with 8-byte devmap",
		  "should have failed\n")) {
		/* unexpected success: detach again (fd -1) */
		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
	}

	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
	map_fd = bpf_map__fd(skel->maps.dm_ports);
	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
	if (CHECK_FAIL(err))
		goto out_close;

	/* store the devmap program in entry 0 and read its id back */
	val.bpf_prog.fd = dm_fd;
	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
	CHECK(err, "Add program to devmap entry",
	      "err %d errno %d\n", err, errno);

	err = bpf_map_lookup_elem(map_fd, &idx, &val);
	CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
	CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
	      "expected %u read %u\n", info.id, val.bpf_prog.id);

	/* can not attach BPF_XDP_DEVMAP program to a device */
	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
	if (CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
		  "should have failed\n")) {
		/* unexpected success: detach again (fd -1) */
		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
	}

	/* a plain XDP program must be rejected as a devmap entry */
	val.ifindex = IFINDEX_LO;
	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
	CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
	      "should have failed\n");

out_close:
	test_xdp_with_devmap_helpers__destroy(skel);
}
/* Negative test: the object contains a program that reads egress_ifindex
 * without the devmap attach type, so open_and_load is expected to fail.
 * If it loads anyway the check fails and the skeleton is torn down.
 */
void test_neg_xdp_devmap_helpers(void)
{
	struct test_xdp_devmap_helpers *skel;
	__u32 duration = 0;
	int loaded;

	skel = test_xdp_devmap_helpers__open_and_load();
	loaded = CHECK(skel,
		       "Load of XDP program accessing egress ifindex without attach type",
		       "should have failed\n");
	if (!loaded)
		return;

	test_xdp_devmap_helpers__destroy(skel);
}
void test_xdp_devmap_attach(void)
{
if (test__start_subtest("DEVMAP with programs in entries"))
test_xdp_with_devmap_helpers();
if (test__start_subtest("Verifier check of DEVMAP programs"))
test_neg_xdp_devmap_helpers();
}
// SPDX-License-Identifier: GPL-2.0
/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
* because of access to egress_ifindex
*/
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
/* Logs the Rx device, Tx device and packet length, then passes the
 * frame. The section name carries no devmap attach type, while the body
 * reads ctx->egress_ifindex.
 */
SEC("xdp_dm_log")
int xdpdm_devlog(struct xdp_md *ctx)
{
	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
	void *pkt_start = (void *)(long)ctx->data;
	void *pkt_end = (void *)(long)ctx->data_end;
	unsigned int pkt_len = pkt_end - pkt_start;
	unsigned int rx_ifindex = ctx->ingress_ifindex;
	unsigned int tx_ifindex = ctx->egress_ifindex;

	bpf_trace_printk(fmt, sizeof(fmt), rx_ifindex, tx_ifindex, pkt_len);

	return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
/* DEVMAP with 4 slots, keyed by __u32, whose values use the full
 * struct bpf_devmap_val layout (ifindex plus program fd/id) rather than
 * a bare ifindex.
 */
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct bpf_devmap_val));
__uint(max_entries, 4);
} dm_ports SEC(".maps");
/* Redirects every frame through slot 1 of dm_ports, with no flags set. */
SEC("xdp_redir")
int xdp_redir_prog(struct xdp_md *ctx)
{
	const unsigned int port_key = 1;
	const unsigned long long redir_flags = 0;

	return bpf_redirect_map(&dm_ports, port_key, redir_flags);
}
/* Plain XDP program that passes every frame. Its section name does not
 * set an expected attach type, which makes it an invalid candidate for a
 * DEVMAP entry.
 */
SEC("xdp_dummy")
int xdp_dummy_prog(struct xdp_md *ctx)
{
	(void)ctx;	/* packet is not inspected */

	return XDP_PASS;
}
/* Program suitable for a DEVMAP entry: the "xdp_devmap" section name
 * selects the devmap attach type, so both ingress and egress ifindex may
 * be read. Logs both devices and the packet length, then passes the
 * frame.
 */
SEC("xdp_devmap")
int xdp_dummy_dm(struct xdp_md *ctx)
{
	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
	void *pkt_start = (void *)(long)ctx->data;
	void *pkt_end = (void *)(long)ctx->data_end;
	unsigned int pkt_len = pkt_end - pkt_start;
	unsigned int rx_ifindex = ctx->ingress_ifindex;
	unsigned int tx_ifindex = ctx->egress_ifindex;

	bpf_trace_printk(fmt, sizeof(fmt), rx_ifindex, tx_ifindex, pkt_len);

	return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment