Commit eefe06e8 authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-prog-testing-framework'

Alexei Starovoitov says:

====================
bpf: program testing framework

Development and testing of networking bpf programs is quite cumbersome.
Especially tricky are XDP programs that attach to real netdevices and
program development feels like working on the car engine while
the car is in motion.
Another problem is ongoing changes to upstream llvm core
that can introduce an optimization that verifier will not
recognize. llvm bpf backend tests have no ability to run the programs.
To improve this situation introduce BPF_PROG_TEST_RUN command
to test and performance benchmark bpf programs.
It achieves several goals:
- development of xdp and skb based bpf programs can be done
in a canned environment with unit tests
- program performance optimizations can be benchmarked outside of
networking core (without driver and skb costs)
- continuous testing of upstream changes is finally practical

Patches 4,5,6 add C based test cases of various complexity
to cover some sched_cls and xdp features. More tests will
be added in the future. The tests were run on centos7 only.

For now the framework supports only skb and xdp programs. In the future
it can be extended to socket_filter and tracing program types.

More details are in individual patches.

v1->v2:
- rename bpf_program_test_run->bpf_prog_test_run
- add missing #include <linux/bpf.h> since libbpf.h shouldn't depend
on prior includes
- reordered patches 3 and 4 to keep bisect clean
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 98cd1552 37821613
......@@ -169,6 +169,8 @@ struct bpf_verifier_ops {
const struct bpf_insn *src,
struct bpf_insn *dst,
struct bpf_prog *prog);
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
};
struct bpf_prog_type_list {
......@@ -233,6 +235,11 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
......
......@@ -81,6 +81,7 @@ enum bpf_cmd {
BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
};
enum bpf_map_type {
......@@ -189,6 +190,17 @@ union bpf_attr {
__u32 attach_type;
__u32 attach_flags;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
__u32 data_size_in;
__u32 data_size_out;
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
} test;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......
......@@ -973,6 +973,28 @@ static int bpf_prog_detach(const union bpf_attr *attr)
}
#endif /* CONFIG_CGROUP_BPF */
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
struct bpf_prog *prog;
int ret = -ENOTSUPP;
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
prog = bpf_prog_get(attr->test.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
if (prog->aux->ops->test_run)
ret = prog->aux->ops->test_run(prog, attr, uattr);
bpf_prog_put(prog);
return ret;
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
......@@ -1039,7 +1061,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
#ifdef CONFIG_CGROUP_BPF
case BPF_PROG_ATTACH:
err = bpf_prog_attach(&attr);
......@@ -1048,7 +1069,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = bpf_prog_detach(&attr);
break;
#endif
case BPF_PROG_TEST_RUN:
err = bpf_prog_test_run(&attr, uattr);
break;
default:
err = -EINVAL;
break;
......
......@@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
......
obj-y := test_run.o
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/sched/signal.h>
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
{
u32 ret;
preempt_disable();
rcu_read_lock();
ret = BPF_PROG_RUN(prog, ctx);
rcu_read_unlock();
preempt_enable();
return ret;
}
static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
{
u64 time_start, time_spent = 0;
u32 ret = 0, i;
if (!repeat)
repeat = 1;
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
ret = bpf_test_run_one(prog, ctx);
if (need_resched()) {
if (signal_pending(current))
break;
time_spent += ktime_get_ns() - time_start;
cond_resched();
time_start = ktime_get_ns();
}
}
time_spent += ktime_get_ns() - time_start;
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
return ret;
}
static int bpf_test_finish(union bpf_attr __user *uattr, const void *data,
u32 size, u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(uattr->test.data_out);
int err = -EFAULT;
if (data_out && copy_to_user(data_out, data, size))
goto out;
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
goto out;
if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
goto out;
err = 0;
out:
return err;
}
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
return ERR_PTR(-ENOMEM);
if (copy_from_user(data + headroom, data_in, size)) {
kfree(data);
return ERR_PTR(-EFAULT);
}
return data;
}
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
u32 retval, duration;
struct sk_buff *skb;
void *data;
int ret;
data = bpf_test_init(kattr, size, NET_SKB_PAD,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
is_l2 = true;
/* fall through */
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
is_direct_pkt_access = true;
break;
default:
break;
}
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
return -ENOMEM;
}
skb_reserve(skb, NET_SKB_PAD);
__skb_put(skb, size);
skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
skb_reset_network_header(skb);
if (is_l2)
__skb_push(skb, ETH_HLEN);
if (is_direct_pkt_access)
bpf_compute_data_end(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
if (!is_l2)
__skb_push(skb, ETH_HLEN);
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
ret = bpf_test_finish(uattr, skb->data, size, retval, duration);
kfree_skb(skb);
return ret;
}
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct xdp_buff xdp = {};
u32 retval, duration;
void *data;
int ret;
data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0);
if (IS_ERR(data))
return PTR_ERR(data);
xdp.data_hard_start = data;
xdp.data = data + XDP_PACKET_HEADROOM;
xdp.data_end = xdp.data + size;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
if (xdp.data != data + XDP_PACKET_HEADROOM)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(uattr, xdp.data, size, retval, duration);
kfree(data);
return ret;
}
......@@ -3309,24 +3309,28 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops xdp_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
.test_run = bpf_prog_test_run_xdp,
};
static const struct bpf_verifier_ops cg_skb_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops lwt_inout_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops lwt_xmit_ops = {
......@@ -3334,6 +3338,7 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops cg_sock_ops = {
......
......@@ -81,6 +81,7 @@ enum bpf_cmd {
BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
};
enum bpf_map_type {
......@@ -189,6 +190,17 @@ union bpf_attr {
__u32 attach_type;
__u32 attach_flags;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
__u32 data_size_in;
__u32 data_size_out;
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
} test;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -459,6 +471,18 @@ union bpf_attr {
* Return:
* > 0 length of the string including the trailing NUL on success
* < 0 error
*
* u64 bpf_bpf_get_socket_cookie(skb)
* Get the cookie for the socket stored inside sk_buff.
* @skb: pointer to skb
* Return: 8 Bytes non-decreasing number on success or 0 if the socket
* field is missing inside sk_buff
*
* u32 bpf_get_socket_uid(skb)
* Get the owner uid of the socket stored inside sk_buff.
* @skb: pointer to skb
* Return: uid of the socket owner on success or 0 if the socket pointer
* inside sk_buff is NULL
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......
......@@ -209,3 +209,27 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,
__u32 *duration)
{
union bpf_attr attr;
int ret;
bzero(&attr, sizeof(attr));
attr.test.prog_fd = prog_fd;
attr.test.data_in = ptr_to_u64(data);
attr.test.data_out = ptr_to_u64(data_out);
attr.test.data_size_in = size;
attr.test.repeat = repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
if (size_out)
*size_out = attr.test.data_size_out;
if (retval)
*retval = attr.test.retval;
if (duration)
*duration = attr.test.duration;
return ret;
}
......@@ -47,6 +47,8 @@ int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,
__u32 *duration);
#endif
......@@ -1618,8 +1618,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
return fd;
}
static void bpf_program__set_type(struct bpf_program *prog,
enum bpf_prog_type type)
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
prog->type = type;
}
......
......@@ -25,6 +25,7 @@
#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h> // for size_t
#include <linux/bpf.h>
enum libbpf_errno {
__LIBBPF_ERRNO__START = 4000,
......@@ -185,6 +186,7 @@ int bpf_program__set_sched_cls(struct bpf_program *prog);
int bpf_program__set_sched_act(struct bpf_program *prog);
int bpf_program__set_xdp(struct bpf_program *prog);
int bpf_program__set_perf_event(struct bpf_program *prog);
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
bool bpf_program__is_socket_filter(struct bpf_program *prog);
bool bpf_program__is_tracepoint(struct bpf_program *prog);
......
LIBDIR := ../../../lib
BPFDIR := $(LIBDIR)/bpf
CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR)
LDLIBS += -lcap
CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) -I../../../include
LDLIBS += -lcap -lelf
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
TEST_PROGS := test_kmod.sh
include ../lib.mk
BPFOBJ := $(OUTPUT)/bpf.o
BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
......@@ -21,3 +23,10 @@ force:
$(BPFOBJ): force
$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
CLANG ?= clang
%.o: %.c
$(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
-D__x86_64__ -Wno-compare-distinct-pointer-types \
-O2 -target bpf -c $< -o $@
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#ifndef _TEST_IPTNL_COMMON_H
#define _TEST_IPTNL_COMMON_H
#include <linux/types.h>
#define MAX_IPTNL_ENTRIES 256U
struct vip {
union {
__u32 v6[4];
__u32 v4;
} daddr;
__u16 dport;
__u16 family;
__u8 protocol;
};
struct iptnl_info {
union {
__u32 v6[4];
__u32 v4;
} saddr;
union {
__u32 v6[4];
__u32 v4;
} daddr;
__u16 family;
__u8 dmac[6];
};
#endif
This diff is collapsed.
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"
#define _htons __builtin_bswap16
#define barrier() __asm__ __volatile__("": : :"memory")
int _version SEC("version") = 1;
SEC("test1")
int process(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
struct ethhdr *eth = (struct ethhdr *)(data);
struct tcphdr *tcp = NULL;
__u8 proto = 255;
__u64 ihl_len;
if (eth + 1 > data_end)
return TC_ACT_SHOT;
if (eth->h_proto == _htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(eth + 1);
if (iph + 1 > data_end)
return TC_ACT_SHOT;
ihl_len = iph->ihl * 4;
proto = iph->protocol;
tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
} else if (eth->h_proto == _htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
if (ip6h + 1 > data_end)
return TC_ACT_SHOT;
ihl_len = sizeof(*ip6h);
proto = ip6h->nexthdr;
tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
}
if (tcp) {
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
barrier(); /* to force ordering of checks */
if (((void *)(tcp) + 18) > data_end)
return TC_ACT_SHOT;
if (tcp->urg_ptr == 123)
return TC_ACT_OK;
}
return TC_ACT_UNSPEC;
}
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <linux/types.h>
typedef __u16 __sum16;
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <linux/bpf.h>
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "test_iptunnel_common.h"
#include "bpf_util.h"
#define _htons __builtin_bswap16
static int error_cnt, pass_cnt;
#define MAGIC_BYTES 123
/* ipv4 test vector */
static struct {
struct ethhdr eth;
struct iphdr iph;
struct tcphdr tcp;
} __packed pkt_v4 = {
.eth.h_proto = _htons(ETH_P_IP),
.iph.ihl = 5,
.iph.protocol = 6,
.iph.tot_len = _htons(MAGIC_BYTES),
.tcp.urg_ptr = 123,
};
/* ipv6 test vector */
static struct {
struct ethhdr eth;
struct ipv6hdr iph;
struct tcphdr tcp;
} __packed pkt_v6 = {
.eth.h_proto = _htons(ETH_P_IPV6),
.iph.nexthdr = 6,
.iph.payload_len = _htons(MAGIC_BYTES),
.tcp.urg_ptr = 123,
};
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
error_cnt++; \
printf("%s:FAIL:%s ", __func__, tag); \
printf(format); \
} else { \
pass_cnt++; \
printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
} \
})
static int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
struct bpf_program *prog;
struct bpf_object *obj;
int err;
obj = bpf_object__open(file);
if (IS_ERR(obj)) {
error_cnt++;
return -ENOENT;
}
prog = bpf_program__next(NULL, obj);
if (!prog) {
bpf_object__close(obj);
error_cnt++;
return -ENOENT;
}
bpf_program__set_type(prog, type);
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
error_cnt++;
return -EINVAL;
}
*pobj = obj;
*prog_fd = bpf_program__fd(prog);
return 0;
}
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
struct bpf_map *map;
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
printf("%s:FAIL:map '%s' not found\n", test, name);
error_cnt++;
return -1;
}
return bpf_map__fd(map);
}
static void test_pkt_access(void)
{
const char *file = "./test_pkt_access.o";
struct bpf_object *obj;
__u32 duration, retval;
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (err)
return;
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
CHECK(err || errno || retval, "ipv4",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || errno || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
bpf_object__close(obj);
}
static void test_xdp(void)
{
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct vip key6 = {.protocol = 6, .family = AF_INET6};
struct iptnl_info value4 = {.family = AF_INET};
struct iptnl_info value6 = {.family = AF_INET6};
const char *file = "./test_xdp.o";
struct bpf_object *obj;
char buf[128];
struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
__u32 duration, retval, size;
int err, prog_fd, map_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (err)
return;
map_fd = bpf_find_map(__func__, obj, "vip2tnl");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &key4, &value4, 0);
bpf_map_update_elem(map_fd, &key6, &value6, 0);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_TX || size != 74 ||
iph->protocol != IPPROTO_IPIP, "ipv4",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_TX || size != 114 ||
iph6->nexthdr != IPPROTO_IPV6, "ipv6",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
out:
bpf_object__close(obj);
}
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
static void test_l4lb(void)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
const char *file = "./test_l4lb.o";
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
__u32 vip_num;
} value = {.vip_num = VIP_NUM};
__u32 stats_key = VIP_NUM;
struct vip_stats {
__u64 bytes;
__u64 pkts;
} stats[nr_cpus];
struct real_definition {
union {
__be32 dst;
__be32 dstv6[4];
};
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
__u32 duration, retval, size;
int err, i, prog_fd, map_fd;
__u64 bytes = 0, pkts = 0;
struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (err)
return;
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &key, &value, 0);
map_fd = bpf_find_map(__func__, obj, "ch_rings");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
map_fd = bpf_find_map(__func__, obj, "reals");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
map_fd = bpf_find_map(__func__, obj, "stats");
if (map_fd < 0)
goto out;
bpf_map_lookup_elem(map_fd, &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
error_cnt++;
printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
}
out:
bpf_object__close(obj);
}
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
setrlimit(RLIMIT_MEMLOCK, &rinf);
test_pkt_access();
test_xdp();
test_l4lb();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return 0;
}
/* Copyright (c) 2016,2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "test_iptunnel_common.h"
#define htons __builtin_bswap16
#define ntohs __builtin_bswap16
int _version SEC("version") = 1;
struct bpf_map_def SEC("maps") rxcnt = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 256,
};
struct bpf_map_def SEC("maps") vip2tnl = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(struct vip),
.value_size = sizeof(struct iptnl_info),
.max_entries = MAX_IPTNL_ENTRIES,
};
static __always_inline void count_tx(__u32 protocol)
{
__u64 *rxcnt_count;
rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
if (rxcnt_count)
*rxcnt_count += 1;
}
static __always_inline int get_dport(void *trans_data, void *data_end,
__u8 protocol)
{
struct tcphdr *th;
struct udphdr *uh;
switch (protocol) {
case IPPROTO_TCP:
th = (struct tcphdr *)trans_data;
if (th + 1 > data_end)
return -1;
return th->dest;
case IPPROTO_UDP:
uh = (struct udphdr *)trans_data;
if (uh + 1 > data_end)
return -1;
return uh->dest;
default:
return 0;
}
}
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
const struct ethhdr *old_eth,
const struct iptnl_info *tnl,
__be16 h_proto)
{
memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
new_eth->h_proto = h_proto;
}
static __always_inline int handle_ipv4(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
struct iphdr *iph = data + sizeof(struct ethhdr);
__u16 *next_iph;
__u16 payload_len;
struct vip vip = {};
int dport;
__u32 csum = 0;
int i;
if (iph + 1 > data_end)
return XDP_DROP;
dport = get_dport(iph + 1, data_end, iph->protocol);
if (dport == -1)
return XDP_DROP;
vip.protocol = iph->protocol;
vip.family = AF_INET;
vip.daddr.v4 = iph->daddr;
vip.dport = dport;
payload_len = ntohs(iph->tot_len);
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v4-in-v4 */
if (!tnl || tnl->family != AF_INET)
return XDP_PASS;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
return XDP_DROP;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
iph = data + sizeof(*new_eth);
old_eth = data + sizeof(*iph);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end ||
iph + 1 > data_end)
return XDP_DROP;
set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
iph->version = 4;
iph->ihl = sizeof(*iph) >> 2;
iph->frag_off = 0;
iph->protocol = IPPROTO_IPIP;
iph->check = 0;
iph->tos = 0;
iph->tot_len = htons(payload_len + sizeof(*iph));
iph->daddr = tnl->daddr.v4;
iph->saddr = tnl->saddr.v4;
iph->ttl = 8;
next_iph = (__u16 *)iph;
#pragma clang loop unroll(full)
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
count_tx(vip.protocol);
return XDP_TX;
}
static __always_inline int handle_ipv6(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
__u16 payload_len;
struct vip vip = {};
int dport;
if (ip6h + 1 > data_end)
return XDP_DROP;
dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
if (dport == -1)
return XDP_DROP;
vip.protocol = ip6h->nexthdr;
vip.family = AF_INET6;
memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
vip.dport = dport;
payload_len = ip6h->payload_len;
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v6-in-v6 */
if (!tnl || tnl->family != AF_INET6)
return XDP_PASS;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
return XDP_DROP;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
ip6h = data + sizeof(*new_eth);
old_eth = data + sizeof(*ip6h);
if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
ip6h + 1 > data_end)
return XDP_DROP;
set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
ip6h->version = 6;
ip6h->priority = 0;
memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
ip6h->nexthdr = IPPROTO_IPV6;
ip6h->hop_limit = 8;
memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
count_tx(vip.protocol);
return XDP_TX;
}
SEC("xdp_tx_iptunnel")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct ethhdr *eth = data;
__u16 h_proto;
if (eth + 1 > data_end)
return XDP_DROP;
h_proto = eth->h_proto;
if (h_proto == htons(ETH_P_IP))
return handle_ipv4(xdp);
else if (h_proto == htons(ETH_P_IPV6))
return handle_ipv6(xdp);
else
return XDP_DROP;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment