Commit 6256f8c9 authored by Daniel Borkmann, committed by Stephen Hemminger

tc, bpf: finalize eBPF support for cls and act front-end

This work finalizes both eBPF front-ends for the classifier and action
part in tc, it allows for custom ELF section selection, a simplified tc
command frontend (while keeping compat), reusing of common maps between
classifier and actions residing in the same object file, and exporting
of all map fds to an eBPF agent for handing off further control in user
space.

It also adds an extensive example of how eBPF can be used, and a minimal
self-contained example agent that dumps map data. The example is well
documented and hopefully provides a good starting point into programming
cls_bpf and act_bpf.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
parent f0eb8da5
......@@ -72,12 +72,16 @@ ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
etc. The same thing can be made with rules.
I still did not test ipchains, but they should work too.
Setup and code example of BPF classifier and action can be found under
examples/bpf/, which should explain everything for getting started.
Setup of rsvp and u32 classifiers is more hairy.
If you read RSVP specs, you will understand how rsvp classifier
works easily. What's about u32... That's example:
#! /bin/sh
TC=/home/root/tc
......
/*
* eBPF user space agent part
*
* Simple, _self-contained_ user space agent for the eBPF kernel
* bpf_prog.c program, which gets all map fds passed from tc via unix
* domain socket in one transaction and can thus keep referencing
* them from user space in order to read out (or possibly modify)
* map data. Here, just as a minimal example to display counters.
*
* The agent only uses the bpf(2) syscall API to read or possibly
* write to eBPF maps, it doesn't need to be aware of the low-level
* bytecode parts and/or ELF parsing bits.
*
* ! For more details, see header comment in bpf_prog.c !
*
* gcc bpf_agent.c -o bpf_agent -Wall -O2
*
* For example, a more complex user space agent could run on each
* host, reading and writing into eBPF maps used by tc classifier
* and actions. It would thus allow for implementing a distributed
* tc architecture, for example, which would push down central
* policies into eBPF maps, and thus altering run-time behaviour.
*
* -- Happy eBPF hacking! ;)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
/* Just some misc macros as min(), offsetof(), etc. */
#include "../../include/utils.h"
/* Common code from fd passing. */
#include "../../include/bpf_scm.h"
/* Common, shared definitions with bpf_prog.c */
#include "bpf_shared.h"
/* Mini syscall wrapper */
#include "bpf_sys.h"
/*
 * Print the drop counters stored in the map behind @fd.
 *
 * The map is queried with the CPU number as key, once for every online
 * CPU reported by sysconf(_SC_NPROCESSORS_ONLN); each value is read as
 * a long and every lookup is expected to succeed.
 */
static void bpf_dump_drops(int fd)
{
	int cpu, max;

	max = sysconf(_SC_NPROCESSORS_ONLN);

	printf(" `- number of drops:");
	for (cpu = 0; cpu < max; cpu++) {
		long drops = 0;
		int ret;

		/* Keep the syscall outside of assert(): with NDEBUG the
		 * whole assert() expression is compiled out, which would
		 * drop the lookup and print an uninitialized counter.
		 */
		ret = bpf_lookup_elem(fd, &cpu, &drops);
		assert(ret == 0);
		(void)ret;

		printf("\tcpu%d: %5ld", cpu, drops);
	}
	printf("\n");
}
static void bpf_dump_queue(int fd)
{
/* Just for the same of the example. */
int max_queue = 4, i;
printf(" | nic queues:");
for (i = 0; i < max_queue; i++) {
struct count_queue cq;
int ret;
memset(&cq, 0, sizeof(cq));
ret = bpf_lookup_elem(fd, &i, &cq);
assert(ret == 0 || (ret < 0 && errno == ENOENT));
printf("\tq%d:[pkts: %ld, mis: %ld]",
i, cq.total, cq.mismatch);
}
printf("\n");
}
static void bpf_dump_proto(int fd)
{
uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
char *names[] = { "tcp", "udp", "icmp" };
int i;
printf(" ` protos:");
for (i = 0; i < ARRAY_SIZE(protos); i++) {
struct count_tuple ct;
int ret;
memset(&ct, 0, sizeof(ct));
ret = bpf_lookup_elem(fd, &protos[i], &ct);
assert(ret == 0 || (ret < 0 && errno == ENOENT));
printf("\t%s:[pkts: %ld, bytes: %ld]",
names[i], ct.packets, ct.bytes);
}
printf("\n");
}
/*
 * Print the meta data received for the map set and then periodically
 * dump the contents of the three example maps.
 *
 * @fds: map file descriptors, fds[i] belongs to aux->ent[i]
 * @aux: auxiliary data (object info and per map description) that was
 *       received together with the descriptors
 *
 * The received descriptors are sorted into a table indexed by the map
 * id (BPF_MAP_ID_*). Everything in @aux originates from the peer on the
 * socket, so neither the entry count nor the ids are trusted as array
 * indices. Maps that were not handed over are skipped while dumping.
 */
static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
{
	int i, num_ent = aux->num_ent;
	int tfd[BPF_MAP_ID_MAX];

	/* Never walk past the end of the entry table. */
	if (num_ent > (int)ARRAY_SIZE(aux->ent))
		num_ent = ARRAY_SIZE(aux->ent);

	/* Mark all slots as "not received" before filling them in. */
	for (i = 0; i < BPF_MAP_ID_MAX; i++)
		tfd[i] = -1;

	printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
	       aux->uds_ver, aux->obj_name,
	       (unsigned long)aux->obj_st.st_dev,
	       (unsigned long)aux->obj_st.st_ino, aux->num_ent);

	for (i = 0; i < num_ent; i++) {
		printf("map%d:\n", i);
		printf(" `- fd: %u\n", (unsigned int)fds[i]);
		printf(" | serial: %u\n", aux->ent[i].id);
		printf(" | type: %u\n", aux->ent[i].type);
		printf(" | max elem: %u\n", aux->ent[i].max_elem);
		printf(" | size key: %u\n", aux->ent[i].size_key);
		printf(" ` size val: %u\n", aux->ent[i].size_value);

		/* Ids this agent does not know about are listed above,
		 * but must not be used as an index into tfd[].
		 */
		if (aux->ent[i].id < BPF_MAP_ID_MAX)
			tfd[aux->ent[i].id] = fds[i];
	}

	for (i = 0; i < 30; i++) {
		const int period = 5;

		printf("data, period: %dsec\n", period);

		if (tfd[BPF_MAP_ID_DROPS] >= 0)
			bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
		if (tfd[BPF_MAP_ID_QUEUE] >= 0)
			bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
		if (tfd[BPF_MAP_ID_PROTO] >= 0)
			bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);

		sleep(period);
	}
}
/*
 * Receive a set of map file descriptors plus their description from
 * the unix domain socket @fd.
 *
 * @fd:      bound datagram socket to receive from
 * @fds:     output array for the received descriptors, room for
 *           @entries elements
 * @aux:     output for the auxiliary data; aux->ent[i] describes fds[i]
 * @entries: capacity of @fds (and upper bound for aux->ent[])
 *
 * Each message carries the bpf_map_aux header, a batch of map
 * descriptions as payload and the matching descriptors as SCM_RIGHTS
 * control data. Messages are read until aux->num_ent descriptors have
 * arrived or @entries is exhausted.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
			    unsigned int entries)
{
	struct bpf_map_set_msg msg;
	int *cmsg_buf, min_fd;
	char *amsg_buf, *mmsg_buf;
	unsigned int i;

	/* Start from a clean slate so that no stale stack content can
	 * ever be copied out to the caller.
	 */
	memset(&msg, 0, sizeof(msg));

	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
	amsg_buf = (char *)msg.aux.ent;
	mmsg_buf = (char *)&msg.aux;

	for (i = 0; i < entries; i += min_fd) {
		/* Slots still free in fds[] and aux->ent[]. */
		unsigned int room = entries - i;
		struct cmsghdr *cmsg;
		int ret;

		min_fd = min(BPF_SCM_MAX_FDS * 1U, room);
		bpf_map_set_init_single(&msg, min_fd);

		ret = recvmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret < 0 ? ret : -1;
		/* The fixed header must have arrived completely. */
		if ((size_t)ret < offsetof(struct bpf_map_aux, ent))
			return -EINVAL;

		cmsg = CMSG_FIRSTHDR(&msg.hdr);
		if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_RIGHTS)
			return -EINVAL;
		if (msg.hdr.msg_flags & MSG_CTRUNC)
			return -EIO;
		if (cmsg->cmsg_len < CMSG_LEN(0))
			return -EINVAL;

		/* Number of descriptors that were really passed along;
		 * it must fit into what is left of the output arrays.
		 */
		min_fd = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(fd);
		if (min_fd <= 0 || (unsigned int)min_fd > room)
			return -1;

		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));

		if (i + min_fd == aux->num_ent)
			break;
	}

	return 0;
}
/*
 * Agent entry point: bind a unix datagram socket to the path given as
 * last command line argument, wait for tc to hand over the map file
 * descriptors and then dump the map contents periodically.
 *
 * Exits with 1 on usage, socket or receive errors, 0 otherwise.
 */
int main(int argc, char **argv)
{
	int fds[BPF_SCM_MAX_FDS];
	struct bpf_map_aux aux;
	struct sockaddr_un addr;
	const char *path;
	size_t path_len;
	int fd, ret, status = 0;
	unsigned int i;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <path-uds>\n", argv[0]);
		exit(1);
	}

	/* sun_path has to hold the terminating NUL byte as well, so
	 * refuse anything that would be truncated or left unterminated.
	 */
	path = argv[argc - 1];
	path_len = strlen(path);
	if (path_len >= sizeof(addr.sun_path)) {
		fprintf(stderr, "Socket path too long: %s\n", path);
		exit(1);
	}

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		exit(1);
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, path, path_len + 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		exit(1);
	}

	/* -1 marks slots that never received a descriptor, so that the
	 * cleanup below only closes what was really handed over.
	 */
	for (i = 0; i < ARRAY_SIZE(fds); i++)
		fds[i] = -1;
	memset(&aux, 0, sizeof(aux));

	ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
	if (ret >= 0) {
		bpf_info_loop(fds, &aux);
	} else {
		fprintf(stderr, "Cannot receive map fds\n");
		status = 1;
	}

	for (i = 0; i < ARRAY_SIZE(fds); i++) {
		if (fds[i] >= 0)
			close(fds[i]);
	}
	close(fd);

	return status;
}
#ifndef __BPF_FUNCS__
#define __BPF_FUNCS__
/* Misc macros. Each one is only defined when the including code has
 * not provided a definition of its own already.
 */
/* Marks a definition as possibly unused, see the helper pointers below. */
#ifndef __maybe_unused
# define __maybe_unused __attribute__ ((__unused__))
#endif
/* Places a definition into the ELF section NAME and marks it as used. */
#ifndef __section
# define __section(NAME) __attribute__((section(NAME), used))
#endif
#ifndef offsetof
# define offsetof __builtin_offsetof
#endif
/* Maps htons() to __constant_htons(), which has to be provided by the
 * including code.
 */
#ifndef htons
# define htons(x) __constant_htons((x))
#endif
/* Branch prediction hints on top of __builtin_expect(). */
#ifndef likely
# define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
/* The verifier will translate them to actual function calls. */
/* Each helper is declared as a function pointer that holds the
 * BPF_FUNC_* id of the helper instead of a real address.
 */
static void *(*bpf_map_lookup_elem)(void *map, void *key) __maybe_unused =
(void *) BPF_FUNC_map_lookup_elem;
static int (*bpf_map_update_elem)(void *map, void *key, void *value,
unsigned long long flags) __maybe_unused =
(void *) BPF_FUNC_map_update_elem;
static int (*bpf_map_delete_elem)(void *map, void *key) __maybe_unused =
(void *) BPF_FUNC_map_delete_elem;
static unsigned int (*get_smp_processor_id)(void) __maybe_unused =
(void *) BPF_FUNC_get_smp_processor_id;
static unsigned int (*get_prandom_u32)(void) __maybe_unused =
(void *) BPF_FUNC_get_prandom_u32;
/* LLVM built-in functions that an eBPF C program may use to emit
 * BPF_LD_ABS and BPF_LD_IND instructions.
 */
/* The asm labels bind the declarations to the llvm.bpf.load.* names;
 * presumably @off is the byte offset into the packet of @skb and the
 * byte/half/word variants differ in load width -- confirm against the
 * LLVM BPF backend documentation.
 */
unsigned long long load_byte(void *skb, unsigned long long off)
asm ("llvm.bpf.load.byte");
unsigned long long load_half(void *skb, unsigned long long off)
asm ("llvm.bpf.load.half");
unsigned long long load_word(void *skb, unsigned long long off)
asm ("llvm.bpf.load.word");
#endif /* __BPF_FUNCS__ */
This diff is collapsed.
#ifndef __BPF_SHARED__
#define __BPF_SHARED__
#include <stdint.h>
#include "../../include/bpf_elf.h"
/* Ids of the example maps. The user space agent uses the id that is
 * reported for a received map as index into its fd table in order to
 * tell the maps apart; BPF_MAP_ID_MAX is the number of known ids.
 */
enum {
BPF_MAP_ID_PROTO,
BPF_MAP_ID_QUEUE,
BPF_MAP_ID_DROPS,
__BPF_MAP_ID_MAX,
#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
};
/* Value of the BPF_MAP_ID_PROTO map; the agent looks it up with a one
 * byte IP protocol number as key.
 */
struct count_tuple {
long packets; /* type long for __sync_fetch_and_add() */
long bytes;
};
/* Value of the BPF_MAP_ID_QUEUE map; the agent looks it up with the
 * queue index as key.
 */
struct count_queue {
long total;
long mismatch;
};
#endif /* __BPF_SHARED__ */
#ifndef __BPF_SYS__
#define __BPF_SYS__
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>
/* Convert a user space pointer into the 64 bit integer form in which
 * addresses are stored in union bpf_attr. The pointer is first turned
 * into an unsigned long and then widened.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long) ptr;

	return (__u64) addr;
}
/*
 * Look up the element stored under @key in the eBPF map behind @fd and
 * copy it to @value by means of the bpf(2) BPF_MAP_LOOKUP_ELEM command.
 *
 * @fd:    map file descriptor
 * @key:   pointer to the key, sized as the map's key
 * @value: buffer for the result, sized as the map's value
 *
 * Returns the result of the syscall: 0 on success, -1 with errno set
 * otherwise.
 */
static inline int bpf_lookup_elem(int fd, void *key, void *value)
{
	union bpf_attr attr;

	/* bpf(2) requires every byte of the attribute that a command does
	 * not use to be zero. An initializer only covers the union member
	 * it names, so clear the complete union explicitly.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = bpf_ptr_to_u64(key);
	attr.value = bpf_ptr_to_u64(value);

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
#endif /* __BPF_SYS__ */
#ifndef __BPF_ELF__
#define __BPF_ELF__
#include <asm/types.h>
/* Note:
*
* Below ELF section names and bpf_elf_map structure definition
* are not (!) kernel ABI. It's rather a "contract" between the
* application and the BPF loader in tc. For compatibility, the
* section names should stay as-is. Introduction of aliases, if
* needed, are a possibility, though.
*/
/* ELF section names, etc */
#define ELF_SECTION_LICENSE "license"
#define ELF_SECTION_MAPS "maps"
#define ELF_SECTION_CLASSIFIER "classifier"
#define ELF_SECTION_ACTION "action"
/* Upper limits; ELF_MAX_MAPS also bounds the number of map fds that
 * are handed over in one go (see BPF_SCM_MAX_FDS).
 */
#define ELF_MAX_MAPS 64
#define ELF_MAX_LICENSE_LEN 128
/* ELF map definition */
/* Field meaning as far as visible from the example agent, which prints
 * them as "type", "size key", "size val", "max elem" and "serial";
 * presumably the first four are passed on when the map is created --
 * confirm against the loader in tc.
 */
struct bpf_elf_map {
__u32 type;
__u32 size_key;
__u32 size_value;
__u32 max_elem;
/* Identifies the map; the example agent uses it to tell maps apart. */
__u32 id;
};
#endif /* __BPF_ELF__ */
#ifndef __BPF_SCM__
#define __BPF_SCM__
#include <sys/types.h>
#include <sys/socket.h>
#include "utils.h"
#include "bpf_elf.h"
#define BPF_SCM_AUX_VER 1
/* At most this many fds / map descriptions per bpf_map_aux. */
#define BPF_SCM_MAX_FDS ELF_MAX_MAPS
/* Size of the buffer that backs the control (SCM_RIGHTS) data. */
#define BPF_SCM_MSG_SIZE 1024
/* Device and inode number of the object file. */
struct bpf_elf_st {
dev_t st_dev;
ino_t st_ino;
};
/* Auxiliary data that travels as regular payload next to the fds.
 * Everything before ent[] forms a fixed header, ent[] carries one map
 * description per transferred fd.
 */
struct bpf_map_aux {
unsigned short uds_ver; /* presumably BPF_SCM_AUX_VER -- confirm with sender */
unsigned short num_ent; /* total number of maps / fds in the set */
char obj_name[64];
struct bpf_elf_st obj_st;
struct bpf_elf_map ent[BPF_SCM_MAX_FDS];
};
/* Everything needed for one sendmsg()/recvmsg() call: the message
 * header, the single iovec that points to aux, the control buffer for
 * the fds and the payload itself.
 */
struct bpf_map_set_msg {
struct msghdr hdr;
struct iovec iov;
char msg_buf[BPF_SCM_MSG_SIZE];
struct bpf_map_aux aux;
};
/*
 * Wire up @msg for passing map fds over a unix domain socket.
 *
 * @msg:      message state to initialize
 * @addr:     peer address to store as msg_name, may be NULL
 * @addr_len: length of @addr
 *
 * The single iovec is pointed to msg->aux, the control data to
 * msg->msg_buf, and the first control header is set up as
 * SOL_SOCKET/SCM_RIGHTS. The control length set here is only the raw
 * size of the fd array without the control header; the length that
 * matches an actual transfer is set by bpf_map_set_init_single().
 *
 * Returns a pointer to the fd array inside the control buffer.
 */
static inline int *bpf_map_set_init(struct bpf_map_set_msg *msg,
				    struct sockaddr_un *addr,
				    unsigned int addr_len)
{
	/* An enum rather than a const variable, so that the size check
	 * below is a constant expression and enforced at compile time.
	 */
	enum { cmsg_ctl_len = sizeof(int) * BPF_SCM_MAX_FDS };
	struct cmsghdr *cmsg;

	/* The buffer has to hold the control header plus all fds, since
	 * that is the largest control length used for a transfer.
	 */
	BUILD_BUG_ON(sizeof(msg->msg_buf) < CMSG_LEN(cmsg_ctl_len));

	msg->iov.iov_base = &msg->aux;
	msg->iov.iov_len = sizeof(msg->aux);

	msg->hdr.msg_iov = &msg->iov;
	msg->hdr.msg_iovlen = 1;

	msg->hdr.msg_name = (struct sockaddr *)addr;
	msg->hdr.msg_namelen = addr_len;

	msg->hdr.msg_control = &msg->msg_buf;
	msg->hdr.msg_controllen = cmsg_ctl_len;

	cmsg = CMSG_FIRSTHDR(&msg->hdr);
	cmsg->cmsg_len = msg->hdr.msg_controllen;
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;

	return (int *)CMSG_DATA(cmsg);
}
/*
 * Adjust an already initialized @msg for a transfer of @num fds: the
 * payload length covers the bpf_map_aux header plus @num map
 * descriptions, the control length the header plus @num descriptors.
 */
static inline void bpf_map_set_init_single(struct bpf_map_set_msg *msg,
					   int num)
{
	struct cmsghdr *first;

	msg->iov.iov_len = offsetof(struct bpf_map_aux, ent) +
			   sizeof(struct bpf_elf_map) * num;
	msg->hdr.msg_controllen = CMSG_LEN(sizeof(int) * num);

	/* Needs msg_controllen from above in order to find the header. */
	first = CMSG_FIRSTHDR(&msg->hdr);
	first->cmsg_len = msg->hdr.msg_controllen;
}
#endif /* __BPF_SCM__ */
......@@ -171,6 +171,20 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
/* Number of elements of the array x; x must be a real array, not a
 * pointer.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Yields an array type of negative size when cond is true, which fails
 * the build as long as cond is a compile time constant expression.
 */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
/* Fallback for environments that have not defined offsetof() yet. */
#ifndef offsetof
# define offsetof(type, member) ((size_t) &((type *)0)->member)
#endif
/* Minimum of x and y. Both arguments are evaluated exactly once; the
 * pointer comparison has no run time effect and is meant to provoke a
 * compiler diagnostic when x and y differ in type. Relies on the GNU C
 * statement expression and typeof extensions.
 */
#ifndef min
# define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#endif
#ifndef __check_format_string
# define __check_format_string(pos_str, pos_args) \
__attribute__ ((format (printf, (pos_str), (pos_args))))
......
......@@ -14,6 +14,7 @@
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <libgen.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
......@@ -28,22 +29,36 @@
#include "tc_util.h"
#include "tc_bpf.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
fprintf(stderr, "\n");
fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
fprintf(stderr, " [from file]: run bytecode-file FILE\n");
fprintf(stderr, " [from file]: run object-file FILE\n");
fprintf(stderr, "BPF use case:\n");
fprintf(stderr, " bytecode BPF_BYTECODE\n");
fprintf(stderr, " bytecode-file FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
fprintf(stderr, " [ classid CLASSID ]\n");
fprintf(stderr, "Common remaining options:\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
fprintf(stderr, " [ classid CLASSID ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
fprintf(stderr, "or an ELF file containing eBPF map definitions and bytecode.\n");
fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
fprintf(stderr, "\n");
fprintf(stderr, "ACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
......@@ -51,8 +66,13 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
struct tcmsg *t = NLMSG_DATA(n);
const char *bpf_uds_name = NULL;
const char *bpf_sec_name = NULL;
char *bpf_obj = NULL;
struct rtattr *tail;
bool seen_run = false;
long h = 0;
int ret = 0;
if (argc == 0)
return 0;
......@@ -68,40 +88,76 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
t->tcm_handle = h;
tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len));
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
while (argc > 0) {
if (matches(*argv, "run") == 0) {
bool from_file = true, ebpf;
struct sock_filter bpf_ops[BPF_MAXINSNS];
bool from_file, ebpf;
int ret;
NEXT_ARG();
if (strcmp(*argv, "bytecode-file") == 0) {
ebpf = false;
} else if (strcmp(*argv, "bytecode") == 0) {
opt_bpf:
bpf_sec_name = bpf_default_section(bpf_type);
ebpf = false;
seen_run = true;
if (strcmp(*argv, "bytecode-file") == 0 ||
strcmp(*argv, "bcf") == 0) {
from_file = true;
} else if (strcmp(*argv, "bytecode") == 0 ||
strcmp(*argv, "bc") == 0) {
from_file = false;
ebpf = false;
} else if (strcmp(*argv, "object-file") == 0) {
} else if (strcmp(*argv, "object-file") == 0 ||
strcmp(*argv, "obj") == 0) {
ebpf = true;
} else {
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
NEXT_ARG();
ret = ebpf ? bpf_open_object(*argv, BPF_PROG_TYPE_SCHED_CLS) :
bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ebpf) {
bpf_obj = *argv;
NEXT_ARG();
if (strcmp(*argv, "section") == 0 ||
strcmp(*argv, "sec") == 0) {
NEXT_ARG();
bpf_sec_name = *argv;
NEXT_ARG();
}
if (strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0) {
NEXT_ARG();
bpf_uds_name = *argv;
NEXT_ARG();
}
PREV_ARG();
}
ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name) :
bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
fprintf(stderr, "%s\n", ebpf ?
"Could not load object" :
"Illegal \"bytecode\"");
return -1;
}
if (ebpf) {
char bpf_name[256];
bpf_obj = basename(bpf_obj);
snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
bpf_obj, bpf_sec_name);
addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
addattrstrz(n, MAX_MSG, TCA_BPF_NAME, *argv);
addattrstrz(n, MAX_MSG, TCA_BPF_NAME, bpf_name);
} else {
addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
......@@ -109,7 +165,8 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
}
} else if (matches(*argv, "classid") == 0 ||
strcmp(*argv, "flowid") == 0) {
unsigned handle;
unsigned int handle;
NEXT_ARG();
if (get_tc_classid(&handle, *argv)) {
fprintf(stderr, "Illegal \"classid\"\n");
......@@ -134,15 +191,23 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
explain();
return -1;
} else {
if (!seen_run)
goto opt_bpf;
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
argc--; argv++;
argc--;
argv++;
}
tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
return 0;
tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
if (bpf_uds_name)
ret = bpf_handoff_map_fds(bpf_uds_name, bpf_obj);
return ret;
}
static int bpf_print_opt(struct filter_util *qu, FILE *f,
......@@ -169,9 +234,11 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
else if (tb[TCA_BPF_FD])
fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
fprintf(f, "\n");
}
if (tb[TCA_BPF_POLICE]) {
fprintf(f, "\n");
......
......@@ -7,6 +7,7 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Jiri Pirko <jiri@resnulli.us>
* Daniel Borkmann <daniel@iogearbox.net>
*/
#include <stdio.h>
......@@ -14,6 +15,8 @@
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <libgen.h>
#include <linux/bpf.h>
#include <linux/tc_act/tc_bpf.h>
#include "utils.h"
......@@ -21,16 +24,30 @@
#include "tc_util.h"
#include "tc_bpf.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
fprintf(stderr, "\n");
fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
fprintf(stderr, " [from file]: run bytecode-file FILE\n");
fprintf(stderr, "BPF use case:\n");
fprintf(stderr, " bytecode BPF_BYTECODE\n");
fprintf(stderr, " bytecode-file FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
}
static void usage(void)
......@@ -42,12 +59,17 @@ static void usage(void)
static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
int tca_id, struct nlmsghdr *n)
{
int argc = *argc_p;
char **argv = *argv_p;
char **argv = *argv_p, bpf_name[256];
struct rtattr *tail;
struct tc_act_bpf parm = { 0 };
struct sock_filter bpf_ops[BPF_MAXINSNS];
bool ebpf = false, seen_run = false;
const char *bpf_uds_name = NULL;
const char *bpf_sec_name = NULL;
char *bpf_obj = NULL;
int argc = *argc_p, ret = 0;
__u16 bpf_len = 0;
__u32 bpf_fd = 0;
if (matches(*argv, "bpf") != 0)
return -1;
......@@ -60,25 +82,70 @@ static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
int ret;
NEXT_ARG();
if (strcmp(*argv, "bytecode-file") == 0) {
opt_bpf:
bpf_sec_name = bpf_default_section(bpf_type);
seen_run = true;
if (strcmp(*argv, "bytecode-file") == 0 ||
strcmp(*argv, "bcf") == 0) {
from_file = true;
} else if (strcmp(*argv, "bytecode") == 0) {
} else if (strcmp(*argv, "bytecode") == 0 ||
strcmp(*argv, "bc") == 0) {
from_file = false;
} else if (strcmp(*argv, "object-file") == 0 ||
strcmp(*argv, "obj") == 0) {
ebpf = true;
} else {
fprintf(stderr, "unexpected \"%s\"\n", *argv);
explain();
return -1;
}
NEXT_ARG();
ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ebpf) {
bpf_obj = *argv;
NEXT_ARG();
if (strcmp(*argv, "section") == 0 ||
strcmp(*argv, "sec") == 0) {
NEXT_ARG();
bpf_sec_name = *argv;
NEXT_ARG();
}
if (strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0) {
NEXT_ARG();
bpf_uds_name = *argv;
NEXT_ARG();
}
PREV_ARG();
}
ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name) :
bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
fprintf(stderr, "Illegal \"bytecode\"\n");
fprintf(stderr, "%s\n", ebpf ?
"Could not load object" :
"Illegal \"bytecode\"");
return -1;
}
bpf_len = ret;
if (ebpf) {
bpf_obj = basename(bpf_obj);
snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
bpf_obj, bpf_sec_name);
bpf_fd = ret;
} else {
bpf_len = ret;
}
} else if (matches(*argv, "help") == 0) {
usage();
} else {
if (!seen_run)
goto opt_bpf;
break;
}
argc--;
......@@ -123,29 +190,42 @@ static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
}
}
if (!bpf_len) {
if ((!bpf_len && !ebpf) || (!bpf_fd && ebpf)) {
fprintf(stderr, "bpf: Bytecode needs to be passed\n");
explain();
return -1;
}
tail = NLMSG_TAIL(n);
addattr_l(n, MAX_MSG, tca_id, NULL, 0);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm));
addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops,
bpf_len * sizeof(struct sock_filter));
if (ebpf) {
addattr32(n, MAX_MSG, TCA_ACT_BPF_FD, bpf_fd);
addattrstrz(n, MAX_MSG, TCA_ACT_BPF_NAME, bpf_name);
} else {
addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops,
bpf_len * sizeof(struct sock_filter));
}
tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
*argc_p = argc;
*argv_p = argv;
return 0;
if (bpf_uds_name)
ret = bpf_handoff_map_fds(bpf_uds_name, bpf_obj);
return ret;
}
static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
{
struct rtattr *tb[TCA_ACT_BPF_MAX + 1];
struct tc_act_bpf *parm;
SPRINT_BUF(action_buf);
if (arg == NULL)
return -1;
......@@ -156,15 +236,25 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
fprintf(f, "[NULL bpf parameters]");
return -1;
}
parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]);
fprintf(f, " bpf ");
fprintf(f, "bpf ");
if (tb[TCA_ACT_BPF_NAME])
fprintf(f, "%s ", rta_getattr_str(tb[TCA_ACT_BPF_NAME]));
else if (tb[TCA_ACT_BPF_FD])
fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_ACT_BPF_FD]));
if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN])
if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_ACT_BPF_OPS],
rta_getattr_u16(tb[TCA_ACT_BPF_OPS_LEN]));
fprintf(f, " ");
}
fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt,
fprintf(f, "default-action %s\n", action_n2a(parm->action, action_buf,
sizeof(action_buf)));
fprintf(f, "\tindex %d ref %d bind %d", parm->index, parm->refcnt,
parm->bindcnt);
if (show_stats) {
......
This diff is collapsed.
......@@ -24,32 +24,6 @@
#include "utils.h"
/* Note:
*
* Below ELF section names and bpf_elf_map structure definition
* are not (!) kernel ABI. It's rather a "contract" between the
* application and the BPF loader in tc. For compatibility, the
* section names should stay as-is. Introduction of aliases, if
* needed, are a possibility, though.
*/
/* ELF section names, etc */
#define ELF_SECTION_LICENSE "license"
#define ELF_SECTION_MAPS "maps"
#define ELF_SECTION_CLASSIFIER "classifier"
#define ELF_SECTION_ACTION "action"
#define ELF_MAX_MAPS 64
#define ELF_MAX_LICENSE_LEN 128
/* ELF map definition */
struct bpf_elf_map {
__u32 type;
__u32 size_key;
__u32 size_value;
__u32 max_elem;
};
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
char **bpf_string, bool *need_release,
const char separator);
......@@ -57,28 +31,40 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
bool from_file);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
const char *bpf_default_section(const enum bpf_prog_type type);
#ifdef HAVE_ELF
int bpf_open_object(const char *path, enum bpf_prog_type type,
const char *sec);
int bpf_handoff_map_fds(const char *path, const char *obj);
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
#ifdef HAVE_ELF
int bpf_open_object(const char *path, enum bpf_prog_type type);
static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
return syscall(__NR_bpf, cmd, attr, size);
#else
fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
errno = ENOSYS;
return -1;
#endif
}
#else
static inline int bpf_open_object(const char *path, enum bpf_prog_type type)
static inline int bpf_open_object(const char *path, enum bpf_prog_type type,
const char *sec)
{
fprintf(stderr, "No ELF library support compiled in.\n");
errno = ENOSYS;
return -1;
}
static inline int bpf_handoff_map_fds(const char *path, const char *obj)
{
return 0;
}
#endif /* HAVE_ELF */
#endif /* _TC_BPF_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment