Commit 32e93fb7 authored by Daniel Borkmann, committed by Stephen Hemminger

{f,m}_bpf: allow for sharing maps

This larger work addresses one of the bigger remaining issues on
tc's eBPF frontend, that is, to allow for persistent file descriptors.
Whenever tc parses the ELF object, extracts and loads maps into the
kernel, these file descriptors will be out of reach after the tc
instance exits.

Meaning, for simple (unnested) programs which contain one or
multiple maps, the kernel holds a reference, and they will live
on inside the kernel until the program holding them is unloaded,
but they will be out of reach for user space, even worse with
(also multiple nested) tail calls.

For this issue, we introduced the concept of an agent that can
receive the set of file descriptors from the tc instance creating
them, in order to be able to further inspect/update map data for
a specific use case. However, while that is more tied towards
specific applications, it still doesn't easily allow for sharing
maps across multiple tc instances and would require a daemon to
be running in the background. F.e. when a map should be shared by
two eBPF programs, one attached to ingress, one to egress, this
currently doesn't work with the tc frontend.

This work solves exactly that, i.e. if requested, maps can now be
_arbitrarily_ shared between object files (PIN_GLOBAL_NS) or within
a single object (but various program sections, PIN_OBJECT_NS) without
"losing" the file descriptor set. To make that happen, we use eBPF
object pinning introduced in kernel commit b2197755b263 ("bpf: add
support for persistent maps/progs") for exactly this purpose.

The shipped examples/bpf/bpf_shared.c code from this patch can be
easily applied, for instance, as:

 - classifier-classifier shared:

  tc filter add dev foo parent 1: bpf obj shared.o sec egress
  tc filter add dev foo parent ffff: bpf obj shared.o sec ingress

 - classifier-action shared (here: late binding to a dummy classifier):

  tc actions add action bpf obj shared.o sec egress pass index 42
  tc filter add dev foo parent ffff: bpf obj shared.o sec ingress
  tc filter add dev foo parent 1: bpf bytecode '1,6 0 0 4294967295,' \
     action bpf index 42

The toy example increments a shared counter on egress and dumps its
value on ingress (had no sharing (PIN_NONE) been chosen, the map value
would of course be 0, since two separate map instances would be created):

  [...]
          <idle>-0     [002] ..s. 38264.788234: : map val: 4
          <idle>-0     [002] ..s. 38264.788919: : map val: 4
          <idle>-0     [002] ..s. 38264.789599: : map val: 5
  [...]

... thus if both sections reference the pinned map(s) in question,
tc will take care of fetching the appropriate file descriptor.

The patch has been tested extensively on both, classifier and
action sides.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent e149d4e8
#ifndef __BPF_FUNCS__ #ifndef __BPF_FUNCS__
#define __BPF_FUNCS__ #define __BPF_FUNCS__
#include <stdint.h>
#include "../../include/bpf_elf.h"
/* Misc macros. */ /* Misc macros. */
#ifndef __maybe_unused #ifndef __maybe_unused
# define __maybe_unused __attribute__ ((__unused__)) # define __maybe_unused __attribute__ ((__unused__))
...@@ -43,6 +47,9 @@ static unsigned int (*get_smp_processor_id)(void) __maybe_unused = ...@@ -43,6 +47,9 @@ static unsigned int (*get_smp_processor_id)(void) __maybe_unused =
static unsigned int (*get_prandom_u32)(void) __maybe_unused = static unsigned int (*get_prandom_u32)(void) __maybe_unused =
(void *) BPF_FUNC_get_prandom_u32; (void *) BPF_FUNC_get_prandom_u32;
/* Function-pointer stub bound to the BPF_FUNC_trace_printk helper id.
 * Takes a format string, the size of that format string (fmt_size), and
 * variadic arguments to be formatted.
 */
static int (*bpf_printk)(const char *fmt, int fmt_size, ...) __maybe_unused =
(void *) BPF_FUNC_trace_printk;
/* LLVM built-in functions that an eBPF C program may use to emit /* LLVM built-in functions that an eBPF C program may use to emit
* BPF_LD_ABS and BPF_LD_IND instructions. * BPF_LD_ABS and BPF_LD_IND instructions.
*/ */
......
#include <linux/bpf.h>
#include "bpf_funcs.h"
/* Minimal, stand-alone toy map pinning example:
*
* clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
* tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
* tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
*
* Both classifiers will share the very same map instance in this example,
* so map content can be accessed from ingress *and* egress side!
*
* This example has a pinning of PIN_OBJECT_NS, so it's private and
* thus shared among various program sections within the object.
*
* A setting of PIN_GLOBAL_NS would place it into a global namespace,
* so that it can be shared among different object files. A setting
* of PIN_NONE (= 0) means no sharing, so a new map instance is
* created on each tc invocation.
*/
/* Single-element array map (int key, int value) referenced by both the
 * "egress" and "ingress" program sections of this object.
 */
struct bpf_elf_map __section("maps") map_sh = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.size_key	= sizeof(int),
	.size_value	= sizeof(int),
	.max_elem	= 1,
	.pinning	= PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
};
/* "egress" section entry point: looks up slot 0 of map_sh and, when the
 * lookup succeeds, atomically increments the stored counter by one.
 * Always returns -1, whether or not the lookup succeeded.
 */
__section("egress") int emain(struct __sk_buff *skb)
{
	int slot = 0;
	int *counter = bpf_map_lookup_elem(&map_sh, &slot);

	/* Nothing to count if the lookup yielded no element. */
	if (!counter)
		return -1;

	__sync_fetch_and_add(counter, 1);
	return -1;
}
/* "ingress" section entry point: looks up slot 0 of map_sh and, when the
 * lookup succeeds, prints the current counter value through bpf_printk().
 * Always returns -1, whether or not the lookup succeeded.
 */
__section("ingress") int imain(struct __sk_buff *skb)
{
	char msg[] = "map val: %d\n";
	int slot = 0;
	int *counter = bpf_map_lookup_elem(&map_sh, &slot);

	/* Nothing to print if the lookup yielded no element. */
	if (!counter)
		return -1;

	bpf_printk(msg, sizeof(msg), *counter);
	return -1;
}
/* License string emitted into the "license" ELF section of the object. */
char __license[] __section("license") = "GPL";
#ifndef __BPF_SHARED__ #ifndef __BPF_SHARED__
#define __BPF_SHARED__ #define __BPF_SHARED__
#include <stdint.h>
#include "../../include/bpf_elf.h"
enum { enum {
BPF_MAP_ID_PROTO, BPF_MAP_ID_PROTO,
BPF_MAP_ID_QUEUE, BPF_MAP_ID_QUEUE,
......
...@@ -21,6 +21,11 @@ ...@@ -21,6 +21,11 @@
#define ELF_MAX_MAPS 64 #define ELF_MAX_MAPS 64
#define ELF_MAX_LICENSE_LEN 128 #define ELF_MAX_LICENSE_LEN 128
/* Object pinning settings for the "pinning" field of struct bpf_elf_map:
 *   PIN_NONE      - no sharing; each tc invocation creates a new map instance.
 *   PIN_OBJECT_NS - map is shared among the program sections of one object.
 *   PIN_GLOBAL_NS - map lives in a global namespace and can be shared among
 *                   different object files.
 */
#define PIN_NONE 0
#define PIN_OBJECT_NS 1
#define PIN_GLOBAL_NS 2
/* ELF map definition */ /* ELF map definition */
struct bpf_elf_map { struct bpf_elf_map {
__u32 type; __u32 type;
...@@ -28,6 +33,7 @@ struct bpf_elf_map { ...@@ -28,6 +33,7 @@ struct bpf_elf_map {
__u32 size_value; __u32 size_value;
__u32 max_elem; __u32 max_elem;
__u32 id; __u32 id;
__u8 pinning;
}; };
#endif /* __BPF_ELF__ */ #endif /* __BPF_ELF__ */
...@@ -192,6 +192,9 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); ...@@ -192,6 +192,9 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
__attribute__ ((format (printf, (pos_str), (pos_args)))) __attribute__ ((format (printf, (pos_str), (pos_args))))
#endif #endif
/* Two-level stringification: textify(x) lets the preprocessor macro-expand
 * x first, then _textify() turns the expanded result into a string literal
 * via the '#' operator. Calling _textify() directly stringifies x unexpanded.
 */
#define _textify(x) #x
#define textify(x) _textify(x)
#define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32)) #define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32))
#define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32)) #define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32))
......
...@@ -26,7 +26,7 @@ static char *argv_default[] = { BPF_DEFAULT_CMD, NULL }; ...@@ -26,7 +26,7 @@ static char *argv_default[] = { BPF_DEFAULT_CMD, NULL };
static void explain(void) static void explain(void)
{ {
fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n\n"); fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ] [ debug ]\n\n");
fprintf(stderr, "Where UDS_FILE provides the name of a unix domain socket file\n"); fprintf(stderr, "Where UDS_FILE provides the name of a unix domain socket file\n");
fprintf(stderr, "to import eBPF maps and the optional CMD denotes the command\n"); fprintf(stderr, "to import eBPF maps and the optional CMD denotes the command\n");
fprintf(stderr, "to be executed (default: \'%s\').\n", BPF_DEFAULT_CMD); fprintf(stderr, "to be executed (default: \'%s\').\n", BPF_DEFAULT_CMD);
...@@ -58,17 +58,21 @@ static int parse_bpf(struct exec_util *eu, int argc, char **argv) ...@@ -58,17 +58,21 @@ static int parse_bpf(struct exec_util *eu, int argc, char **argv)
NEXT_ARG(); NEXT_ARG();
argv_run = argv; argv_run = argv;
break; break;
} else if (matches(*argv, "import") == 0 || } else if (matches(*argv, "import") == 0) {
matches(*argv, "imp") == 0) {
NEXT_ARG(); NEXT_ARG();
bpf_uds_name = *argv; bpf_uds_name = *argv;
} else if (matches(*argv, "debug") == 0 ||
matches(*argv, "dbg") == 0) {
if (bpf_trace_pipe())
fprintf(stderr,
"No trace pipe, tracefs not mounted?\n");
return -1;
} else { } else {
explain(); explain();
return -1; return -1;
} }
argc--; NEXT_ARG_FWD();
argv++;
} }
if (!bpf_uds_name) { if (!bpf_uds_name) {
...@@ -142,6 +146,6 @@ err: ...@@ -142,6 +146,6 @@ err:
} }
struct exec_util bpf_exec_util = { struct exec_util bpf_exec_util = {
.id = "bpf", .id = "bpf",
.parse_eopt = parse_bpf, .parse_eopt = parse_bpf,
}; };
...@@ -11,19 +11,8 @@ ...@@ -11,19 +11,8 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <syslog.h> #include <linux/bpf.h>
#include <fcntl.h>
#include <libgen.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <limits.h>
#include <linux/filter.h>
#include <linux/if.h>
#include "utils.h" #include "utils.h"
#include "tc_util.h" #include "tc_util.h"
...@@ -31,6 +20,13 @@ ...@@ -31,6 +20,13 @@
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS; static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
/* Maps the generic BPF_NLA_* indices to the TCA_BPF_* netlink attribute
 * ids; this table is handed to bpf_parse_common() when parsing options.
 */
static const int nla_tbl[BPF_NLA_MAX] = {
[BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
[BPF_NLA_OPS] = TCA_BPF_OPS,
[BPF_NLA_FD] = TCA_BPF_FD,
[BPF_NLA_NAME] = TCA_BPF_NAME,
};
static void explain(void) static void explain(void)
{ {
fprintf(stderr, "Usage: ... bpf ...\n"); fprintf(stderr, "Usage: ... bpf ...\n");
...@@ -42,6 +38,7 @@ static void explain(void) ...@@ -42,6 +38,7 @@ static void explain(void)
fprintf(stderr, "eBPF use case:\n"); fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]"); fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]");
fprintf(stderr, " [ verbose ] [ direct-action ]\n"); fprintf(stderr, " [ verbose ] [ direct-action ]\n");
fprintf(stderr, " object-pinned FILE [ direct-action ]\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Common remaining options:\n"); fprintf(stderr, "Common remaining options:\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n"); fprintf(stderr, " [ action ACTION_SPEC ]\n");
...@@ -51,7 +48,8 @@ static void explain(void) ...@@ -51,7 +48,8 @@ static void explain(void)
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n"); fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n"); fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n"); fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type)); fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
...@@ -66,119 +64,38 @@ static void explain(void) ...@@ -66,119 +64,38 @@ static void explain(void)
static int bpf_parse_opt(struct filter_util *qu, char *handle, static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n) int argc, char **argv, struct nlmsghdr *n)
{ {
const char *bpf_obj = NULL, *bpf_uds_name = NULL;
struct tcmsg *t = NLMSG_DATA(n); struct tcmsg *t = NLMSG_DATA(n);
const char *bpf_uds_name = NULL;
const char *bpf_sec_name = NULL;
unsigned int bpf_flags = 0; unsigned int bpf_flags = 0;
char *bpf_obj = NULL;
struct rtattr *tail;
bool seen_run = false; bool seen_run = false;
long h = 0; struct rtattr *tail;
int ret = 0; int ret = 0;
if (argc == 0) if (argc == 0)
return 0; return 0;
if (handle) { if (handle) {
h = strtol(handle, NULL, 0); if (get_u32(&t->tcm_handle, handle, 0)) {
if (h == LONG_MIN || h == LONG_MAX) { fprintf(stderr, "Illegal \"handle\"\n");
fprintf(stderr, "Illegal handle \"%s\", must be "
"numeric.\n", handle);
return -1; return -1;
} }
} }
t->tcm_handle = h;
tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len)); tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len));
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0); addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
while (argc > 0) { while (argc > 0) {
if (matches(*argv, "run") == 0) { if (matches(*argv, "run") == 0) {
struct sock_filter bpf_ops[BPF_MAXINSNS];
bool from_file, ebpf, bpf_verbose;
int ret;
NEXT_ARG(); NEXT_ARG();
opt_bpf: opt_bpf:
bpf_sec_name = bpf_default_section(bpf_type);
bpf_verbose = false;
ebpf = false;
seen_run = true; seen_run = true;
if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
if (strcmp(*argv, "bytecode-file") == 0 || &bpf_obj, &bpf_uds_name, n)) {
strcmp(*argv, "bcf") == 0) { fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
from_file = true;
} else if (strcmp(*argv, "bytecode") == 0 ||
strcmp(*argv, "bc") == 0) {
from_file = false;
} else if (strcmp(*argv, "object-file") == 0 ||
strcmp(*argv, "obj") == 0) {
ebpf = true;
} else {
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
NEXT_ARG();
if (ebpf) {
bpf_uds_name = getenv(BPF_ENV_UDS);
bpf_obj = *argv;
NEXT_ARG_FWD();
if (argc > 0 &&
(strcmp(*argv, "section") == 0 ||
strcmp(*argv, "sec") == 0)) {
NEXT_ARG();
bpf_sec_name = *argv;
NEXT_ARG_FWD();
}
if (argc > 0 && !bpf_uds_name &&
(strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0)) {
NEXT_ARG();
bpf_uds_name = *argv;
NEXT_ARG_FWD();
}
if (argc > 0 &&
(strcmp(*argv, "verbose") == 0 ||
strcmp(*argv, "verb") == 0)) {
bpf_verbose = true;
NEXT_ARG_FWD();
}
PREV_ARG();
}
ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name,
bpf_verbose) :
bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
fprintf(stderr, "%s\n", ebpf ?
"Could not load object" :
"Illegal \"bytecode\"");
return -1; return -1;
} }
if (ebpf) {
char bpf_name[256];
bpf_obj = basename(bpf_obj);
snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
bpf_obj, bpf_sec_name);
addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
addattrstrz(n, MAX_MSG, TCA_BPF_NAME, bpf_name);
} else {
addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
ret * sizeof(struct sock_filter));
}
} else if (matches(*argv, "classid") == 0 || } else if (matches(*argv, "classid") == 0 ||
strcmp(*argv, "flowid") == 0) { matches(*argv, "flowid") == 0) {
unsigned int handle; unsigned int handle;
NEXT_ARG(); NEXT_ARG();
...@@ -204,7 +121,7 @@ opt_bpf: ...@@ -204,7 +121,7 @@ opt_bpf:
return -1; return -1;
} }
continue; continue;
} else if (strcmp(*argv, "help") == 0) { } else if (matches(*argv, "help") == 0) {
explain(); explain();
return -1; return -1;
} else { } else {
...@@ -280,7 +197,7 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f, ...@@ -280,7 +197,7 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
} }
struct filter_util bpf_filter_util = { struct filter_util bpf_filter_util = {
.id = "bpf", .id = "bpf",
.parse_fopt = bpf_parse_opt, .parse_fopt = bpf_parse_opt,
.print_fopt = bpf_print_opt, .print_fopt = bpf_print_opt,
}; };
...@@ -12,20 +12,23 @@ ...@@ -12,20 +12,23 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <libgen.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/tc_act/tc_bpf.h> #include <linux/tc_act/tc_bpf.h>
#include "utils.h" #include "utils.h"
#include "rt_names.h"
#include "tc_util.h" #include "tc_util.h"
#include "tc_bpf.h" #include "tc_bpf.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT; static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
/* Maps the generic BPF_NLA_* indices to the TCA_ACT_BPF_* netlink attribute
 * ids; this table is handed to bpf_parse_common() when parsing options.
 */
static const int nla_tbl[BPF_NLA_MAX] = {
[BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN,
[BPF_NLA_OPS] = TCA_ACT_BPF_OPS,
[BPF_NLA_FD] = TCA_ACT_BPF_FD,
[BPF_NLA_NAME] = TCA_ACT_BPF_NAME,
};
static void explain(void) static void explain(void)
{ {
fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n"); fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n");
...@@ -37,12 +40,14 @@ static void explain(void) ...@@ -37,12 +40,14 @@ static void explain(void)
fprintf(stderr, "eBPF use case:\n"); fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]"); fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]");
fprintf(stderr, " [ verbose ]\n"); fprintf(stderr, " [ verbose ]\n");
fprintf(stderr, " object-pinned FILE\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n"); fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n"); fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n"); fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type)); fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
...@@ -54,114 +59,40 @@ static void explain(void) ...@@ -54,114 +59,40 @@ static void explain(void)
fprintf(stderr, "explicitly specifies an action index upon creation.\n"); fprintf(stderr, "explicitly specifies an action index upon creation.\n");
} }
static void usage(void) static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
int tca_id, struct nlmsghdr *n)
{ {
explain(); const char *bpf_obj = NULL, *bpf_uds_name = NULL;
exit(-1); struct tc_act_bpf parm;
} bool seen_run = false;
static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
int tca_id, struct nlmsghdr *n)
{
char **argv = *argv_p, bpf_name[256];
struct rtattr *tail; struct rtattr *tail;
struct tc_act_bpf parm = { 0 }; int argc, ret = 0;
struct sock_filter bpf_ops[BPF_MAXINSNS]; char **argv;
bool ebpf_fill = false, bpf_fill = false;
bool ebpf = false, seen_run = false; argv = *ptr_argv;
const char *bpf_uds_name = NULL; argc = *ptr_argc;
const char *bpf_sec_name = NULL;
char *bpf_obj = NULL;
int argc = *argc_p, ret = 0;
__u16 bpf_len = 0;
__u32 bpf_fd = 0;
if (matches(*argv, "bpf") != 0) if (matches(*argv, "bpf") != 0)
return -1; return -1;
NEXT_ARG(); NEXT_ARG();
tail = NLMSG_TAIL(n);
addattr_l(n, MAX_MSG, tca_id, NULL, 0);
while (argc > 0) { while (argc > 0) {
if (matches(*argv, "run") == 0) { if (matches(*argv, "run") == 0) {
bool from_file, bpf_verbose;
int ret;
NEXT_ARG(); NEXT_ARG();
opt_bpf: opt_bpf:
bpf_sec_name = bpf_default_section(bpf_type);
bpf_verbose = false;
seen_run = true; seen_run = true;
if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
if (strcmp(*argv, "bytecode-file") == 0 || &bpf_obj, &bpf_uds_name, n)) {
strcmp(*argv, "bcf") == 0) { fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
from_file = true;
} else if (strcmp(*argv, "bytecode") == 0 ||
strcmp(*argv, "bc") == 0) {
from_file = false;
} else if (strcmp(*argv, "object-file") == 0 ||
strcmp(*argv, "obj") == 0) {
ebpf = true;
} else {
fprintf(stderr, "unexpected \"%s\"\n", *argv);
explain();
return -1; return -1;
} }
NEXT_ARG();
if (ebpf) {
bpf_uds_name = getenv(BPF_ENV_UDS);
bpf_obj = *argv;
NEXT_ARG_FWD();
if (argc > 0 &&
(strcmp(*argv, "section") == 0 ||
strcmp(*argv, "sec") == 0)) {
NEXT_ARG();
bpf_sec_name = *argv;
NEXT_ARG_FWD();
}
if (argc > 0 && !bpf_uds_name &&
(strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0)) {
NEXT_ARG();
bpf_uds_name = *argv;
NEXT_ARG_FWD();
}
if (argc > 0 &&
(strcmp(*argv, "verbose") == 0 ||
strcmp(*argv, "verb") == 0)) {
bpf_verbose = true;
NEXT_ARG_FWD();
}
PREV_ARG();
}
ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name,
bpf_verbose) :
bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
fprintf(stderr, "%s\n", ebpf ?
"Could not load object" :
"Illegal \"bytecode\"");
return -1;
}
if (ebpf) {
bpf_obj = basename(bpf_obj);
snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
bpf_obj, bpf_sec_name);
bpf_fd = ret;
ebpf_fill = true;
} else {
bpf_len = ret;
bpf_fill = true;
}
} else if (matches(*argv, "help") == 0) { } else if (matches(*argv, "help") == 0) {
usage(); explain();
return -1;
} else if (matches(*argv, "index") == 0) { } else if (matches(*argv, "index") == 0) {
break; break;
} else { } else {
...@@ -173,7 +104,9 @@ opt_bpf: ...@@ -173,7 +104,9 @@ opt_bpf:
NEXT_ARG_FWD(); NEXT_ARG_FWD();
} }
memset(&parm, 0, sizeof(parm));
parm.action = TC_ACT_PIPE; parm.action = TC_ACT_PIPE;
if (argc) { if (argc) {
if (matches(*argv, "reclassify") == 0) { if (matches(*argv, "reclassify") == 0) {
parm.action = TC_ACT_RECLASSIFY; parm.action = TC_ACT_RECLASSIFY;
...@@ -207,32 +140,19 @@ opt_bpf: ...@@ -207,32 +140,19 @@ opt_bpf:
} }
} }
tail = NLMSG_TAIL(n);
addattr_l(n, MAX_MSG, tca_id, NULL, 0);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm)); addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm));
if (ebpf_fill) {
addattr32(n, MAX_MSG, TCA_ACT_BPF_FD, bpf_fd);
addattrstrz(n, MAX_MSG, TCA_ACT_BPF_NAME, bpf_name);
} else if (bpf_fill) {
addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops,
bpf_len * sizeof(struct sock_filter));
}
tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
*argc_p = argc;
*argv_p = argv;
if (bpf_uds_name) if (bpf_uds_name)
ret = bpf_send_map_fds(bpf_uds_name, bpf_obj); ret = bpf_send_map_fds(bpf_uds_name, bpf_obj);
*ptr_argc = argc;
*ptr_argv = argv;
return ret; return ret;
} }
static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
{ {
struct rtattr *tb[TCA_ACT_BPF_MAX + 1]; struct rtattr *tb[TCA_ACT_BPF_MAX + 1];
struct tc_act_bpf *parm; struct tc_act_bpf *parm;
...@@ -249,7 +169,6 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) ...@@ -249,7 +169,6 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
} }
parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]);
fprintf(f, "bpf "); fprintf(f, "bpf ");
if (tb[TCA_ACT_BPF_NAME]) if (tb[TCA_ACT_BPF_NAME])
...@@ -276,12 +195,11 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) ...@@ -276,12 +195,11 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
} }
fprintf(f, "\n "); fprintf(f, "\n ");
return 0; return 0;
} }
struct action_util bpf_action_util = { struct action_util bpf_action_util = {
.id = "bpf", .id = "bpf",
.parse_aopt = parse_bpf, .parse_aopt = bpf_parse_opt,
.print_aopt = print_bpf, .print_aopt = bpf_print_opt,
}; };
This diff is collapsed.
...@@ -13,61 +13,56 @@ ...@@ -13,61 +13,56 @@
#ifndef _TC_BPF_H_ #ifndef _TC_BPF_H_
#define _TC_BPF_H_ 1 #define _TC_BPF_H_ 1
#include <linux/filter.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <sys/syscall.h> #include <linux/magic.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include "utils.h" #include "utils.h"
#include "bpf_scm.h" #include "bpf_scm.h"
/* Generic indices for the BPF-related netlink attributes. Each user supplies
 * a table of BPF_NLA_MAX entries translating these indices into its own
 * attribute ids (TCA_BPF_* or TCA_ACT_BPF_*).
 */
enum {
BPF_NLA_OPS_LEN = 0,
BPF_NLA_OPS,
BPF_NLA_FD,
BPF_NLA_NAME,
__BPF_NLA_MAX,
};
/* Number of BPF_NLA_* indices; used as the size of the translation tables. */
#define BPF_NLA_MAX __BPF_NLA_MAX
#define BPF_ENV_UDS "TC_BPF_UDS" #define BPF_ENV_UDS "TC_BPF_UDS"
#define BPF_ENV_MNT "TC_BPF_MNT"
#define BPF_ENV_NOLOG "TC_BPF_NOLOG"
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, #ifndef BPF_FS_MAGIC
char **bpf_string, bool *need_release, # define BPF_FS_MAGIC 0xcafe4a11
const char separator); #endif
int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
bool from_file);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
#define BPF_DIR_MNT "/sys/fs/bpf"
#define BPF_DIR_TC "tc"
#define BPF_DIR_GLOBALS "globals"
#ifndef TRACEFS_MAGIC
# define TRACEFS_MAGIC 0x74726163
#endif
#define TRACE_DIR_MNT "/sys/kernel/tracing"
int bpf_trace_pipe(void);
const char *bpf_default_section(const enum bpf_prog_type type); const char *bpf_default_section(const enum bpf_prog_type type);
#ifdef HAVE_ELF int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
int bpf_open_object(const char *path, enum bpf_prog_type type, enum bpf_prog_type type, const char **ptr_object,
const char *sec, bool verbose); const char **ptr_uds_name, struct nlmsghdr *n);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
#ifdef HAVE_ELF
int bpf_send_map_fds(const char *path, const char *obj); int bpf_send_map_fds(const char *path, const char *obj);
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
unsigned int entries); unsigned int entries);
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
return syscall(__NR_bpf, cmd, attr, size);
#else #else
fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
errno = ENOSYS;
return -1;
#endif
}
#else
static inline int bpf_open_object(const char *path, enum bpf_prog_type type,
const char *sec, bool verbose)
{
fprintf(stderr, "No ELF library support compiled in.\n");
errno = ENOSYS;
return -1;
}
static inline int bpf_send_map_fds(const char *path, const char *obj) static inline int bpf_send_map_fds(const char *path, const char *obj)
{ {
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment