Commit b896c4f9 authored by Alexei Starovoitov, committed by Ingo Molnar

samples/bpf: Add simple non-portable kprobe filter example

tracex1_kern.c - C program compiled into BPF.

It attaches to kprobe:netif_receive_skb()

When skb->dev->name == "lo", it prints sample debug message into
trace_pipe via bpf_trace_printk() helper function.

tracex1_user.c - corresponding user space component that:
  - loads BPF program via bpf() syscall
  - opens kprobes:netif_receive_skb event via perf_event_open()
    syscall
  - attaches the program to event via ioctl(event_fd,
    PERF_EVENT_IOC_SET_BPF, prog_fd);
  - prints from trace_pipe

Note, this BPF program is non-portable. It must be recompiled
with current kernel headers. kprobe is not a stable ABI and
BPF+kprobe scripts may no longer be meaningful when kernel
internals change.

No matter in what way the kernel changes, neither the kprobe,
nor the BPF program can ever crash or corrupt the kernel,
assuming the kprobes, perf and BPF subsystems have no bugs.

The verifier will detect that the program is using
bpf_trace_printk() and the kernel will print 'this is a DEBUG
kernel' warning banner, which means that bpf_trace_printk()
should be used for debugging of the BPF program only.

Usage:
$ sudo tracex1
            ping-19826 [000] d.s2 63103.382648: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63103.382684: : skb ffff880466b1d300 len 84

            ping-19826 [000] d.s2 63104.382533: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63104.382594: : skb ffff880466b1d300 len 84
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1427312966-8434-7-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 9c959c86
...@@ -6,23 +6,27 @@ hostprogs-y := test_verifier test_maps ...@@ -6,23 +6,27 @@ hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example hostprogs-y += sock_example
hostprogs-y += sockex1 hostprogs-y += sockex1
hostprogs-y += sockex2 hostprogs-y += sockex2
hostprogs-y += tracex1
test_verifier-objs := test_verifier.o libbpf.o test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o sock_example-objs := sock_example.o libbpf.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
always += sockex1_kern.o always += sockex1_kern.o
always += sockex2_kern.o always += sockex2_kern.o
always += tracex1_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf HOSTLOADLIBES_sockex2 += -lelf
HOSTLOADLIBES_tracex1 += -lelf
# point this to your LLVM backend with bpf support # point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
......
...@@ -15,6 +15,12 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value, ...@@ -15,6 +15,12 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
(void *) BPF_FUNC_map_update_elem; (void *) BPF_FUNC_map_update_elem;
static int (*bpf_map_delete_elem)(void *map, void *key) = static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem; (void *) BPF_FUNC_map_delete_elem;
static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
static unsigned long long (*bpf_ktime_get_ns)(void) =
(void *) BPF_FUNC_ktime_get_ns;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) BPF_FUNC_trace_printk;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
......
...@@ -8,29 +8,70 @@ ...@@ -8,29 +8,70 @@
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdlib.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_helpers.h" #include "bpf_helpers.h"
#include "bpf_load.h" #include "bpf_load.h"
#define DEBUGFS "/sys/kernel/debug/tracing/"
static char license[128]; static char license[128];
static int kern_version;
static bool processed_sec[128]; static bool processed_sec[128];
int map_fd[MAX_MAPS]; int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS]; int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS];
int prog_cnt; int prog_cnt;
static int load_and_attach(const char *event, struct bpf_insn *prog, int size) static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{ {
int fd;
bool is_socket = strncmp(event, "socket", 6) == 0; bool is_socket = strncmp(event, "socket", 6) == 0;
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
enum bpf_prog_type prog_type;
char buf[256];
int fd, efd, err, id;
struct perf_event_attr attr = {};
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
if (is_socket) {
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
} else if (is_kprobe || is_kretprobe) {
prog_type = BPF_PROG_TYPE_KPROBE;
} else {
printf("Unknown event '%s'\n", event);
return -1;
}
if (!is_socket) if (is_kprobe || is_kretprobe) {
/* tracing events tbd */ if (is_kprobe)
event += 7;
else
event += 10;
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
is_kprobe ? 'p' : 'r', event, event);
err = system(buf);
if (err < 0) {
printf("failed to create kprobe '%s' error '%s'\n",
event, strerror(errno));
return -1; return -1;
}
}
fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
prog, size, license);
if (fd < 0) { if (fd < 0) {
printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
...@@ -39,6 +80,41 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ...@@ -39,6 +80,41 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_fd[prog_cnt++] = fd; prog_fd[prog_cnt++] = fd;
if (is_socket)
return 0;
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event);
strcat(buf, "/id");
efd = open(buf, O_RDONLY, 0);
if (efd < 0) {
printf("failed to open event %s\n", event);
return -1;
}
err = read(efd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
printf("read from '%s' failed '%s'\n", event, strerror(errno));
return -1;
}
close(efd);
buf[err] = 0;
id = atoi(buf);
attr.config = id;
efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
if (efd < 0) {
printf("event %d fd %d err %s\n", id, efd, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
return 0; return 0;
} }
...@@ -135,6 +211,9 @@ int load_bpf_file(char *path) ...@@ -135,6 +211,9 @@ int load_bpf_file(char *path)
if (gelf_getehdr(elf, &ehdr) != &ehdr) if (gelf_getehdr(elf, &ehdr) != &ehdr)
return 1; return 1;
/* clear all kprobes */
i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
/* scan over all elf sections to get license and map info */ /* scan over all elf sections to get license and map info */
for (i = 1; i < ehdr.e_shnum; i++) { for (i = 1; i < ehdr.e_shnum; i++) {
...@@ -149,6 +228,14 @@ int load_bpf_file(char *path) ...@@ -149,6 +228,14 @@ int load_bpf_file(char *path)
if (strcmp(shname, "license") == 0) { if (strcmp(shname, "license") == 0) {
processed_sec[i] = true; processed_sec[i] = true;
memcpy(license, data->d_buf, data->d_size); memcpy(license, data->d_buf, data->d_size);
} else if (strcmp(shname, "version") == 0) {
processed_sec[i] = true;
if (data->d_size != sizeof(int)) {
printf("invalid size of version section %zd\n",
data->d_size);
return 1;
}
memcpy(&kern_version, data->d_buf, sizeof(int));
} else if (strcmp(shname, "maps") == 0) { } else if (strcmp(shname, "maps") == 0) {
processed_sec[i] = true; processed_sec[i] = true;
if (load_maps(data->d_buf, data->d_size)) if (load_maps(data->d_buf, data->d_size))
...@@ -178,7 +265,8 @@ int load_bpf_file(char *path) ...@@ -178,7 +265,8 @@ int load_bpf_file(char *path)
if (parse_relo_and_apply(data, symbols, &shdr, insns)) if (parse_relo_and_apply(data, symbols, &shdr, insns))
continue; continue;
if (memcmp(shname_prog, "events/", 7) == 0 || if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
memcmp(shname_prog, "kretprobe/", 10) == 0 ||
memcmp(shname_prog, "socket", 6) == 0) memcmp(shname_prog, "socket", 6) == 0)
load_and_attach(shname_prog, insns, data_prog->d_size); load_and_attach(shname_prog, insns, data_prog->d_size);
} }
...@@ -193,7 +281,8 @@ int load_bpf_file(char *path) ...@@ -193,7 +281,8 @@ int load_bpf_file(char *path)
if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
continue; continue;
if (memcmp(shname, "events/", 7) == 0 || if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "socket", 6) == 0) memcmp(shname, "socket", 6) == 0)
load_and_attach(shname, data->d_buf, data->d_size); load_and_attach(shname, data->d_buf, data->d_size);
} }
...@@ -201,3 +290,23 @@ int load_bpf_file(char *path) ...@@ -201,3 +290,23 @@ int load_bpf_file(char *path)
close(fd); close(fd);
return 0; return 0;
} }
/* Stream the kernel trace pipe to stdout.
 *
 * Opens DEBUGFS "trace_pipe" read-only and returns silently if the open
 * fails. Otherwise it loops forever, printing every non-empty chunk that
 * read() delivers via puts(); once the pipe is open this function does
 * not return.
 */
void read_trace_pipe(void)
{
	int trace_fd;

	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
	if (trace_fd < 0)
		return;

	while (1) {
		static char buf[4096];
		ssize_t sz;

		/* leave one byte free so the terminator written below
		 * stays inside buf even when read() fills the request
		 */
		sz = read(trace_fd, buf, sizeof(buf) - 1);
		if (sz > 0) {
			buf[sz] = 0;
			puts(buf);
		}
	}
}
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
extern int map_fd[MAX_MAPS]; extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS]; extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS];
/* parses elf file compiled by llvm .c->.o /* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall * . parses 'maps' section and creates maps via BPF syscall
...@@ -21,4 +22,6 @@ extern int prog_fd[MAX_PROGS]; ...@@ -21,4 +22,6 @@ extern int prog_fd[MAX_PROGS];
*/ */
int load_bpf_file(char *path); int load_bpf_file(char *path);
void read_trace_pipe(void);
#endif #endif
...@@ -81,7 +81,7 @@ char bpf_log_buf[LOG_BUF_SIZE]; ...@@ -81,7 +81,7 @@ char bpf_log_buf[LOG_BUF_SIZE];
int bpf_prog_load(enum bpf_prog_type prog_type, int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int prog_len, const struct bpf_insn *insns, int prog_len,
const char *license) const char *license, int kern_version)
{ {
union bpf_attr attr = { union bpf_attr attr = {
.prog_type = prog_type, .prog_type = prog_type,
...@@ -93,6 +93,11 @@ int bpf_prog_load(enum bpf_prog_type prog_type, ...@@ -93,6 +93,11 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
.log_level = 1, .log_level = 1,
}; };
/* assign one field outside of struct init to make sure any
* padding is zero initialized
*/
attr.kern_version = kern_version;
bpf_log_buf[0] = 0; bpf_log_buf[0] = 0;
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
...@@ -121,3 +126,10 @@ int open_raw_sock(const char *name) ...@@ -121,3 +126,10 @@ int open_raw_sock(const char *name)
return sock; return sock;
} }
/* Thin wrapper that issues the perf_event_open system call through
 * syscall(), forwarding all five arguments unchanged.
 *
 * Returns the value produced by syscall(), converted to int.
 */
int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
		    int group_fd, unsigned long flags)
{
	int ret;

	ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	return ret;
}
...@@ -13,7 +13,7 @@ int bpf_get_next_key(int fd, void *key, void *next_key); ...@@ -13,7 +13,7 @@ int bpf_get_next_key(int fd, void *key, void *next_key);
int bpf_prog_load(enum bpf_prog_type prog_type, int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len, const struct bpf_insn *insns, int insn_len,
const char *license); const char *license, int kern_version);
#define LOG_BUF_SIZE 65536 #define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE]; extern char bpf_log_buf[LOG_BUF_SIZE];
...@@ -182,4 +182,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; ...@@ -182,4 +182,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
/* create RAW socket and bind to interface 'name' */ /* create RAW socket and bind to interface 'name' */
int open_raw_sock(const char *name); int open_raw_sock(const char *name);
struct perf_event_attr;
int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
int group_fd, unsigned long flags);
#endif #endif
...@@ -56,7 +56,7 @@ static int test_sock(void) ...@@ -56,7 +56,7 @@ static int test_sock(void)
}; };
prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
"GPL"); "GPL", 0);
if (prog_fd < 0) { if (prog_fd < 0) {
printf("failed to load prog '%s'\n", strerror(errno)); printf("failed to load prog '%s'\n", strerror(errno));
goto cleanup; goto cleanup;
......
...@@ -689,7 +689,7 @@ static int test(void) ...@@ -689,7 +689,7 @@ static int test(void)
prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog, prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog,
prog_len * sizeof(struct bpf_insn), prog_len * sizeof(struct bpf_insn),
"GPL"); "GPL", 0);
if (tests[i].result == ACCEPT) { if (tests[i].result == ACCEPT) {
if (prog_fd < 0) { if (prog_fd < 0) {
......
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"
/* Fetch the value of lvalue P through the bpf_probe_read() helper rather
 * than dereferencing it directly. A temporary of P's type is set to 0,
 * handed to the helper as the destination, and becomes the value of the
 * whole expression. The helper's return code is not checked here.
 */
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
* Number of arguments and their positions can change, etc.
* In such case this bpf+kprobe example will no longer be meaningful
*/
SEC("kprobe/__netif_receive_skb_core")
int bpf_prog1(struct pt_regs *ctx)
{
	/* runs for the kprobe named in SEC() above: reads the name of the
	 * device behind the skb and prints a debug line for packets whose
	 * device name starts with "lo" (loopback)
	 */
	char fmt[] = "skb %p len %d\n";
	char ifname[IFNAMSIZ] = {};
	struct net_device *netdev;
	struct sk_buff *skb;
	int pkt_len;

	/* non-portable! works for the given kernel only:
	 * assumes the skb pointer is found in ctx->di
	 */
	skb = (struct sk_buff *) ctx->di;

	/* all kernel memory is fetched through bpf_probe_read() */
	netdev = _(skb->dev);
	pkt_len = _(skb->len);
	bpf_probe_read(ifname, sizeof(ifname), netdev->name);

	/* nothing to report unless the name begins with 'l', 'o' */
	if (ifname[0] != 'l' || ifname[1] != 'o')
		return 0;

	/* using bpf_trace_printk() for DEBUG ONLY */
	bpf_trace_printk(fmt, sizeof(fmt), skb, pkt_len);

	return 0;
}
/* license string for this program, tagged with SEC("license")
 * (presumably SEC() places the object in the ELF section of that name
 * so the user-space loader can find it - confirm in bpf_helpers.h)
 */
char _license[] SEC("license") = "GPL";
/* kernel version this object was compiled against, tagged with
 * SEC("version"); taken from LINUX_VERSION_CODE at build time
 */
u32 _version SEC("version") = LINUX_VERSION_CODE;
#include <stdio.h>
#include <linux/bpf.h>
#include <unistd.h>
#include "libbpf.h"
#include "bpf_load.h"
/* Entry point of the user-space side of the example.
 *
 * Builds the object file name "<argv[0]>_kern.o", loads it with
 * load_bpf_file(), starts a background ping to localhost to generate
 * traffic and then hands over to read_trace_pipe().
 *
 * Returns 1 (after printing bpf_log_buf) when loading fails, 0 otherwise.
 */
int main(int ac, char **argv)
{
	char obj_path[256];
	FILE *ping;

	/* the BPF object is expected next to this binary, same base name */
	snprintf(obj_path, sizeof(obj_path), "%s_kern.o", argv[0]);

	if (load_bpf_file(obj_path) != 0) {
		printf("%s", bpf_log_buf);
		return 1;
	}

	/* best effort: the ping only generates traffic, its stream handle
	 * is intentionally not used afterwards
	 */
	ping = popen("taskset 1 ping -c5 localhost", "r");
	(void) ping;

	read_trace_pipe();

	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment