Commit 89ad9342 authored by Brenden Blanco's avatar Brenden Blanco Committed by GitHub

Merge pull request #634 from Eichhoernchen/xdp_net-next

Added XDP support to BCC
parents 60b082e0 e0724d73
...@@ -84,6 +84,7 @@ enum bpf_map_type { ...@@ -84,6 +84,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -92,6 +93,8 @@ enum bpf_prog_type { ...@@ -92,6 +93,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
}; };
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
...@@ -312,6 +315,66 @@ enum bpf_func_id { ...@@ -312,6 +315,66 @@ enum bpf_func_id {
*/ */
BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
/**
* bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*/
BPF_FUNC_get_hash_recalc,
/**
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*/
BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
...@@ -346,6 +409,12 @@ enum bpf_func_id { ...@@ -346,6 +409,12 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2) #define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -365,6 +434,8 @@ struct __sk_buff { ...@@ -365,6 +434,8 @@ struct __sk_buff {
__u32 cb[5]; __u32 cb[5];
__u32 hash; __u32 hash;
__u32 tc_classid; __u32 tc_classid;
__u32 data;
__u32 data_end;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
...@@ -379,4 +450,24 @@ struct bpf_tunnel_key { ...@@ -379,4 +450,24 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
*/
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
};
/* user accessible metadata for XDP packet hook
* new fields must be added to the end of this structure
*/
struct xdp_md {
__u32 data;
__u32 data_end;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -85,6 +85,7 @@ enum bpf_map_type { ...@@ -85,6 +85,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -93,6 +94,8 @@ enum bpf_prog_type { ...@@ -93,6 +94,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
}; };
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
...@@ -313,6 +316,66 @@ enum bpf_func_id { ...@@ -313,6 +316,66 @@ enum bpf_func_id {
*/ */
BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
/**
* bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*/
BPF_FUNC_get_hash_recalc,
/**
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*/
BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
...@@ -347,6 +410,12 @@ enum bpf_func_id { ...@@ -347,6 +410,12 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2) #define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -366,6 +435,8 @@ struct __sk_buff { ...@@ -366,6 +435,8 @@ struct __sk_buff {
__u32 cb[5]; __u32 cb[5];
__u32 hash; __u32 hash;
__u32 tc_classid; __u32 tc_classid;
__u32 data;
__u32 data_end;
}; };
struct bpf_tunnel_key { struct bpf_tunnel_key {
...@@ -380,5 +451,25 @@ struct bpf_tunnel_key { ...@@ -380,5 +451,25 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
*/
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
};
/* user accessible metadata for XDP packet hook
* new fields must be added to the end of this structure
*/
struct xdp_md {
__u32 data;
__u32 data_end;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
)********" )********"
...@@ -408,3 +408,104 @@ error: ...@@ -408,3 +408,104 @@ error:
return NULL; return NULL;
} }
int bpf_attach_xdp(const char *dev_name, int progfd) {
struct sockaddr_nl sa;
int sock, seq = 0, len, ret = -1;
char buf[4096];
struct nlattr *nla, *nla_xdp;
struct {
struct nlmsghdr nh;
struct ifinfomsg ifinfo;
char attrbuf[64];
} req;
struct nlmsghdr *nh;
struct nlmsgerr *err;
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
return -1;
}
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
goto cleanup;
}
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_type = RTM_SETLINK;
req.nh.nlmsg_pid = 0;
req.nh.nlmsg_seq = ++seq;
req.ifinfo.ifi_family = AF_UNSPEC;
req.ifinfo.ifi_index = if_nametoindex(dev_name);
if (req.ifinfo.ifi_index == 0) {
fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
goto cleanup;
}
nla = (struct nlattr *)(((char *)&req)
+ NLMSG_ALIGN(req.nh.nlmsg_len));
nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
// we specify the FD passed over by the user
nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
goto cleanup;
}
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
goto cleanup;
}
for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != getpid()) {
fprintf(stderr, "bpf: Wrong pid %d, expected %d\n",
nh->nlmsg_pid, getpid());
errno = EBADMSG;
goto cleanup;
}
if (nh->nlmsg_seq != seq) {
fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
nh->nlmsg_seq, seq);
errno = EBADMSG;
goto cleanup;
}
switch (nh->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(nh);
if (!err->error)
continue;
fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
errno = -err->error;
goto cleanup;
case NLMSG_DONE:
break;
}
}
ret = 0;
cleanup:
close(sock);
return ret;
}
...@@ -61,6 +61,9 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name); ...@@ -61,6 +61,9 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name);
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu); void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu);
/* attached a prog expressed by progfd to the device specified in dev_name */
int bpf_attach_xdp(const char *dev_name, int progfd);
#define LOG_BUF_SIZE 65536 #define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE]; extern char bpf_log_buf[LOG_BUF_SIZE];
......
...@@ -21,6 +21,7 @@ import multiprocessing ...@@ -21,6 +21,7 @@ import multiprocessing
import os import os
import re import re
import struct import struct
import errno
import sys import sys
basestring = (unicode if sys.version_info[0] < 3 else str) basestring = (unicode if sys.version_info[0] < 3 else str)
...@@ -67,6 +68,7 @@ class BPF(object): ...@@ -67,6 +68,7 @@ class BPF(object):
SCHED_CLS = 3 SCHED_CLS = 3
SCHED_ACT = 4 SCHED_ACT = 4
TRACEPOINT = 5 TRACEPOINT = 5
XDP = 6
_probe_repl = re.compile("[^a-zA-Z0-9_]") _probe_repl = re.compile("[^a-zA-Z0-9_]")
_sym_caches = {} _sym_caches = {}
...@@ -439,6 +441,39 @@ class BPF(object): ...@@ -439,6 +441,39 @@ class BPF(object):
raise Exception("Failed to detach BPF from kprobe") raise Exception("Failed to detach BPF from kprobe")
self._del_kprobe(ev_name) self._del_kprobe(ev_name)
@staticmethod
def attach_xdp(dev, fn):
'''
This function attaches a BPF function to a device on the device
driver level (XDP)
'''
if not isinstance(fn, BPF.Function):
raise Exception("arg 1 must be of type BPF.Function")
res = lib.bpf_attach_xdp(dev.encode("ascii"), fn.fd)
if res < 0:
err_no = ct.get_errno()
if err_no == errno.EBADMSG:
raise Exception("Internal error while attaching BFP to device,"+
" try increasing the debug level!")
else:
errstr = os.strerror(err_no)
raise Exception("Failed to attach BPF to device %s: %s"
% (dev, errstr))
@staticmethod
def remove_xdp(dev):
'''
This function removes any BPF function from a device on the
device driver level (XDP)
'''
res = lib.bpf_attach_xdp(dev.encode("ascii"), -1)
if res < 0:
errstr = os.strerror(ct.get_errno())
raise Exception("Failed to detach BPF from device %s: %s"
% (dev, errstr))
@classmethod @classmethod
def _check_path_symbol(cls, module, symname, addr): def _check_path_symbol(cls, module, symname, addr):
sym = bcc_symbol() sym = bcc_symbol()
......
...@@ -108,6 +108,9 @@ lib.perf_reader_free.argtypes = [ct.c_void_p] ...@@ -108,6 +108,9 @@ lib.perf_reader_free.argtypes = [ct.c_void_p]
lib.perf_reader_fd.restype = int lib.perf_reader_fd.restype = int
lib.perf_reader_fd.argtypes = [ct.c_void_p] lib.perf_reader_fd.argtypes = [ct.c_void_p]
lib.bpf_attach_xdp.restype = ct.c_int;
lib.bpf_attach_xdp.argtypes = [ct.c_char_p, ct.c_int]
# bcc symbol helpers # bcc symbol helpers
class bcc_symbol(ct.Structure): class bcc_symbol(ct.Structure):
_fields_ = [ _fields_ = [
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment