Commit 70006af9 authored by David S. Miller's avatar David S. Miller

Merge branch 'ebpf_skb_fields'

Alexei Starovoitov says:

====================
bpf: allow eBPF access skb fields

V1->V2:
- refactored field access converter into common helper convert_skb_access()
  used in both classic and extended BPF
- added missing build_bug_on for field 'len'
- added comment to uapi/linux/bpf.h as suggested by Daniel
- dropped exposing 'ifindex' field for now

classic BPF has a way to access skb fields, whereas extended BPF didn't.
This patch introduces this ability.

Classic BPF can access fields via negative SKF_AD_OFF offset.
Positive bpf_ld_abs N is treated as load from packet, whereas
bpf_ld_abs -0x1000 + N is treated as skb fields access.
Many offsets were hard coded over years: SKF_AD_PROTOCOL, SKF_AD_PKTTYPE, etc.
The problem with this approach was that for every new field classic bpf
assembler had to be tweaked.

I've considered doing the same for extended, but for every new field LLVM
compiler would have to be modified, since it would need to add a new intrinsic.
It could be done with single intrinsic and magic offset or use of inline
assembler, but neither are clean from compiler backend point of view, since
they look like calls but shouldn't scratch caller-saved registers.

Another approach was to introduce new helper functions like bpf_get_pkt_type()
for every field that we want to access, but that is equally ugly for kernel
and slow, since helpers are calls and they are slower than just loads.
In theory helper calls can be 'inlined' inside kernel into direct loads, but
since they were calls for user space, compiler would have to spill registers
around such calls anyway. Teaching compiler to treat such helpers differently
is even uglier.

There were a few other ideas considered. In the end the best seems to be to
introduce a user accessible mirror of in-kernel sk_buff structure:

struct __sk_buff {
    __u32 len;
    __u32 pkt_type;
    __u32 mark;
    __u32 queue_mapping;
};

bpf programs will do:

int bpf_prog1(struct __sk_buff *skb)
{
    __u32 var = skb->pkt_type;

which will be compiled to bpf assembler as:

dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type)

bpf verifier will check validity of access and will convert it to:

dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset))
dst_reg &= 7

since 'pkt_type' is a bitfield.

No new instructions added. LLVM doesn't need to be modified.
JITs don't change and verifier already knows when it accesses 'ctx' pointer.
The only thing needed was to convert user visible offset within __sk_buff
to kernel internal offset within sk_buff.
For 'len' and other fields conversion is trivial.
Converting 'pkt_type' takes 2 or 3 instructions depending on endianness.
More fields can be exposed by adding to the end of the 'struct __sk_buff'.
Like vlan_tci and others can be added later.

When pkt_type field is moved around, goes into different structure, removed or
its size changes, the function convert_skb_access() would need to be updated and
it will cover both classic and extended.

Patch 2 updates examples to demonstrate how fields are accessed and
adds new tests for verifier, since it needs to detect a corner case when
attacker is using single bpf instruction in two branches with different
register types.

The 4 fields of __sk_buff are already exposed to user space via classic bpf and
I believe they're useful in extended as well.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a498cfe9 614cd3bd
...@@ -103,6 +103,9 @@ struct bpf_verifier_ops { ...@@ -103,6 +103,9 @@ struct bpf_verifier_ops {
* with 'type' (read or write) is allowed * with 'type' (read or write) is allowed
*/ */
bool (*is_valid_access)(int off, int size, enum bpf_access_type type); bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
struct bpf_insn *insn);
}; };
struct bpf_prog_type_list { struct bpf_prog_type_list {
...@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f); ...@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
void bpf_map_put(struct bpf_map *map); void bpf_map_put(struct bpf_map *map);
/* verify correctness of eBPF program */ /* verify correctness of eBPF program */
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
#else #else
static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{ {
......
...@@ -170,4 +170,14 @@ enum bpf_func_id { ...@@ -170,4 +170,14 @@ enum bpf_func_id {
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
/* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 *
 * Every member is a __u32 so that a program reads it with a single
 * 32-bit load at offsetof(struct __sk_buff, <field>); the program type's
 * convert_ctx_access() callback translates that offset into the
 * instruction(s) that fetch the matching field of struct sk_buff.
 */
struct __sk_buff {
__u32 len; /* loaded from sk_buff 'len' */
__u32 pkt_type; /* loaded from the sk_buff 'pkt_type' bitfield (byte load + mask) */
__u32 mark; /* loaded from sk_buff 'mark' */
__u32 queue_mapping; /* loaded from the 16-bit sk_buff 'queue_mapping' */
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr) ...@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
goto free_prog; goto free_prog;
/* run eBPF verifier */ /* run eBPF verifier */
err = bpf_check(prog, attr); err = bpf_check(&prog, attr);
if (err < 0) if (err < 0)
goto free_used_maps; goto free_used_maps;
......
...@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env) ...@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
return err; return err;
} else if (class == BPF_LDX) { } else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM || enum bpf_reg_type src_reg_type;
insn->imm != 0) {
verbose("BPF_LDX uses reserved fields\n"); /* check for reserved fields is already done */
return -EINVAL;
}
/* check src operand */ /* check src operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP); err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err) if (err)
...@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env) ...@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
if (err) if (err)
return err; return err;
src_reg_type = regs[insn->src_reg].type;
if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
* use reserved 'imm' field to mark this insn
*/
insn->imm = src_reg_type;
} else if (src_reg_type != insn->imm &&
(src_reg_type == PTR_TO_CTX ||
insn->imm == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
* src_reg == ctx in one branch and
* src_reg == stack|map in some other branch.
* Reject it.
*/
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
} else if (class == BPF_STX) { } else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_XADD) { if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn); err = check_xadd(env, insn);
...@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) ...@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
int i, j; int i, j;
for (i = 0; i < insn_cnt; i++, insn++) { for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) != BPF_MEM ||
insn->imm != 0)) {
verbose("BPF_LDX uses reserved fields\n");
return -EINVAL;
}
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_map *map; struct bpf_map *map;
struct fd f; struct fd f;
...@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) ...@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
insn->src_reg = 0; insn->src_reg = 0;
} }
/* Fix up relative jump offsets after the single instruction at index 'pos'
 * has been expanded in place into 'delta + 1' instructions.
 *
 * This runs on the already-patched program, so:
 *   - instructions that preceded the patched one are still at i < pos,
 *   - the expansion occupies [pos, pos + delta],
 *   - instructions that followed it now live at i > pos + delta.
 *
 * @prog:  program whose insnsi[] already contains the expanded sequence
 * @pos:   index of the first instruction of the expansion
 * @delta: number of instructions that were added (expansion length - 1)
 *
 * BPF_CALL and BPF_EXIT are skipped because they carry no branch offset.
 */
static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
{
	struct bpf_insn *insn = prog->insnsi;
	int insn_cnt = prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (BPF_CLASS(insn->code) != BPF_JMP ||
		    BPF_OP(insn->code) == BPF_CALL ||
		    BPF_OP(insn->code) == BPF_EXIT)
			continue;

		/* adjust offset of jmps if necessary */
		if (i < pos && i + insn->off + 1 > pos) {
			/* forward jump over the patched insn: the target
			 * moved 'delta' slots further away
			 */
			insn->off += delta;
		} else if (i > pos + delta &&
			   i + insn->off + 1 <= pos + delta) {
			/* backward jump from behind the expansion to the
			 * patched insn or anything before it. Both this
			 * jump's own index and its computed target are
			 * shifted by 'delta' here, so compare against
			 * pos + delta. A jump whose original target was the
			 * patched insn itself must land on the first insn of
			 * the expansion, hence '<='.
			 */
			insn->off -= delta;
		}
	}
}
/* convert load instructions that access fields of 'struct __sk_buff'
 * into sequence of instructions that access fields of 'struct sk_buff'
 *
 * Walks every 32-bit BPF_LDX|BPF_MEM load of the program. Loads whose 'imm'
 * was marked PTR_TO_CTX are handed to the program type's
 * convert_ctx_access() callback and replaced by the instruction(s) it
 * produces; for all other such loads the internal 'imm' mark is cleared.
 *
 * When a load expands into more than one instruction the program is
 * reallocated, so env->prog may point to a new bpf_prog on return.
 *
 * Returns 0 on success (also when the program type has no
 * convert_ctx_access callback), -EINVAL if the callback produced zero
 * instructions or filled insn_buf, -ENOMEM if the program could not be grown.
 */
static int convert_ctx_accesses(struct verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	struct bpf_insn insn_buf[16];
	struct bpf_prog *new_prog;
	u32 cnt;
	int i;

	if (!env->prog->aux->ops->convert_ctx_access)
		return 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
			continue;

		if (insn->imm != PTR_TO_CTX) {
			/* clear internal mark */
			insn->imm = 0;
			continue;
		}

		cnt = env->prog->aux->ops->
			convert_ctx_access(insn->dst_reg, insn->src_reg,
					   insn->off, insn_buf);
		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
			verbose("bpf verifier is misconfigured\n");
			return -EINVAL;
		}

		if (cnt == 1) {
			/* one-for-one replacement, no need to grow the prog */
			memcpy(insn, insn_buf, sizeof(*insn));
			continue;
		}

		/* several new insns need to be inserted. Make room for them */
		insn_cnt += cnt - 1;
		new_prog = bpf_prog_realloc(env->prog,
					    bpf_prog_size(insn_cnt),
					    GFP_USER);
		if (!new_prog)
			return -ENOMEM;

		new_prog->len = insn_cnt;

		/* shift the tail of the program up by cnt - 1 slots; use the
		 * eBPF view (insnsi) for source and destination alike so the
		 * pointer arithmetic is in units of struct bpf_insn
		 */
		memmove(new_prog->insnsi + i + cnt, new_prog->insnsi + i + 1,
			sizeof(*insn) * (insn_cnt - i - cnt));

		/* copy substitute insns in place of load instruction */
		memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);

		/* adjust branches in the whole program */
		adjust_branches(new_prog, i, cnt - 1);

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn = new_prog->insnsi + i + cnt - 1;
		i += cnt - 1;
	}

	return 0;
}
static void free_states(struct verifier_env *env) static void free_states(struct verifier_env *env)
{ {
struct verifier_state_list *sl, *sln; struct verifier_state_list *sl, *sln;
...@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env) ...@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
kfree(env->explored_states); kfree(env->explored_states);
} }
int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{ {
char __user *log_ubuf = NULL; char __user *log_ubuf = NULL;
struct verifier_env *env; struct verifier_env *env;
int ret = -EINVAL; int ret = -EINVAL;
if (prog->len <= 0 || prog->len > BPF_MAXINSNS) if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG; return -E2BIG;
/* 'struct verifier_env' can be global, but since it's not small, /* 'struct verifier_env' can be global, but since it's not small,
...@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) ...@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (!env) if (!env)
return -ENOMEM; return -ENOMEM;
env->prog = prog; env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */ /* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock); mutex_lock(&bpf_verifier_lock);
...@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) ...@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (ret < 0) if (ret < 0)
goto skip_full_check; goto skip_full_check;
env->explored_states = kcalloc(prog->len, env->explored_states = kcalloc(env->prog->len,
sizeof(struct verifier_state_list *), sizeof(struct verifier_state_list *),
GFP_USER); GFP_USER);
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1968,6 +2083,10 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) ...@@ -1968,6 +2083,10 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
while (pop_stack(env, NULL) >= 0); while (pop_stack(env, NULL) >= 0);
free_states(env); free_states(env);
if (ret == 0)
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
if (log_level && log_len >= log_size - 1) { if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size); BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */ /* verifier log exceeded user supplied buffer */
...@@ -1983,18 +2102,18 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) ...@@ -1983,18 +2102,18 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (ret == 0 && env->used_map_cnt) { if (ret == 0 && env->used_map_cnt) {
/* if program passed verifier, update used_maps in bpf_prog_info */ /* if program passed verifier, update used_maps in bpf_prog_info */
prog->aux->used_maps = kmalloc_array(env->used_map_cnt, env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
sizeof(env->used_maps[0]), sizeof(env->used_maps[0]),
GFP_KERNEL); GFP_KERNEL);
if (!prog->aux->used_maps) { if (!env->prog->aux->used_maps) {
ret = -ENOMEM; ret = -ENOMEM;
goto free_log_buf; goto free_log_buf;
} }
memcpy(prog->aux->used_maps, env->used_maps, memcpy(env->prog->aux->used_maps, env->used_maps,
sizeof(env->used_maps[0]) * env->used_map_cnt); sizeof(env->used_maps[0]) * env->used_map_cnt);
prog->aux->used_map_cnt = env->used_map_cnt; env->prog->aux->used_map_cnt = env->used_map_cnt;
/* program is valid. Convert pseudo bpf_ld_imm64 into generic /* program is valid. Convert pseudo bpf_ld_imm64 into generic
* bpf_ld_imm64 instructions * bpf_ld_imm64 instructions
...@@ -2006,11 +2125,12 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) ...@@ -2006,11 +2125,12 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (log_level) if (log_level)
vfree(log_buf); vfree(log_buf);
free_env: free_env:
if (!prog->aux->used_maps) if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release /* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them. * them now. Otherwise free_bpf_prog_info() will release them.
*/ */
release_maps(env); release_maps(env);
*prog = env->prog;
kfree(env); kfree(env);
mutex_unlock(&bpf_verifier_lock); mutex_unlock(&bpf_verifier_lock);
return ret; return ret;
......
...@@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) ...@@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return prandom_u32(); return prandom_u32();
} }
/* Emit into 'insn_buf' the instruction(s) that load one sk_buff field,
 * selected by its SKF_AD_* id, from the skb pointed to by 'src_reg' into
 * 'dst_reg'.
 *
 * Returns the number of instructions written; 0 means 'skb_field' is not
 * one of the ids handled here.
 */
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	u32 emitted = 0;

	switch (skb_field) {
	case SKF_AD_QUEUE:
		/* 16-bit field: half-word load */
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
		insn_buf[emitted++] =
			BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				    offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_PKTTYPE:
		/* bitfield: load the containing byte, then mask out the
		 * pkt_type bits (and shift them down on big-endian bitfields)
		 */
		insn_buf[emitted++] =
			BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		insn_buf[emitted++] =
			BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		insn_buf[emitted++] = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_MARK:
		/* 32-bit field: plain word load */
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
		insn_buf[emitted++] =
			BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				    offsetof(struct sk_buff, mark));
		break;
	}

	return emitted;
}
static bool convert_bpf_extensions(struct sock_filter *fp, static bool convert_bpf_extensions(struct sock_filter *fp,
struct bpf_insn **insnp) struct bpf_insn **insnp)
{ {
struct bpf_insn *insn = *insnp; struct bpf_insn *insn = *insnp;
u32 cnt;
switch (fp->k) { switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL: case SKF_AD_OFF + SKF_AD_PROTOCOL:
...@@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, ...@@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break; break;
case SKF_AD_OFF + SKF_AD_PKTTYPE: case SKF_AD_OFF + SKF_AD_PKTTYPE:
*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
PKT_TYPE_OFFSET()); insn += cnt - 1;
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
insn++;
*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
#endif
break; break;
case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_IFINDEX:
...@@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, ...@@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break; break;
case SKF_AD_OFF + SKF_AD_MARK: case SKF_AD_OFF + SKF_AD_MARK:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
insn += cnt - 1;
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
offsetof(struct sk_buff, mark));
break; break;
case SKF_AD_OFF + SKF_AD_RXHASH: case SKF_AD_OFF + SKF_AD_RXHASH:
...@@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, ...@@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break; break;
case SKF_AD_OFF + SKF_AD_QUEUE: case SKF_AD_OFF + SKF_AD_QUEUE:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
insn += cnt - 1;
*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
offsetof(struct sk_buff, queue_mapping));
break; break;
case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG:
...@@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id) ...@@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id)
static bool sk_filter_is_valid_access(int off, int size, static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type) enum bpf_access_type type)
{ {
/* skb fields cannot be accessed yet */ /* only read is allowed */
return false; if (type != BPF_READ)
return false;
/* check bounds */
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
/* disallow misaligned access */
if (off % size != 0)
return false;
/* all __sk_buff fields are __u32 */
if (size != 4)
return false;
return true;
}
/* convert_ctx_access callback for socket filter programs.
 *
 * Translates a load at offset 'ctx_off' within 'struct __sk_buff' into the
 * instruction(s) that read the corresponding 'struct sk_buff' field from
 * 'src_reg' into 'dst_reg', writing them to 'insn_buf'.
 *
 * 'len' is handled inline; 'mark', 'pkt_type' and 'queue_mapping' are
 * delegated to convert_skb_access().
 *
 * Returns the number of instructions written; 0 if 'ctx_off' matches no
 * known field.
 */
static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
switch (ctx_off) {
case offsetof(struct __sk_buff, len):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
offsetof(struct sk_buff, len));
break;
case offsetof(struct __sk_buff, mark):
return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
case offsetof(struct __sk_buff, pkt_type):
return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
case offsetof(struct __sk_buff, queue_mapping):
return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
}
return insn - insn_buf;
} }
static const struct bpf_verifier_ops sk_filter_ops = { static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto, .get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access, .is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = sk_filter_convert_ctx_access,
}; };
static struct bpf_prog_type_list sk_filter_type __read_mostly = { static struct bpf_prog_type_list sk_filter_type __read_mostly = {
......
#include <uapi/linux/bpf.h> #include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h> #include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h> #include <uapi/linux/ip.h>
#include "bpf_helpers.h" #include "bpf_helpers.h"
...@@ -11,14 +12,17 @@ struct bpf_map_def SEC("maps") my_map = { ...@@ -11,14 +12,17 @@ struct bpf_map_def SEC("maps") my_map = {
}; };
SEC("socket1") SEC("socket1")
int bpf_prog1(struct sk_buff *skb) int bpf_prog1(struct __sk_buff *skb)
{ {
int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value; long *value;
if (skb->pkt_type != PACKET_OUTGOING)
return 0;
value = bpf_map_lookup_elem(&my_map, &index); value = bpf_map_lookup_elem(&my_map, &index);
if (value) if (value)
__sync_fetch_and_add(value, 1); __sync_fetch_and_add(value, skb->len);
return 0; return 0;
} }
......
...@@ -40,7 +40,7 @@ int main(int ac, char **argv) ...@@ -40,7 +40,7 @@ int main(int ac, char **argv)
key = IPPROTO_ICMP; key = IPPROTO_ICMP;
assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
printf("TCP %lld UDP %lld ICMP %lld packets\n", printf("TCP %lld UDP %lld ICMP %lld bytes\n",
tcp_cnt, udp_cnt, icmp_cnt); tcp_cnt, udp_cnt, icmp_cnt);
sleep(1); sleep(1);
} }
......
...@@ -42,13 +42,13 @@ static inline int proto_ports_offset(__u64 proto) ...@@ -42,13 +42,13 @@ static inline int proto_ports_offset(__u64 proto)
} }
} }
static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff) static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{ {
return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
& (IP_MF | IP_OFFSET); & (IP_MF | IP_OFFSET);
} }
static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{ {
__u64 w0 = load_word(ctx, off); __u64 w0 = load_word(ctx, off);
__u64 w1 = load_word(ctx, off + 4); __u64 w1 = load_word(ctx, off + 4);
...@@ -58,7 +58,7 @@ static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) ...@@ -58,7 +58,7 @@ static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
return (__u32)(w0 ^ w1 ^ w2 ^ w3); return (__u32)(w0 ^ w1 ^ w2 ^ w3);
} }
static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
struct flow_keys *flow) struct flow_keys *flow)
{ {
__u64 verlen; __u64 verlen;
...@@ -82,7 +82,7 @@ static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, ...@@ -82,7 +82,7 @@ static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
return nhoff; return nhoff;
} }
static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
struct flow_keys *flow) struct flow_keys *flow)
{ {
*ip_proto = load_byte(skb, *ip_proto = load_byte(skb,
...@@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto ...@@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto
return nhoff; return nhoff;
} }
static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
{ {
__u64 nhoff = ETH_HLEN; __u64 nhoff = ETH_HLEN;
__u64 ip_proto; __u64 ip_proto;
...@@ -183,18 +183,23 @@ static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) ...@@ -183,18 +183,23 @@ static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow)
return true; return true;
} }
struct pair {
long packets;
long bytes;
};
struct bpf_map_def SEC("maps") hash_map = { struct bpf_map_def SEC("maps") hash_map = {
.type = BPF_MAP_TYPE_HASH, .type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__be32), .key_size = sizeof(__be32),
.value_size = sizeof(long), .value_size = sizeof(struct pair),
.max_entries = 1024, .max_entries = 1024,
}; };
SEC("socket2") SEC("socket2")
int bpf_prog2(struct sk_buff *skb) int bpf_prog2(struct __sk_buff *skb)
{ {
struct flow_keys flow; struct flow_keys flow;
long *value; struct pair *value;
u32 key; u32 key;
if (!flow_dissector(skb, &flow)) if (!flow_dissector(skb, &flow))
...@@ -203,9 +208,10 @@ int bpf_prog2(struct sk_buff *skb) ...@@ -203,9 +208,10 @@ int bpf_prog2(struct sk_buff *skb)
key = flow.dst; key = flow.dst;
value = bpf_map_lookup_elem(&hash_map, &key); value = bpf_map_lookup_elem(&hash_map, &key);
if (value) { if (value) {
__sync_fetch_and_add(value, 1); __sync_fetch_and_add(&value->packets, 1);
__sync_fetch_and_add(&value->bytes, skb->len);
} else { } else {
long val = 1; struct pair val = {1, skb->len};
bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
} }
......
...@@ -6,6 +6,11 @@ ...@@ -6,6 +6,11 @@
#include <unistd.h> #include <unistd.h>
#include <arpa/inet.h> #include <arpa/inet.h>
struct pair {
__u64 packets;
__u64 bytes;
};
int main(int ac, char **argv) int main(int ac, char **argv)
{ {
char filename[256]; char filename[256];
...@@ -29,13 +34,13 @@ int main(int ac, char **argv) ...@@ -29,13 +34,13 @@ int main(int ac, char **argv)
for (i = 0; i < 5; i++) { for (i = 0; i < 5; i++) {
int key = 0, next_key; int key = 0, next_key;
long long value; struct pair value;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &value); bpf_lookup_elem(map_fd[0], &next_key, &value);
printf("ip %s count %lld\n", printf("ip %s bytes %lld packets %lld\n",
inet_ntoa((struct in_addr){htonl(next_key)}), inet_ntoa((struct in_addr){htonl(next_key)}),
value); value.bytes, value.packets);
key = next_key; key = next_key;
} }
sleep(1); sleep(1);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/unistd.h> #include <linux/unistd.h>
#include <string.h> #include <string.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <stddef.h>
#include "libbpf.h" #include "libbpf.h"
#define MAX_INSNS 512 #define MAX_INSNS 512
...@@ -642,6 +643,75 @@ static struct bpf_test tests[] = { ...@@ -642,6 +643,75 @@ static struct bpf_test tests[] = {
}, },
.result = ACCEPT, .result = ACCEPT,
}, },
{
"access skb fields ok",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, len)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, pkt_type)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, queue_mapping)),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
},
{
"access skb fields bad1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
},
{
"access skb fields bad2",
.insns = {
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, pkt_type)),
BPF_EXIT_INSN(),
},
.fixup = {4},
.errstr = "different pointers",
.result = REJECT,
},
{
"access skb fields bad3",
.insns = {
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, pkt_type)),
BPF_EXIT_INSN(),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_JMP_IMM(BPF_JA, 0, 0, -12),
},
.fixup = {6},
.errstr = "different pointers",
.result = REJECT,
},
}; };
static int probe_filter_length(struct bpf_insn *fp) static int probe_filter_length(struct bpf_insn *fp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment