Commit 84085f87 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'enable-bpf_skc-cast-for-networking-progs'

Martin KaFai Lau says:

====================
This set allows networking prog type to directly read fields from
the in-kernel socket type, e.g. "struct tcp_sock".

Patch 2 has the details on the use case.

v3:
- Pass arg_btf_id instead of fn into check_reg_type() in Patch 1 (Lorenz)
- Move arg_btf_id from func_proto to struct bpf_reg_types in Patch 2 (Lorenz)
- Remove test_sock_fields from .gitignore in Patch 8 (Andrii)
- Add tests to have better coverage on the modified helpers (Alexei)
  Patch 13 is added.
- Use "void *sk" as the helper argument in UAPI bpf.h

v3:
- ARG_PTR_TO_SOCK_COMMON_OR_NULL was attempted in v2.  The _OR_NULL was
  needed because the PTR_TO_BTF_ID could be NULL but note that a could be NULL
  PTR_TO_BTF_ID is not a scalar NULL to the verifier.  "_OR_NULL" implicitly
  gives an expectation that the helper can take a scalar NULL which does
  not make sense in most (except one) helpers.  Passing scalar NULL
  should be rejected at the verification time.

  Thus, this patch uses ARG_PTR_TO_BTF_ID_SOCK_COMMON to specify that the
  helper can take both the btf-id ptr or the legacy PTR_TO_SOCK_COMMON but
  not scalar NULL.  It requires the func_proto to explicitly specify the
  arg_btf_id such that there is a very clear expectation that the helper
  can handle a NULL PTR_TO_BTF_ID.

v2:
- Add ARG_PTR_TO_SOCK_COMMON_OR_NULL (Lorenz)
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 182bf3f3 9a856cae
......@@ -292,6 +292,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
__BPF_ARG_TYPE_MAX,
};
......
......@@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
extern const struct bpf_func_proto sk_storage_get_btf_proto;
extern const struct bpf_func_proto sk_storage_delete_btf_proto;
struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
......
......@@ -2512,7 +2512,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_sk_release(struct bpf_sock *sock)
* long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
......@@ -2692,7 +2692,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
......@@ -2861,6 +2861,7 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
* **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
* long bpf_send_signal(u32 sig)
* Description
......@@ -2877,7 +2878,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
* s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
......@@ -3106,7 +3107,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
......@@ -3234,11 +3235,11 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
* u64 bpf_sk_cgroup_id(void *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
* *sk* must be a non-**NULL** pointer to a socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
......@@ -3248,7 +3249,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
* u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
......
......@@ -56,9 +56,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_inode_storage_delete:
return &bpf_inode_storage_delete_proto;
case BPF_FUNC_sk_storage_get:
return &sk_storage_get_btf_proto;
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &sk_storage_delete_btf_proto;
return &bpf_sk_storage_delete_proto;
default:
return tracing_prog_func_proto(func_id, prog);
}
......
......@@ -486,7 +486,12 @@ static bool is_acquire_function(enum bpf_func_id func_id,
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
func_id == BPF_FUNC_sk_fullsock;
func_id == BPF_FUNC_sk_fullsock ||
func_id == BPF_FUNC_skc_to_tcp_sock ||
func_id == BPF_FUNC_skc_to_tcp6_sock ||
func_id == BPF_FUNC_skc_to_udp6_sock ||
func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
/* string representation of 'enum bpf_reg_type' */
......@@ -3953,6 +3958,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
struct bpf_reg_types {
const enum bpf_reg_type types[10];
u32 *btf_id;
};
static const struct bpf_reg_types map_key_value_types = {
......@@ -3973,6 +3979,17 @@ static const struct bpf_reg_types sock_types = {
},
};
static const struct bpf_reg_types btf_id_sock_common_types = {
.types = {
PTR_TO_SOCK_COMMON,
PTR_TO_SOCKET,
PTR_TO_TCP_SOCK,
PTR_TO_XDP_SOCK,
PTR_TO_BTF_ID,
},
.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
static const struct bpf_reg_types mem_types = {
.types = {
PTR_TO_STACK,
......@@ -4014,6 +4031,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_CTX] = &context_types,
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
[ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
[ARG_PTR_TO_SOCKET] = &fullsock_types,
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
......@@ -4028,19 +4046,27 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
const struct bpf_reg_types *compatible)
enum bpf_arg_type arg_type,
const u32 *arg_btf_id)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected, type = reg->type;
const struct bpf_reg_types *compatible;
int i, j;
compatible = compatible_reg_types[arg_type];
if (!compatible) {
verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
return -EFAULT;
}
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
if (expected == NOT_INIT)
break;
if (type == expected)
return 0;
goto found;
}
verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
......@@ -4048,6 +4074,33 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
return -EACCES;
found:
if (type == PTR_TO_BTF_ID) {
if (!arg_btf_id) {
if (!compatible->btf_id) {
verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
return -EFAULT;
}
arg_btf_id = compatible->btf_id;
}
if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
*arg_btf_id)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, kernel_type_name(reg->btf_id),
kernel_type_name(*arg_btf_id));
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
}
}
return 0;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
......@@ -4057,7 +4110,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg];
const struct bpf_reg_types *compatible;
enum bpf_reg_type type = reg->type;
int err = 0;
......@@ -4097,35 +4149,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/
goto skip_type_check;
compatible = compatible_reg_types[arg_type];
if (!compatible) {
verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
return -EFAULT;
}
err = check_reg_type(env, regno, compatible);
err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
if (err)
return err;
if (type == PTR_TO_BTF_ID) {
const u32 *btf_id = fn->arg_btf_id[arg];
if (!btf_id) {
verbose(env, "verifier internal error: missing BTF ID\n");
return -EFAULT;
}
if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *btf_id)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, kernel_type_name(reg->btf_id), kernel_type_name(*btf_id));
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
}
} else if (type == PTR_TO_CTX) {
if (type == PTR_TO_CTX) {
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
......@@ -4573,10 +4601,14 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++)
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
return false;
}
return true;
}
......
......@@ -269,7 +269,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
{
struct bpf_local_storage_data *sdata;
if (flags > BPF_SK_STORAGE_GET_F_CREATE)
if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;
sdata = sk_storage_lookup(sk, map, true);
......@@ -299,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
if (!sk || !sk_fullsock(sk))
return -EINVAL;
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
int err;
......@@ -355,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_SOCKET,
.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
......@@ -375,27 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_SOCKET,
};
const struct bpf_func_proto sk_storage_get_btf_proto = {
.func = bpf_sk_storage_get,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
const struct bpf_func_proto sk_storage_delete_btf_proto = {
.func = bpf_sk_storage_delete,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
struct bpf_sk_storage_diag {
......
......@@ -77,6 +77,9 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
if (in_compat_syscall()) {
......@@ -4085,18 +4088,17 @@ static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
{
struct cgroup *cgrp;
sk = sk_to_full_sk(sk);
if (!sk || !sk_fullsock(sk))
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return cgroup_id(cgrp);
}
BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
{
struct sock *sk = skb_to_full_sk(skb);
if (!sk || !sk_fullsock(sk))
return 0;
return __bpf_sk_cgroup_id(sk);
return __bpf_sk_cgroup_id(skb->sk);
}
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
......@@ -4112,6 +4114,10 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
struct cgroup *ancestor;
struct cgroup *cgrp;
sk = sk_to_full_sk(sk);
if (!sk || !sk_fullsock(sk))
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ancestor = cgroup_ancestor(cgrp, ancestor_level);
if (!ancestor)
......@@ -4123,12 +4129,7 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
ancestor_level)
{
struct sock *sk = skb_to_full_sk(skb);
if (!sk || !sk_fullsock(sk))
return 0;
return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
}
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
......@@ -4148,7 +4149,7 @@ static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
.func = bpf_sk_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
......@@ -4160,7 +4161,7 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
.func = bpf_sk_ancestor_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_ANYTHING,
};
#endif
......@@ -5694,7 +5695,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
if (sk_is_refcounted(sk))
if (sk && sk_is_refcounted(sk))
sock_gen_put(sk);
return 0;
}
......@@ -5703,7 +5704,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
......@@ -6085,7 +6086,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
u32 cookie;
int ret;
if (unlikely(th_len < sizeof(*th)))
if (unlikely(!sk || th_len < sizeof(*th)))
return -EINVAL;
/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
......@@ -6138,7 +6139,7 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
.gpl_only = true,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM,
......@@ -6152,7 +6153,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
u32 cookie;
u16 mss;
if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
return -EINVAL;
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
......@@ -6207,7 +6208,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
.gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM,
......@@ -6216,7 +6217,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
{
if (flags != 0)
if (!sk || flags != 0)
return -EINVAL;
if (!skb_at_tc_ingress(skb))
return -EOPNOTSUPP;
......@@ -6240,7 +6241,7 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg3_type = ARG_ANYTHING,
};
......@@ -6620,7 +6621,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6639,7 +6640,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6800,7 +6801,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto;
#endif
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6841,7 +6842,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6883,7 +6884,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6929,7 +6930,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6971,7 +6972,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -6982,7 +6983,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -7009,7 +7010,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -9746,7 +9747,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
return bpf_base_func_proto(func_id);
return bpf_sk_base_func_proto(func_id);
}
}
......@@ -9913,8 +9914,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
.func = bpf_skc_to_tcp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
......@@ -9930,8 +9930,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
.func = bpf_skc_to_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
......@@ -9954,8 +9953,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
.func = bpf_skc_to_tcp_timewait_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
......@@ -9978,8 +9976,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
.func = bpf_skc_to_tcp_request_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
......@@ -10000,7 +9997,37 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.func = bpf_skc_to_udp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id)
{
const struct bpf_func_proto *func;
switch (func_id) {
case BPF_FUNC_skc_to_tcp6_sock:
func = &bpf_skc_to_tcp6_sock_proto;
break;
case BPF_FUNC_skc_to_tcp_sock:
func = &bpf_skc_to_tcp_sock_proto;
break;
case BPF_FUNC_skc_to_tcp_timewait_sock:
func = &bpf_skc_to_tcp_timewait_sock_proto;
break;
case BPF_FUNC_skc_to_tcp_request_sock:
func = &bpf_skc_to_tcp_request_sock_proto;
break;
case BPF_FUNC_skc_to_udp6_sock:
func = &bpf_skc_to_udp6_sock_proto;
break;
default:
return bpf_base_func_proto(func_id);
}
if (!perfmon_capable())
return NULL;
return func;
}
......@@ -28,22 +28,6 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from)
{
int i;
*to = *from;
for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
to->arg_type[i] = ARG_PTR_TO_BTF_ID;
to->arg_btf_id[i] = &tcp_sock_id;
}
}
}
static int bpf_tcp_ca_init(struct btf *btf)
{
s32 type_id;
......@@ -59,9 +43,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto);
convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto);
return 0;
}
......@@ -188,9 +169,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_tcp_send_ack:
return &bpf_tcp_send_ack_proto;
case BPF_FUNC_sk_storage_get:
return &btf_sk_storage_get_proto;
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &btf_sk_storage_delete_proto;
return &bpf_sk_storage_delete_proto;
default:
return bpf_base_func_proto(func_id);
}
......
......@@ -2512,7 +2512,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_sk_release(struct bpf_sock *sock)
* long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
......@@ -2692,7 +2692,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
......@@ -2861,6 +2861,7 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
* **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
* long bpf_send_signal(u32 sig)
* Description
......@@ -2877,7 +2878,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
* s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
......@@ -3106,7 +3107,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
......@@ -3234,11 +3235,11 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
* u64 bpf_sk_cgroup_id(void *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
* *sk* must be a non-**NULL** pointer to a socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
......@@ -3248,7 +3249,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
* u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
......
......@@ -13,7 +13,6 @@ test_verifier_log
feature
test_sock
test_sock_addr
test_sock_fields
urandom_read
test_sockmap
test_lirc_mode2_user
......
......@@ -35,7 +35,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
test_netcnt test_tcpnotify_user test_sysctl \
test_progs-no_alu32 \
test_current_pid_tgid_new_ns
......
......@@ -16,6 +16,7 @@ BPF_PROG(name, args)
struct sock_common {
unsigned char skc_state;
__u16 skc_num;
} __attribute__((preserve_access_index));
enum sk_pacing {
......@@ -45,6 +46,10 @@ struct inet_connection_sock {
__u64 icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
struct request_sock {
struct sock_common __req_common;
} __attribute__((preserve_access_index));
struct tcp_sock {
struct inet_connection_sock inet_conn;
......@@ -115,14 +120,6 @@ enum tcp_ca_event {
CA_EVENT_ECN_IS_CE = 5,
};
enum tcp_ca_state {
TCP_CA_Open = 0,
TCP_CA_Disorder = 1,
TCP_CA_CWR = 2,
TCP_CA_Recovery = 3,
TCP_CA_Loss = 4
};
struct ack_sample {
__u32 pkts_acked;
__s32 rtt_us;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <linux/compiler.h>
#include <bpf/libbpf.h>
#include "network_helpers.h"
#include "test_progs.h"
#include "test_btf_skc_cls_ingress.skel.h"
struct test_btf_skc_cls_ingress *skel;
struct sockaddr_in6 srv_sa6;
static __u32 duration;
#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
static int write_sysctl(const char *sysctl, const char *value)
{
int fd, err, len;
fd = open(sysctl, O_WRONLY);
if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
sysctl, strerror(errno), errno))
return -1;
len = strlen(value);
err = write(fd, value, len);
close(fd);
if (CHECK(err != len, "write sysctl",
"write(%s, %s, %d): err:%d %s (%d)\n",
sysctl, value, len, err, strerror(errno), errno))
return -1;
return 0;
}
static int prepare_netns(void)
{
if (CHECK(unshare(CLONE_NEWNET), "create netns",
"unshare(CLONE_NEWNET): %s (%d)",
strerror(errno), errno))
return -1;
if (CHECK(system("ip link set dev lo up"),
"ip link set dev lo up", "failed\n"))
return -1;
if (CHECK(system("tc qdisc add dev lo clsact"),
"tc qdisc add dev lo clsact", "failed\n"))
return -1;
if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
"install tc cls-prog at ingress", "failed\n"))
return -1;
/* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
* bpf_tcp_gen_syncookie() helper.
*/
if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
return -1;
return 0;
}
static void reset_test(void)
{
memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
skel->bss->listen_tp_sport = 0;
skel->bss->req_sk_sport = 0;
skel->bss->recv_cookie = 0;
skel->bss->gen_cookie = 0;
skel->bss->linum = 0;
}
static void print_err_line(void)
{
if (skel->bss->linum)
printf("bpf prog error at line %u\n", skel->bss->linum);
}
static void test_conn(void)
{
int listen_fd = -1, cli_fd = -1, err;
socklen_t addrlen = sizeof(srv_sa6);
int srv_port;
if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
return;
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK_FAIL(listen_fd == -1))
return;
err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
errno))
goto done;
memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
srv_port = ntohs(srv_sa6.sin6_port);
cli_fd = connect_to_fd(listen_fd, 0);
if (CHECK_FAIL(cli_fd == -1))
goto done;
if (CHECK(skel->bss->listen_tp_sport != srv_port ||
skel->bss->req_sk_sport != srv_port,
"Unexpected sk src port",
"listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
srv_port))
goto done;
if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
"Unexpected syncookie states",
"gen_cookie:%u recv_cookie:%u\n",
skel->bss->gen_cookie, skel->bss->recv_cookie))
goto done;
CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
skel->bss->linum);
done:
if (listen_fd != -1)
close(listen_fd);
if (cli_fd != -1)
close(cli_fd);
}
static void test_syncookie(void)
{
int listen_fd = -1, cli_fd = -1, err;
socklen_t addrlen = sizeof(srv_sa6);
int srv_port;
/* Enforce syncookie mode */
if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
return;
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK_FAIL(listen_fd == -1))
return;
err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
errno))
goto done;
memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
srv_port = ntohs(srv_sa6.sin6_port);
cli_fd = connect_to_fd(listen_fd, 0);
if (CHECK_FAIL(cli_fd == -1))
goto done;
if (CHECK(skel->bss->listen_tp_sport != srv_port,
"Unexpected tp src port",
"listen_tp_sport:%u expected:%u\n",
skel->bss->listen_tp_sport, srv_port))
goto done;
if (CHECK(skel->bss->req_sk_sport,
"Unexpected req_sk src port",
"req_sk_sport:%u expected:0\n",
skel->bss->req_sk_sport))
goto done;
if (CHECK(!skel->bss->gen_cookie ||
skel->bss->gen_cookie != skel->bss->recv_cookie,
"Unexpected syncookie states",
"gen_cookie:%u recv_cookie:%u\n",
skel->bss->gen_cookie, skel->bss->recv_cookie))
goto done;
CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
skel->bss->linum);
done:
if (listen_fd != -1)
close(listen_fd);
if (cli_fd != -1)
close(cli_fd);
}
struct test {
const char *desc;
void (*run)(void);
};
#define DEF_TEST(name) { #name, test_##name }
static struct test tests[] = {
DEF_TEST(conn),
DEF_TEST(syncookie),
};
void test_btf_skc_cls_ingress(void)
{
int i, err;
skel = test_btf_skc_cls_ingress__open_and_load();
if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
return;
err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
if (CHECK(err, "bpf_program__pin",
"cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
test_btf_skc_cls_ingress__destroy(skel);
return;
}
for (i = 0; i < ARRAY_SIZE(tests); i++) {
if (!test__start_subtest(tests[i].desc))
continue;
if (prepare_netns())
break;
tests[i].run();
print_err_line();
reset_test();
}
bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
test_btf_skc_cls_ingress__destroy(skel);
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
......@@ -12,22 +10,13 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/compiler.h>
#include "network_helpers.h"
#include "cgroup_helpers.h"
#include "test_progs.h"
#include "bpf_rlimit.h"
enum bpf_addr_array_idx {
ADDR_SRV_IDX,
ADDR_CLI_IDX,
__NR_BPF_ADDR_ARRAY_IDX,
};
enum bpf_result_array_idx {
EGRESS_SRV_IDX,
EGRESS_CLI_IDX,
INGRESS_LISTEN_IDX,
__NR_BPF_RESULT_ARRAY_IDX,
};
#include "test_sock_fields.skel.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
......@@ -40,46 +29,24 @@ struct bpf_spinlock_cnt {
__u32 cnt;
};
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
printf(format); \
printf("\n"); \
exit(-1); \
} \
})
#define TEST_CGROUP "/test-bpf-sock-fields"
#define PARENT_CGROUP "/test-bpf-sock-fields"
#define CHILD_CGROUP "/test-bpf-sock-fields/child"
#define DATA "Hello BPF!"
#define DATA_LEN sizeof(DATA)
static struct sockaddr_in6 srv_sa6, cli_sa6;
static int sk_pkt_out_cnt10_fd;
struct test_sock_fields *skel;
static int sk_pkt_out_cnt_fd;
static __u64 parent_cg_id;
static __u64 child_cg_id;
static int linum_map_fd;
static int addr_map_fd;
static int tp_map_fd;
static int sk_map_fd;
static __u32 addr_srv_idx = ADDR_SRV_IDX;
static __u32 addr_cli_idx = ADDR_CLI_IDX;
static __u32 egress_srv_idx = EGRESS_SRV_IDX;
static __u32 egress_cli_idx = EGRESS_CLI_IDX;
static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
static __u32 duration;
static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
static void init_loopback6(struct sockaddr_in6 *sa6)
{
memset(sa6, 0, sizeof(*sa6));
sa6->sin6_family = AF_INET6;
sa6->sin6_addr = in6addr_loopback;
}
static void print_sk(const struct bpf_sock *sk)
static void print_sk(const struct bpf_sock *sk, const char *prefix)
{
char src_ip4[24], dst_ip4[24];
char src_ip6[64], dst_ip6[64];
......@@ -89,9 +56,10 @@ static void print_sk(const struct bpf_sock *sk)
inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
"src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
"dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
prefix,
sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
sk->mark, sk->priority,
sk->src_ip4, src_ip4,
......@@ -102,14 +70,15 @@ static void print_sk(const struct bpf_sock *sk)
dst_ip6, ntohs(sk->dst_port));
}
static void print_tp(const struct bpf_tcp_sock *tp)
static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
{
printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
"snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
"rate_delivered:%u rate_interval_us:%u packets_out:%u "
"retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
"segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
"bytes_received:%llu bytes_acked:%llu\n",
prefix,
tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
......@@ -129,57 +98,26 @@ static void check_result(void)
err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
&ingress_linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno);
printf("listen_sk: ");
print_sk(&listen_sk);
printf("\n");
printf("srv_sk: ");
print_sk(&srv_sk);
printf("\n");
printf("cli_sk: ");
print_sk(&cli_sk);
printf("\n");
printf("listen_tp: ");
print_tp(&listen_tp);
printf("\n");
printf("srv_tp: ");
print_tp(&srv_tp);
printf("\n");
printf("cli_tp: ");
print_tp(&cli_tp);
printf("\n");
"err:%d errno:%d\n", err, errno);
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
print_sk(&listen_sk, "listen_sk");
print_sk(&srv_sk, "srv_sk");
print_sk(&cli_sk, "cli_sk");
print_tp(&listen_tp, "listen_tp");
print_tp(&srv_tp, "srv_tp");
print_tp(&cli_tp, "cli_tp");
CHECK(listen_sk.state != 10 ||
listen_sk.family != AF_INET6 ||
......@@ -190,8 +128,8 @@ static void check_result(void)
listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
listen_sk.dst_port,
"Unexpected listen_sk",
"Check listen_sk output. ingress_linum:%u",
"listen_sk",
"Unexpected. Check listen_sk output. ingress_linum:%u\n",
ingress_linum);
CHECK(srv_sk.state == 10 ||
......@@ -204,9 +142,11 @@ static void check_result(void)
sizeof(srv_sk.dst_ip6)) ||
srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
srv_sk.dst_port != cli_sa6.sin6_port,
"Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
"srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
egress_linum);
CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
CHECK(cli_sk.state == 10 ||
!cli_sk.state ||
cli_sk.family != AF_INET6 ||
......@@ -217,31 +157,40 @@ static void check_result(void)
sizeof(cli_sk.dst_ip6)) ||
cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
cli_sk.dst_port != srv_sa6.sin6_port,
"Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
"cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
egress_linum);
CHECK(listen_tp.data_segs_out ||
listen_tp.data_segs_in ||
listen_tp.total_retrans ||
listen_tp.bytes_acked,
"Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
"listen_tp",
"Unexpected. Check listen_tp output. ingress_linum:%u\n",
ingress_linum);
CHECK(srv_tp.data_segs_out != 2 ||
srv_tp.data_segs_in ||
srv_tp.snd_cwnd != 10 ||
srv_tp.total_retrans ||
srv_tp.bytes_acked != 2 * DATA_LEN,
"Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
srv_tp.bytes_acked < 2 * DATA_LEN,
"srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
egress_linum);
CHECK(cli_tp.data_segs_out ||
cli_tp.data_segs_in != 2 ||
cli_tp.snd_cwnd != 10 ||
cli_tp.total_retrans ||
cli_tp.bytes_received != 2 * DATA_LEN,
"Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
cli_tp.bytes_received < 2 * DATA_LEN,
"cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
egress_linum);
CHECK(skel->bss->parent_cg_id != parent_cg_id,
"parent_cg_id", "%zu != %zu\n",
(size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
CHECK(skel->bss->child_cg_id != child_cg_id,
"child_cg_id", "%zu != %zu\n",
(size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
}
static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
......@@ -257,15 +206,14 @@ static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
&pkt_out_cnt10);
/* The bpf prog only counts for fullsock and
* passive conneciton did not become fullsock until 3WHS
* had been finished.
* The bpf prog only counted two data packet out but we
* specially init accept_fd's pkt_out_cnt by 2 in
* init_sk_storage(). Hence, 4 here.
* passive connection did not become fullsock until 3WHS
* had been finished, so the bpf prog only counted two data
* packet out.
*/
CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
pkt_out_cnt10.cnt < 0xeB9F + 20,
"bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
"err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
"err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
pkt_out_cnt.cnt = ~0;
......@@ -280,14 +228,14 @@ static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
*
* The bpf_prog initialized it to 0xeB9F.
*/
CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
pkt_out_cnt10.cnt != 0xeB9F + 40,
CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
pkt_out_cnt10.cnt < 0xeB9F + 40,
"bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
"err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
"err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
}
static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
{
struct bpf_spinlock_cnt scnt = {};
int err;
......@@ -295,188 +243,140 @@ static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
scnt.cnt = pkt_out_cnt;
err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
BPF_NOEXIST);
CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
"err:%d errno:%d", err, errno);
if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
"err:%d errno:%d\n", err, errno))
return err;
scnt.cnt *= 10;
err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
BPF_NOEXIST);
CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
"err:%d errno:%d", err, errno);
if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
"err:%d errno:%d\n", err, errno))
return err;
return 0;
}
static void test(void)
{
int listen_fd, cli_fd, accept_fd, epfd, err;
struct epoll_event ev;
socklen_t addrlen;
int i;
addrlen = sizeof(struct sockaddr_in6);
ev.events = EPOLLIN;
epfd = epoll_create(1);
CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
socklen_t addrlen = sizeof(struct sockaddr_in6);
char buf[DATA_LEN];
/* Prepare listen_fd */
listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
listen_fd, errno);
init_loopback6(&srv_sa6);
err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
/* start_server() has logged the error details */
if (CHECK_FAIL(listen_fd == -1))
goto done;
err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
err = listen(listen_fd, 1);
CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
/* Prepare cli_fd */
cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
errno))
goto done;
memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
init_loopback6(&cli_sa6);
err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
cli_fd = connect_to_fd(listen_fd, 0);
if (CHECK_FAIL(cli_fd == -1))
goto done;
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
err, errno);
/* Update addr_map with srv_sa6 and cli_sa6 */
err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
/* Connect from cli_sa6 to srv_sa6 */
err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
CHECK(err && errno != EINPROGRESS,
"connect(cli_fd)", "err:%d errno:%d", err, errno);
ev.data.fd = listen_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
err, errno);
/* Accept the connection */
/* Have some timeout in accept(listen_fd). Just in case. */
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != listen_fd,
"epoll_wait(listen_fd)",
"err:%d errno:%d ev.data.fd:%d listen_fd:%d",
err, errno, ev.data.fd, listen_fd);
if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno))
goto done;
accept_fd = accept(listen_fd, NULL, NULL);
CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
accept_fd, errno);
close(listen_fd);
ev.data.fd = cli_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
err, errno);
if (CHECK(accept_fd == -1, "accept(listen_fd)",
"accept_fd:%d errno:%d\n",
accept_fd, errno))
goto done;
init_sk_storage(accept_fd, 2);
if (init_sk_storage(accept_fd, 0xeB9F))
goto done;
for (i = 0; i < 2; i++) {
/* Send some data from accept_fd to cli_fd */
err = send(accept_fd, DATA, DATA_LEN, 0);
CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
err, errno);
/* Have some timeout in recv(cli_fd). Just in case. */
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != cli_fd,
"epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
err, errno, ev.data.fd, cli_fd);
err = recv(cli_fd, NULL, 0, MSG_TRUNC);
CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
/* Send some data from accept_fd to cli_fd.
* MSG_EOR to stop kernel from coalescing two pkts.
*/
err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
if (CHECK(err != DATA_LEN, "send(accept_fd)",
"err:%d errno:%d\n", err, errno))
goto done;
err = recv(cli_fd, buf, DATA_LEN, 0);
if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
err, errno))
goto done;
}
shutdown(cli_fd, SHUT_WR);
err = recv(accept_fd, buf, 1, 0);
if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
err, errno))
goto done;
shutdown(accept_fd, SHUT_WR);
err = recv(cli_fd, buf, 1, 0);
if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
err, errno))
goto done;
check_sk_pkt_out_cnt(accept_fd, cli_fd);
check_result();
close(epfd);
done:
if (accept_fd != -1)
close(accept_fd);
if (cli_fd != -1)
close(cli_fd);
check_result();
if (listen_fd != -1)
close(listen_fd);
}
int main(int argc, char **argv)
void test_sock_fields(void)
{
struct bpf_prog_load_attr attr = {
.file = "test_sock_fields_kern.o",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.prog_flags = BPF_F_TEST_RND_HI32,
};
int cgroup_fd, egress_fd, ingress_fd, err;
struct bpf_program *ingress_prog;
struct bpf_object *obj;
struct bpf_map *map;
struct bpf_link *egress_link = NULL, *ingress_link = NULL;
int parent_cg_fd = -1, child_cg_fd = -1;
/* Create a cgroup, get fd, and join it */
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
"cgroup_fd:%d errno:%d", cgroup_fd, errno);
atexit(cleanup_cgroup_environment);
err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
ingress_prog = bpf_object__find_program_by_title(obj,
"cgroup_skb/ingress");
CHECK(!ingress_prog,
"bpf_object__find_program_by_title(cgroup_skb/ingress)",
"not found");
ingress_fd = bpf_program__fd(ingress_prog);
err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
"err:%d errno%d", err, errno);
err = bpf_prog_attach(ingress_fd, cgroup_fd,
BPF_CGROUP_INET_INGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
"err:%d errno%d", err, errno);
close(cgroup_fd);
map = bpf_object__find_map_by_name(obj, "addr_map");
CHECK(!map, "cannot find addr_map", "(null)");
addr_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "sock_result_map");
CHECK(!map, "cannot find sock_result_map", "(null)");
sk_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
tp_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "linum_map");
CHECK(!map, "cannot find linum_map", "(null)");
linum_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
sk_pkt_out_cnt_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
sk_pkt_out_cnt10_fd = bpf_map__fd(map);
parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
if (CHECK_FAIL(parent_cg_fd < 0))
return;
parent_cg_id = get_cgroup_id(PARENT_CGROUP);
if (CHECK_FAIL(!parent_cg_id))
goto done;
child_cg_fd = test__join_cgroup(CHILD_CGROUP);
if (CHECK_FAIL(child_cg_fd < 0))
goto done;
child_cg_id = get_cgroup_id(CHILD_CGROUP);
if (CHECK_FAIL(!child_cg_id))
goto done;
skel = test_sock_fields__open_and_load();
if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
goto done;
egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
child_cg_fd);
if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
PTR_ERR(egress_link)))
goto done;
ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
child_cg_fd);
if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
PTR_ERR(ingress_link)))
goto done;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
test();
bpf_object__close(obj);
cleanup_cgroup_environment();
printf("PASS\n");
return 0;
done:
bpf_link__destroy(egress_link);
bpf_link__destroy(ingress_link);
test_sock_fields__destroy(skel);
if (child_cg_fd != -1)
close(child_cg_fd);
if (parent_cg_fd != -1)
close(parent_cg_fd);
}
......@@ -15,6 +15,8 @@
*/
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
......
......@@ -9,6 +9,8 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <string.h>
#include <errno.h>
#include <netinet/in.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"
struct sockaddr_in6 srv_sa6 = {};
__u16 listen_tp_sport = 0;
__u16 req_sk_sport = 0;
__u32 recv_cookie = 0;
__u32 gen_cookie = 0;
__u32 linum = 0;
#define LOG() ({ if (!linum) linum = __LINE__; })
static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
struct tcp_sock *tp,
struct __sk_buff *skb)
{
if (th->syn) {
__s64 mss_cookie;
void *data_end;
data_end = (void *)(long)(skb->data_end);
if (th->doff * 4 != 40) {
LOG();
return;
}
if ((void *)th + 40 > data_end) {
LOG();
return;
}
mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
th, 40);
if (mss_cookie < 0) {
if (mss_cookie != -ENOENT)
LOG();
} else {
gen_cookie = (__u32)mss_cookie;
}
} else if (gen_cookie) {
/* It was in cookie mode */
int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
th, sizeof(*th));
if (ret < 0) {
if (ret != -ENOENT)
LOG();
} else {
recv_cookie = bpf_ntohl(th->ack_seq) - 1;
}
}
}
static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
{
struct bpf_sock_tuple *tuple;
struct bpf_sock *bpf_skc;
unsigned int tuple_len;
struct tcphdr *th;
void *data_end;
data_end = (void *)(long)(skb->data_end);
th = (struct tcphdr *)(ip6h + 1);
if (th + 1 > data_end)
return TC_ACT_OK;
/* Is it the testing traffic? */
if (th->dest != srv_sa6.sin6_port)
return TC_ACT_OK;
tuple_len = sizeof(tuple->ipv6);
tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
if ((void *)tuple + tuple_len > data_end) {
LOG();
return TC_ACT_OK;
}
bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
BPF_F_CURRENT_NETNS, 0);
if (!bpf_skc) {
LOG();
return TC_ACT_OK;
}
if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
struct request_sock *req_sk;
req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
if (!req_sk) {
LOG();
goto release;
}
if (bpf_sk_assign(skb, req_sk, 0)) {
LOG();
goto release;
}
req_sk_sport = req_sk->__req_common.skc_num;
bpf_sk_release(req_sk);
return TC_ACT_OK;
} else if (bpf_skc->state == BPF_TCP_LISTEN) {
struct tcp_sock *tp;
tp = bpf_skc_to_tcp_sock(bpf_skc);
if (!tp) {
LOG();
goto release;
}
if (bpf_sk_assign(skb, tp, 0)) {
LOG();
goto release;
}
listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
test_syncookie_helper(ip6h, th, tp, skb);
bpf_sk_release(tp);
return TC_ACT_OK;
}
if (bpf_sk_assign(skb, bpf_skc, 0))
LOG();
release:
bpf_sk_release(bpf_skc);
return TC_ACT_OK;
}
SEC("classifier/ingress")
int cls_ingress(struct __sk_buff *skb)
{
struct ipv6hdr *ip6h;
struct ethhdr *eth;
void *data_end;
data_end = (void *)(long)(skb->data_end);
eth = (struct ethhdr *)(long)(skb->data);
if (eth + 1 > data_end)
return TC_ACT_OK;
if (eth->h_proto != bpf_htons(ETH_P_IPV6))
return TC_ACT_OK;
ip6h = (struct ipv6hdr *)(eth + 1);
if (ip6h + 1 > data_end)
return TC_ACT_OK;
if (ip6h->nexthdr == IPPROTO_TCP)
return handle_ip6_tcp(ip6h, skb);
return TC_ACT_OK;
}
char _license[] SEC("license") = "GPL";
......@@ -7,19 +7,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
enum bpf_addr_array_idx {
ADDR_SRV_IDX,
ADDR_CLI_IDX,
__NR_BPF_ADDR_ARRAY_IDX,
};
enum bpf_result_array_idx {
EGRESS_SRV_IDX,
EGRESS_CLI_IDX,
INGRESS_LISTEN_IDX,
__NR_BPF_RESULT_ARRAY_IDX,
};
#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
......@@ -27,27 +15,6 @@ enum bpf_linum_array_idx {
__NR_BPF_LINUM_ARRAY_IDX,
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
__type(key, __u32);
__type(value, struct sockaddr_in6);
} addr_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
__type(key, __u32);
__type(value, struct bpf_sock);
} sock_result_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
__type(key, __u32);
__type(value, struct bpf_tcp_sock);
} tcp_sock_result_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
......@@ -74,6 +41,17 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
struct bpf_tcp_sock srv_tp = {};
struct bpf_sock listen_sk = {};
struct bpf_sock srv_sk = {};
struct bpf_sock cli_sk = {};
__u64 parent_cg_id = 0;
__u64 child_cg_id = 0;
__u64 lsndtime = 0;
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
......@@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst,
dst->bytes_acked = src->bytes_acked;
}
#define RETURN { \
/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1
#define RET_LOG() ({ \
linum = __LINE__; \
bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
return 1; \
}
bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
return CG_OK; \
})
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
__u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, linum_idx;
struct tcp_sock *ktp;
linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk;
if (!sk || sk->state == 10)
RETURN;
if (!sk)
RET_LOG();
/* Not the testing egress traffic or
* TCP_LISTEN (10) socket will be copied at the ingress side.
*/
if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
sk->state == 10)
return CG_OK;
if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
/* Server socket */
sk_ret = &srv_sk;
tp_ret = &srv_tp;
} else if (sk->dst_port == srv_sa6.sin6_port) {
/* Client socket */
sk_ret = &cli_sk;
tp_ret = &cli_tp;
} else {
/* Not the testing egress traffic */
return CG_OK;
}
/* It must be a fullsock for cgroup_skb/egress prog */
sk = bpf_sk_fullsock(sk);
if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
!is_loopback6(sk->src_ip6))
RETURN;
if (!sk)
RET_LOG();
/* Not the testing egress traffic */
if (sk->protocol != IPPROTO_TCP)
return CG_OK;
tp = bpf_tcp_sock(sk);
if (!tp)
RETURN;
RET_LOG();
srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
if (!srv_sa6 || !cli_sa6)
RETURN;
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
result_idx = EGRESS_SRV_IDX;
else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
result_idx = EGRESS_CLI_IDX;
else
RETURN;
if (sk_ret == &srv_sk) {
ktp = bpf_skc_to_tcp_sock(sk);
sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
if (!sk_ret || !tp_ret)
RETURN;
if (!ktp)
RET_LOG();
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
lsndtime = ktp->lsndtime;
child_cg_id = bpf_sk_cgroup_id(ktp);
if (!child_cg_id)
RET_LOG();
parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
if (!parent_cg_id)
RET_LOG();
if (result_idx == EGRESS_SRV_IDX) {
/* The userspace has created it for srv sk */
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
0, 0);
} else {
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
......@@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb)
}
if (!pkt_out_cnt || !pkt_out_cnt10)
RETURN;
RET_LOG();
/* Even both cnt and cnt10 have lock defined in their BTF,
* intentionally one cnt takes lock while one does not
......@@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb)
pkt_out_cnt10->cnt += 10;
bpf_spin_unlock(&pkt_out_cnt10->lock);
RETURN;
return CG_OK;
}
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
__u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
struct sockaddr_in6 *srv_sa6;
struct bpf_tcp_sock *tp;
__u32 linum, linum_idx;
struct bpf_sock *sk;
linum_idx = INGRESS_LINUM_IDX;
sk = skb->sk;
if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
RETURN;
if (!sk)
RET_LOG();
srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
RETURN;
/* Not the testing ingress traffic to the server */
if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
return CG_OK;
if (sk->state != 10 && sk->state != 12)
RETURN;
/* Only interested in TCP_LISTEN */
if (sk->state != 10)
return CG_OK;
sk = bpf_get_listener_sock(sk);
/* It must be a fullsock for cgroup_skb/ingress prog */
sk = bpf_sk_fullsock(sk);
if (!sk)
RETURN;
RET_LOG();
tp = bpf_tcp_sock(sk);
if (!tp)
RETURN;
sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
if (!sk_ret || !tp_ret)
RETURN;
RET_LOG();
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
skcpy(&listen_sk, sk);
tpcpy(&listen_tp, tp);
RETURN;
return CG_OK;
}
char _license[] SEC("license") = "GPL";
......@@ -854,3 +854,50 @@
.errstr = "Unreleased reference",
.result = REJECT,
},
{
"reference tracking: bpf_sk_release(btf_tcp_sock)",
.insns = {
BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "unknown func",
},
{
"reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
.insns = {
BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
.result_unpriv = REJECT,
.errstr_unpriv = "unknown func",
},
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment