Commit f48a9205 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'fix-fullsock-access-after-bpf_sk_release'

Martin KaFai Lau says:

====================
This set addresses issue about accessing invalid
ptr returned from bpf_tcp_sock() and bpf_sk_fullsock()
after bpf_sk_release().

v4:
- Tried the one "id" approach.  It does not work well and the reason is in
  the Patch 1 commit message.
- Rename refcount_id to ref_obj_id.
- With ref_obj_id, resetting reg->id to 0 is fine in mark_ptr_or_null_reg()
  because ref_obj_id is passed to release_reference() instead of reg->id.
- Also reset reg->ref_obj_id in mark_ptr_or_null_reg() when is_null == true
- sk_to_full_sk() is removed from bpf_sk_fullsock() and bpf_tcp_sock().
- bpf_get_listener_sock() is added to do sk_to_full_sk() in Patch 2.
- If tp is from bpf_tcp_sock(sk) and sk is a refcounted ptr,
  bpf_sk_release(tp) is also allowed.

v3:
- reset reg->refcount_id for the is_null case in mark_ptr_or_null_reg()

v2:
- Remove refcount_id arg from release_reference() because
  id == refcount_id
- Add a WARN_ON_ONCE to mark_ptr_or_null_regs() to catch
  an internal verifier bug.
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 6bf21b54 7681e7b2
...@@ -193,7 +193,6 @@ enum bpf_arg_type { ...@@ -193,7 +193,6 @@ enum bpf_arg_type {
ARG_PTR_TO_CTX, /* pointer to context */ ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
}; };
......
...@@ -66,6 +66,46 @@ struct bpf_reg_state { ...@@ -66,6 +66,46 @@ struct bpf_reg_state {
* same reference to the socket, to determine proper reference freeing. * same reference to the socket, to determine proper reference freeing.
*/ */
u32 id; u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
* from a pointer-cast helper, bpf_sk_fullsock() and
* bpf_tcp_sock().
*
* Consider the following where "sk" is a reference counted
* pointer returned from "sk = bpf_sk_lookup_tcp();":
*
* 1: sk = bpf_sk_lookup_tcp();
* 2: if (!sk) { return 0; }
* 3: fullsock = bpf_sk_fullsock(sk);
* 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
* 5: tp = bpf_tcp_sock(fullsock);
* 6: if (!tp) { bpf_sk_release(sk); return 0; }
* 7: bpf_sk_release(sk);
* 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
*
* After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
* "tp" ptr should be invalidated also. In order to do that,
* the reg holding "fullsock" and "sk" need to remember
* the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
* such that the verifier can reset all regs which have
* ref_obj_id matching the sk_reg->id.
*
* sk_reg->ref_obj_id is set to sk_reg->id at line 1.
* sk_reg->id will stay as NULL-marking purpose only.
* After NULL-marking is done, sk_reg->id can be reset to 0.
*
* After "fullsock = bpf_sk_fullsock(sk);" at line 3,
* fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
*
* After "tp = bpf_tcp_sock(fullsock);" at line 5,
* tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
* which is the same as sk_reg->ref_obj_id.
*
* From the verifier perspective, if sk, fullsock and tp
* are not NULL, they are the same ptr with different
* reg->type. In particular, bpf_sk_release(tp) is also
* allowed and has the same effect as bpf_sk_release(sk).
*/
u32 ref_obj_id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of /* For scalar types (SCALAR_VALUE), this represents our knowledge of
* the actual value. * the actual value.
* For pointer types, this represents the variable part of the offset * For pointer types, this represents the variable part of the offset
......
...@@ -2366,6 +2366,14 @@ union bpf_attr { ...@@ -2366,6 +2366,14 @@ union bpf_attr {
* current value is ect (ECN capable). Works with IPv6 and IPv4. * current value is ect (ECN capable). Works with IPv6 and IPv4.
* Return * Return
* 1 if set, 0 if not set. * 1 if set, 0 if not set.
*
* struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
* Description
* Return a **struct bpf_sock** pointer in TCP_LISTEN state.
* bpf_sk_release() is unnecessary and not allowed.
* Return
* A **struct bpf_sock** pointer on success, or NULL in
* case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2465,7 +2473,8 @@ union bpf_attr { ...@@ -2465,7 +2473,8 @@ union bpf_attr {
FN(spin_unlock), \ FN(spin_unlock), \
FN(sk_fullsock), \ FN(sk_fullsock), \
FN(tcp_sock), \ FN(tcp_sock), \
FN(skb_ecn_set_ce), FN(skb_ecn_set_ce), \
FN(get_listener_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -212,7 +212,7 @@ struct bpf_call_arg_meta { ...@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
int access_size; int access_size;
s64 msize_smax_value; s64 msize_smax_value;
u64 msize_umax_value; u64 msize_umax_value;
int ptr_id; int ref_obj_id;
int func_id; int func_id;
}; };
...@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) ...@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK_OR_NULL; type == PTR_TO_TCP_SOCK_OR_NULL;
} }
static bool type_is_refcounted(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET;
}
static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}
static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
return type_is_refcounted(reg->type);
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{ {
return reg->type == PTR_TO_MAP_VALUE && return reg->type == PTR_TO_MAP_VALUE &&
map_value_has_spin_lock(reg->map_ptr); map_value_has_spin_lock(reg->map_ptr);
} }
static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{ {
return type_is_refcounted_or_null(reg->type); return type == ARG_PTR_TO_SOCK_COMMON;
}
static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_SOCKET;
} }
/* Determine whether the function releases some resources allocated by another /* Determine whether the function releases some resources allocated by another
...@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id) ...@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp; func_id == BPF_FUNC_sk_lookup_udp;
} }
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
func_id == BPF_FUNC_sk_fullsock;
}
/* string representation of 'enum bpf_reg_type' */ /* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = { static const char * const reg_type_str[] = {
[NOT_INIT] = "?", [NOT_INIT] = "?",
...@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, ...@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (t == PTR_TO_STACK) if (t == PTR_TO_STACK)
verbose(env, ",call_%d", func(env, reg)->callsite); verbose(env, ",call_%d", func(env, reg)->callsite);
} else { } else {
verbose(env, "(id=%d", reg->id); verbose(env, "(id=%d ref_obj_id=%d", reg->id,
reg->ref_obj_id);
if (t != SCALAR_VALUE) if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off); verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t)) if (type_is_pkt_pointer(t))
...@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, ...@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type)) if (!type_is_sk_pointer(type))
goto err_type; goto err_type;
} else if (arg_type == ARG_PTR_TO_SOCKET) { if (reg->ref_obj_id) {
expected_type = PTR_TO_SOCKET; if (meta->ref_obj_id) {
if (type != expected_type) verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
goto err_type; regno, reg->ref_obj_id,
if (meta->ptr_id || !reg->id) { meta->ref_obj_id);
verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", return -EFAULT;
meta->ptr_id, reg->id); }
return -EFAULT; meta->ref_obj_id = reg->ref_obj_id;
} }
meta->ptr_id = reg->id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) { if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true)) if (process_spin_lock(env, regno, true))
...@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) ...@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true; return true;
} }
static bool check_refcount_ok(const struct bpf_func_proto *fn) static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{ {
int count = 0; int count = 0;
if (arg_type_is_refcounted(fn->arg1_type)) if (arg_type_may_be_refcounted(fn->arg1_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg2_type)) if (arg_type_may_be_refcounted(fn->arg2_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg3_type)) if (arg_type_may_be_refcounted(fn->arg3_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg4_type)) if (arg_type_may_be_refcounted(fn->arg4_type))
count++; count++;
if (arg_type_is_refcounted(fn->arg5_type)) if (arg_type_may_be_refcounted(fn->arg5_type))
count++; count++;
/* A reference acquiring function cannot acquire
* another refcounted ptr.
*/
if (is_acquire_function(func_id) && count)
return false;
/* We only support one arg being unreferenced at the moment, /* We only support one arg being unreferenced at the moment,
* which is sufficient for the helper functions we have right now. * which is sufficient for the helper functions we have right now.
*/ */
return count <= 1; return count <= 1;
} }
static int check_func_proto(const struct bpf_func_proto *fn) static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{ {
return check_raw_mode_ok(fn) && return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) && check_arg_pair_ok(fn) &&
check_refcount_ok(fn) ? 0 : -EINVAL; check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
} }
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
...@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) ...@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
} }
static void release_reg_references(struct bpf_verifier_env *env, static void release_reg_references(struct bpf_verifier_env *env,
struct bpf_func_state *state, int id) struct bpf_func_state *state,
int ref_obj_id)
{ {
struct bpf_reg_state *regs = state->regs, *reg; struct bpf_reg_state *regs = state->regs, *reg;
int i; int i;
for (i = 0; i < MAX_BPF_REG; i++) for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].id == id) if (regs[i].ref_obj_id == ref_obj_id)
mark_reg_unknown(env, regs, i); mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) { bpf_for_each_spilled_reg(i, state, reg) {
if (!reg) if (!reg)
continue; continue;
if (reg_is_refcounted(reg) && reg->id == id) if (reg->ref_obj_id == ref_obj_id)
__mark_reg_unknown(reg); __mark_reg_unknown(reg);
} }
} }
...@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env, ...@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
* resources. Identify all copies of the same pointer and clear the reference. * resources. Identify all copies of the same pointer and clear the reference.
*/ */
static int release_reference(struct bpf_verifier_env *env, static int release_reference(struct bpf_verifier_env *env,
struct bpf_call_arg_meta *meta) int ref_obj_id)
{ {
struct bpf_verifier_state *vstate = env->cur_state; struct bpf_verifier_state *vstate = env->cur_state;
int err;
int i; int i;
err = release_reference_state(cur_func(env), ref_obj_id);
if (err)
return err;
for (i = 0; i <= vstate->curframe; i++) for (i = 0; i <= vstate->curframe; i++)
release_reg_references(env, vstate->frame[i], meta->ptr_id); release_reg_references(env, vstate->frame[i], ref_obj_id);
return release_reference_state(cur_func(env), meta->ptr_id); return 0;
} }
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
...@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta)); memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access; meta.pkt_access = fn->pkt_access;
err = check_func_proto(fn); err = check_func_proto(fn, func_id);
if (err) { if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n", verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id); func_id_name(func_id), func_id);
...@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err; return err;
} }
} else if (is_release_function(func_id)) { } else if (is_release_function(func_id)) {
err = release_reference(env, &meta); err = release_reference(env, meta.ref_obj_id);
if (err) { if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n", verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id); func_id_name(func_id), func_id);
...@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (id < 0) if (id < 0)
return id; return id;
/* For release_reference() */ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id; regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
} else { } else {
/* For mark_ptr_or_null_reg() */ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].id = ++env->id_gen;
...@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL; return -EINVAL;
} }
if (is_ptr_cast_function(func_id))
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
do_refine_retval_range(regs, fn->ret_type, func_id, &meta); do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id); err = check_map_func_compatibility(env, meta.map_ptr, func_id);
...@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, ...@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK; reg->type = PTR_TO_TCP_SOCK;
} }
if (is_null || !(reg_is_refcounted(reg) || if (is_null) {
reg_may_point_to_spin_lock(reg))) { /* We don't need id and ref_obj_id from this point
/* We don't need id from this point onwards anymore, * onwards anymore, thus we should better reset it,
* thus we should better reset it, so that state * so that state pruning has chances to take effect.
* pruning has chances to take effect. */
reg->id = 0;
reg->ref_obj_id = 0;
} else if (!reg_may_point_to_spin_lock(reg)) {
/* For not-NULL ptr, reg->ref_obj_id will be reset
* in release_reg_references().
*
* reg->id is still used by spin_lock ptr. Other
* than spin_lock ptr type, reg->id can be reset.
*/ */
reg->id = 0; reg->id = 0;
} }
...@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, ...@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
{ {
struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg, *regs = state->regs; struct bpf_reg_state *reg, *regs = state->regs;
u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id; u32 id = regs[regno].id;
int i, j; int i, j;
if (reg_is_refcounted_or_null(&regs[regno]) && is_null) if (ref_obj_id && ref_obj_id == id && is_null)
release_reference_state(state, id); /* regs[regno] is in the " == NULL" branch.
* No one could have freed the reference state before
* doing the NULL check.
*/
WARN_ON_ONCE(release_reference_state(state, id));
for (i = 0; i < MAX_BPF_REG; i++) for (i = 0; i < MAX_BPF_REG; i++)
mark_ptr_or_null_reg(state, &regs[i], id, is_null); mark_ptr_or_null_reg(state, &regs[i], id, is_null);
......
...@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { ...@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{ {
sk = sk_to_full_sk(sk);
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
} }
...@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { ...@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release, .func = bpf_sk_release,
.gpl_only = false, .gpl_only = false,
.ret_type = RET_INTEGER, .ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET, .arg1_type = ARG_PTR_TO_SOCK_COMMON,
}; };
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
...@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, ...@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{ {
sk = sk_to_full_sk(sk);
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk; return (unsigned long)sk;
...@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = { ...@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
.arg1_type = ARG_PTR_TO_SOCK_COMMON, .arg1_type = ARG_PTR_TO_SOCK_COMMON,
}; };
BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
{
sk = sk_to_full_sk(sk);
if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
return (unsigned long)sk;
return (unsigned long)NULL;
}
static const struct bpf_func_proto bpf_get_listener_sock_proto = {
.func = bpf_get_listener_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{ {
unsigned int iphdr_len; unsigned int iphdr_len;
...@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_INET #ifdef CONFIG_INET
case BPF_FUNC_tcp_sock: case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto; return &bpf_tcp_sock_proto;
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
case BPF_FUNC_skb_ecn_set_ce: case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto; return &bpf_skb_ecn_set_ce_proto;
#endif #endif
...@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_release_proto; return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock: case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto; return &bpf_tcp_sock_proto;
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
#endif #endif
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
......
...@@ -2366,6 +2366,14 @@ union bpf_attr { ...@@ -2366,6 +2366,14 @@ union bpf_attr {
* current value is ect (ECN capable). Works with IPv6 and IPv4. * current value is ect (ECN capable). Works with IPv6 and IPv4.
* Return * Return
* 1 if set, 0 if not set. * 1 if set, 0 if not set.
*
* struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
* Description
* Return a **struct bpf_sock** pointer in TCP_LISTEN state.
* bpf_sk_release() is unnecessary and not allowed.
* Return
* A **struct bpf_sock** pointer on success, or NULL in
* case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2465,7 +2473,8 @@ union bpf_attr { ...@@ -2465,7 +2473,8 @@ union bpf_attr {
FN(spin_unlock), \ FN(spin_unlock), \
FN(sk_fullsock), \ FN(sk_fullsock), \
FN(tcp_sock), \ FN(tcp_sock), \
FN(skb_ecn_set_ce), FN(skb_ecn_set_ce), \
FN(get_listener_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = ...@@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_sk_fullsock; (void *) BPF_FUNC_sk_fullsock;
static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_tcp_sock; (void *) BPF_FUNC_tcp_sock;
static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_get_listener_sock;
static int (*bpf_skb_ecn_set_ce)(void *ctx) = static int (*bpf_skb_ecn_set_ce)(void *ctx) =
(void *) BPF_FUNC_skb_ecn_set_ce; (void *) BPF_FUNC_skb_ecn_set_ce;
......
...@@ -8,38 +8,51 @@ ...@@ -8,38 +8,51 @@
#include "bpf_helpers.h" #include "bpf_helpers.h"
#include "bpf_endian.h" #include "bpf_endian.h"
enum bpf_array_idx { enum bpf_addr_array_idx {
SRV_IDX, ADDR_SRV_IDX,
CLI_IDX, ADDR_CLI_IDX,
__NR_BPF_ARRAY_IDX, __NR_BPF_ADDR_ARRAY_IDX,
};
enum bpf_result_array_idx {
EGRESS_SRV_IDX,
EGRESS_CLI_IDX,
INGRESS_LISTEN_IDX,
__NR_BPF_RESULT_ARRAY_IDX,
};
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
}; };
struct bpf_map_def SEC("maps") addr_map = { struct bpf_map_def SEC("maps") addr_map = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32), .key_size = sizeof(__u32),
.value_size = sizeof(struct sockaddr_in6), .value_size = sizeof(struct sockaddr_in6),
.max_entries = __NR_BPF_ARRAY_IDX, .max_entries = __NR_BPF_ADDR_ARRAY_IDX,
}; };
struct bpf_map_def SEC("maps") sock_result_map = { struct bpf_map_def SEC("maps") sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32), .key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_sock), .value_size = sizeof(struct bpf_sock),
.max_entries = __NR_BPF_ARRAY_IDX, .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
}; };
struct bpf_map_def SEC("maps") tcp_sock_result_map = { struct bpf_map_def SEC("maps") tcp_sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32), .key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_tcp_sock), .value_size = sizeof(struct bpf_tcp_sock),
.max_entries = __NR_BPF_ARRAY_IDX, .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
}; };
struct bpf_map_def SEC("maps") linum_map = { struct bpf_map_def SEC("maps") linum_map = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32), .key_size = sizeof(__u32),
.value_size = sizeof(__u32), .value_size = sizeof(__u32),
.max_entries = 1, .max_entries = __NR_BPF_LINUM_ARRAY_IDX,
}; };
static bool is_loopback6(__u32 *a6) static bool is_loopback6(__u32 *a6)
...@@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst, ...@@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst,
#define RETURN { \ #define RETURN { \
linum = __LINE__; \ linum = __LINE__; \
bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
return 1; \ return 1; \
} }
SEC("cgroup_skb/egress") SEC("cgroup_skb/egress")
int read_sock_fields(struct __sk_buff *skb) int egress_read_sock_fields(struct __sk_buff *skb)
{ {
__u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
struct sockaddr_in6 *srv_sa6, *cli_sa6; struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret; struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret; struct bpf_sock *sk, *sk_ret;
__u32 linum, idx0 = 0; __u32 linum, linum_idx;
linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk; sk = skb->sk;
if (!sk || sk->state == 10) if (!sk || sk->state == 10)
...@@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb) ...@@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb)
RETURN; RETURN;
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
idx = srv_idx; result_idx = EGRESS_SRV_IDX;
else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
idx = cli_idx; result_idx = EGRESS_CLI_IDX;
else else
RETURN; RETURN;
sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
if (!sk_ret || !tp_ret)
RETURN;
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
RETURN;
}
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
__u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
struct sockaddr_in6 *srv_sa6;
__u32 linum, linum_idx;
linum_idx = INGRESS_LINUM_IDX;
sk = skb->sk;
if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
RETURN;
srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
RETURN;
if (sk->state != 10 && sk->state != 12)
RETURN;
sk = bpf_get_listener_sock(sk);
if (!sk)
RETURN;
tp = bpf_tcp_sock(sk);
if (!tp)
RETURN;
sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
if (!sk_ret || !tp_ret) if (!sk_ret || !tp_ret)
RETURN; RETURN;
......
...@@ -16,10 +16,23 @@ ...@@ -16,10 +16,23 @@
#include "cgroup_helpers.h" #include "cgroup_helpers.h"
#include "bpf_rlimit.h" #include "bpf_rlimit.h"
enum bpf_array_idx { enum bpf_addr_array_idx {
SRV_IDX, ADDR_SRV_IDX,
CLI_IDX, ADDR_CLI_IDX,
__NR_BPF_ARRAY_IDX, __NR_BPF_ADDR_ARRAY_IDX,
};
enum bpf_result_array_idx {
EGRESS_SRV_IDX,
EGRESS_CLI_IDX,
INGRESS_LISTEN_IDX,
__NR_BPF_RESULT_ARRAY_IDX,
};
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
}; };
#define CHECK(condition, tag, format...) ({ \ #define CHECK(condition, tag, format...) ({ \
...@@ -41,8 +54,16 @@ static int linum_map_fd; ...@@ -41,8 +54,16 @@ static int linum_map_fd;
static int addr_map_fd; static int addr_map_fd;
static int tp_map_fd; static int tp_map_fd;
static int sk_map_fd; static int sk_map_fd;
static __u32 srv_idx = SRV_IDX;
static __u32 cli_idx = CLI_IDX; static __u32 addr_srv_idx = ADDR_SRV_IDX;
static __u32 addr_cli_idx = ADDR_CLI_IDX;
static __u32 egress_srv_idx = EGRESS_SRV_IDX;
static __u32 egress_cli_idx = EGRESS_CLI_IDX;
static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
static void init_loopback6(struct sockaddr_in6 *sa6) static void init_loopback6(struct sockaddr_in6 *sa6)
{ {
...@@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp) ...@@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp)
static void check_result(void) static void check_result(void)
{ {
struct bpf_tcp_sock srv_tp, cli_tp; struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
struct bpf_sock srv_sk, cli_sk; struct bpf_sock srv_sk, cli_sk, listen_sk;
__u32 linum, idx0 = 0; __u32 ingress_linum, egress_linum;
int err; int err;
err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno); "err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", &ingress_linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno); "err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno); "err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno); "err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno); "err:%d errno:%d", err, errno);
printf("listen_sk: ");
print_sk(&listen_sk);
printf("\n");
printf("srv_sk: "); printf("srv_sk: ");
print_sk(&srv_sk); print_sk(&srv_sk);
printf("\n"); printf("\n");
...@@ -124,6 +162,10 @@ static void check_result(void) ...@@ -124,6 +162,10 @@ static void check_result(void)
print_sk(&cli_sk); print_sk(&cli_sk);
printf("\n"); printf("\n");
printf("listen_tp: ");
print_tp(&listen_tp);
printf("\n");
printf("srv_tp: "); printf("srv_tp: ");
print_tp(&srv_tp); print_tp(&srv_tp);
printf("\n"); printf("\n");
...@@ -132,6 +174,19 @@ static void check_result(void) ...@@ -132,6 +174,19 @@ static void check_result(void)
print_tp(&cli_tp); print_tp(&cli_tp);
printf("\n"); printf("\n");
CHECK(listen_sk.state != 10 ||
listen_sk.family != AF_INET6 ||
listen_sk.protocol != IPPROTO_TCP ||
memcmp(listen_sk.src_ip6, &in6addr_loopback,
sizeof(listen_sk.src_ip6)) ||
listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
listen_sk.dst_port,
"Unexpected listen_sk",
"Check listen_sk output. ingress_linum:%u",
ingress_linum);
CHECK(srv_sk.state == 10 || CHECK(srv_sk.state == 10 ||
!srv_sk.state || !srv_sk.state ||
srv_sk.family != AF_INET6 || srv_sk.family != AF_INET6 ||
...@@ -142,7 +197,8 @@ static void check_result(void) ...@@ -142,7 +197,8 @@ static void check_result(void)
sizeof(srv_sk.dst_ip6)) || sizeof(srv_sk.dst_ip6)) ||
srv_sk.src_port != ntohs(srv_sa6.sin6_port) || srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
srv_sk.dst_port != cli_sa6.sin6_port, srv_sk.dst_port != cli_sa6.sin6_port,
"Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
egress_linum);
CHECK(cli_sk.state == 10 || CHECK(cli_sk.state == 10 ||
!cli_sk.state || !cli_sk.state ||
...@@ -154,21 +210,31 @@ static void check_result(void) ...@@ -154,21 +210,31 @@ static void check_result(void)
sizeof(cli_sk.dst_ip6)) || sizeof(cli_sk.dst_ip6)) ||
cli_sk.src_port != ntohs(cli_sa6.sin6_port) || cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
cli_sk.dst_port != srv_sa6.sin6_port, cli_sk.dst_port != srv_sa6.sin6_port,
"Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
egress_linum);
CHECK(listen_tp.data_segs_out ||
listen_tp.data_segs_in ||
listen_tp.total_retrans ||
listen_tp.bytes_acked,
"Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
ingress_linum);
CHECK(srv_tp.data_segs_out != 1 || CHECK(srv_tp.data_segs_out != 1 ||
srv_tp.data_segs_in || srv_tp.data_segs_in ||
srv_tp.snd_cwnd != 10 || srv_tp.snd_cwnd != 10 ||
srv_tp.total_retrans || srv_tp.total_retrans ||
srv_tp.bytes_acked != DATA_LEN, srv_tp.bytes_acked != DATA_LEN,
"Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
egress_linum);
CHECK(cli_tp.data_segs_out || CHECK(cli_tp.data_segs_out ||
cli_tp.data_segs_in != 1 || cli_tp.data_segs_in != 1 ||
cli_tp.snd_cwnd != 10 || cli_tp.snd_cwnd != 10 ||
cli_tp.total_retrans || cli_tp.total_retrans ||
cli_tp.bytes_received != DATA_LEN, cli_tp.bytes_received != DATA_LEN,
"Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
egress_linum);
} }
static void test(void) static void test(void)
...@@ -211,10 +277,10 @@ static void test(void) ...@@ -211,10 +277,10 @@ static void test(void)
err, errno); err, errno);
/* Update addr_map with srv_sa6 and cli_sa6 */ /* Update addr_map with srv_sa6 and cli_sa6 */
err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno); CHECK(err, "map_update", "err:%d errno:%d", err, errno);
err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno); CHECK(err, "map_update", "err:%d errno:%d", err, errno);
/* Connect from cli_sa6 to srv_sa6 */ /* Connect from cli_sa6 to srv_sa6 */
...@@ -273,9 +339,9 @@ int main(int argc, char **argv) ...@@ -273,9 +339,9 @@ int main(int argc, char **argv)
struct bpf_prog_load_attr attr = { struct bpf_prog_load_attr attr = {
.file = "test_sock_fields_kern.o", .file = "test_sock_fields_kern.o",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB, .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.expected_attach_type = BPF_CGROUP_INET_EGRESS,
}; };
int cgroup_fd, prog_fd, err; int cgroup_fd, egress_fd, ingress_fd, err;
struct bpf_program *ingress_prog;
struct bpf_object *obj; struct bpf_object *obj;
struct bpf_map *map; struct bpf_map *map;
...@@ -293,12 +359,24 @@ int main(int argc, char **argv) ...@@ -293,12 +359,24 @@ int main(int argc, char **argv)
err = join_cgroup(TEST_CGROUP); err = join_cgroup(TEST_CGROUP);
CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); ingress_prog = bpf_object__find_program_by_title(obj,
"cgroup_skb/ingress");
CHECK(!ingress_prog,
"bpf_object__find_program_by_title(cgroup_skb/ingress)",
"not found");
ingress_fd = bpf_program__fd(ingress_prog);
err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
"err:%d errno%d", err, errno); "err:%d errno%d", err, errno);
err = bpf_prog_attach(ingress_fd, cgroup_fd,
BPF_CGROUP_INET_INGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
"err:%d errno%d", err, errno);
close(cgroup_fd); close(cgroup_fd);
map = bpf_object__find_map_by_name(obj, "addr_map"); map = bpf_object__find_map_by_name(obj, "addr_map");
......
...@@ -605,3 +605,171 @@ ...@@ -605,3 +605,171 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS, .prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT, .result = ACCEPT,
}, },
{
"reference tracking: use ptr from bpf_tcp_sock() after release",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
},
{
"reference tracking: use ptr from bpf_sk_fullsock() after release",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
},
{
"reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
},
{
"reference tracking: use sk after bpf_sk_release(tp)",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
},
{
"reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
{
"reference tracking: bpf_sk_release(listen_sk)",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "reference has not been acquired before",
},
{
/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
"reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
.insns = {
BPF_SK_LOOKUP,
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "invalid mem access",
},
...@@ -342,7 +342,7 @@ ...@@ -342,7 +342,7 @@
}, },
.prog_type = BPF_PROG_TYPE_SCHED_CLS, .prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT, .result = REJECT,
.errstr = "type=sock_common expected=sock", .errstr = "reference has not been acquired before",
}, },
{ {
"bpf_sk_release(bpf_sk_fullsock(skb->sk))", "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
...@@ -380,5 +380,5 @@ ...@@ -380,5 +380,5 @@
}, },
.prog_type = BPF_PROG_TYPE_SCHED_CLS, .prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT, .result = REJECT,
.errstr = "type=tcp_sock expected=sock", .errstr = "reference has not been acquired before",
}, },
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment