Commit bb7a4257 authored by Alexei Starovoitov

Merge branch 'Align BPF TCP CCs implementing cong_control() with non-BPF CCs'

Jörn-Thorben Hinz says:

====================

This series corrects some inconveniences for a BPF TCP CC that
implements and uses tcp_congestion_ops.cong_control(). Until now, such a
CC did not have all necessary write access to struct sock and
unnecessarily needed to implement cong_avoid().

v4:
 - Remove braces around single statements after if
 - Don’t check pointer passed to bpf_link__destroy()
v3:
 - Add a selftest writing sk_pacing_*
 - Add a selftest with incomplete tcp_congestion_ops
 - Add a selftest with unsupported get_info()
 - Remove an unused variable
 - Reword a comment about reg() in bpf_struct_ops_map_update_elem()
v2:
 - Drop redundant check for required functions and just rely on
   tcp_register_congestion_control() (Martin KaFai Lau)
====================
Reviewed-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 9676fecc f14a3f64
...@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, ...@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock; goto unlock;
} }
/* Error during st_ops->reg(). It is very unlikely since /* Error during st_ops->reg(). Can happen if this struct_ops needs to be
* the above init_member() should have caught it earlier * verified as a whole, after all init_member() calls. Can also happen if
* before reg(). The only possibility is if there was a race * there was a race in registering the struct_ops (under the same name) to
* in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps. * a sub-system through different struct_ops's maps.
*/ */
set_memory_nx((long)st_map->image, 1); set_memory_nx((long)st_map->image, 1);
......
...@@ -14,18 +14,6 @@ ...@@ -14,18 +14,6 @@
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */ /* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
extern struct bpf_struct_ops bpf_tcp_congestion_ops; extern struct bpf_struct_ops bpf_tcp_congestion_ops;
/* Offsets of the tcp_congestion_ops members that is_optional() reports as
 * optional: bpf_tcp_ca_init_member() does not reject a zero prog fd for
 * any member listed here.
 */
static u32 optional_ops[] = {
offsetof(struct tcp_congestion_ops, init),
offsetof(struct tcp_congestion_ops, release),
offsetof(struct tcp_congestion_ops, set_state),
offsetof(struct tcp_congestion_ops, cwnd_event),
offsetof(struct tcp_congestion_ops, in_ack_event),
offsetof(struct tcp_congestion_ops, pkts_acked),
offsetof(struct tcp_congestion_ops, min_tso_segs),
offsetof(struct tcp_congestion_ops, sndbuf_expand),
offsetof(struct tcp_congestion_ops, cong_control),
};
static u32 unsupported_ops[] = { static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info), offsetof(struct tcp_congestion_ops, get_info),
}; };
...@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf) ...@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
return 0; return 0;
} }
/* Tell whether @member_offset equals one of the offsets stored in
 * optional_ops[]. Returns true on a match, false otherwise.
 */
static bool is_optional(u32 member_offset)
{
	unsigned int remaining = ARRAY_SIZE(optional_ops);

	/* Scan order is irrelevant: only membership is reported. */
	while (remaining--) {
		if (optional_ops[remaining] == member_offset)
			return true;
	}

	return false;
}
static bool is_unsupported(u32 member_offset) static bool is_unsupported(u32 member_offset)
{ {
unsigned int i; unsigned int i;
...@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, ...@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
} }
switch (off) { switch (off) {
case offsetof(struct sock, sk_pacing_rate):
end = offsetofend(struct sock, sk_pacing_rate);
break;
case offsetof(struct sock, sk_pacing_status):
end = offsetofend(struct sock, sk_pacing_status);
break;
case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv): case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
end = offsetofend(struct inet_connection_sock, icsk_ca_priv); end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
break; break;
...@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, ...@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{ {
const struct tcp_congestion_ops *utcp_ca; const struct tcp_congestion_ops *utcp_ca;
struct tcp_congestion_ops *tcp_ca; struct tcp_congestion_ops *tcp_ca;
int prog_fd;
u32 moff; u32 moff;
utcp_ca = (const struct tcp_congestion_ops *)udata; utcp_ca = (const struct tcp_congestion_ops *)udata;
...@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, ...@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
return 1; return 1;
} }
if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
return 0;
/* Ensure bpf_prog is provided for compulsory func ptr */
prog_fd = (int)(*(unsigned long *)(udata + moff));
if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
return -EINVAL;
return 0; return 0;
} }
......
...@@ -9,6 +9,9 @@ ...@@ -9,6 +9,9 @@
#include "bpf_cubic.skel.h" #include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h" #include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h" #include "bpf_dctcp_release.skel.h"
#include "tcp_ca_write_sk_pacing.skel.h"
#include "tcp_ca_incompl_cong_ops.skel.h"
#include "tcp_ca_unsupp_cong_op.skel.h"
#ifndef ENOTSUPP #ifndef ENOTSUPP
#define ENOTSUPP 524 #define ENOTSUPP 524
...@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void) ...@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
bpf_dctcp_release__destroy(rel_skel); bpf_dctcp_release__destroy(rel_skel);
} }
/* Subtest: open and load the tcp_ca_write_sk_pacing skeleton, then attach
 * its write_sk_pacing struct_ops map. Both steps are asserted to succeed.
 */
static void test_write_sk_pacing(void)
{
	struct tcp_ca_write_sk_pacing *pacing_skel =
		tcp_ca_write_sk_pacing__open_and_load();
	struct bpf_link *ops_link;

	if (ASSERT_OK_PTR(pacing_skel, "open_and_load")) {
		ops_link = bpf_map__attach_struct_ops(pacing_skel->maps.write_sk_pacing);
		ASSERT_OK_PTR(ops_link, "attach_struct_ops");

		/* Clean up regardless of the attach outcome. */
		bpf_link__destroy(ops_link);
		tcp_ca_write_sk_pacing__destroy(pacing_skel);
	}
}
/* Subtest: a tcp_congestion_ops lacking both cong_avoid() and
 * cong_control() must load, but attaching it is asserted to fail.
 */
static void test_incompl_cong_ops(void)
{
	struct tcp_ca_incompl_cong_ops *incompl_skel =
		tcp_ca_incompl_cong_ops__open_and_load();
	struct bpf_link *ops_link;

	if (ASSERT_OK_PTR(incompl_skel, "open_and_load")) {
		/* The missing cong_avoid()/cong_control() is not reported
		 * any earlier than this attach call.
		 */
		ops_link = bpf_map__attach_struct_ops(incompl_skel->maps.incompl_cong_ops);
		ASSERT_ERR_PTR(ops_link, "attach_struct_ops");

		bpf_link__destroy(ops_link);
		tcp_ca_incompl_cong_ops__destroy(incompl_skel);
	}
}
/* Subtest: a struct_ops that sets the unsupported get_info() member is
 * asserted to fail at open_and_load time, and the expected error text is
 * asserted to have been seen while the debug print hook was installed.
 */
static void test_unsupp_cong_op(void)
{
	struct tcp_ca_unsupp_cong_op *unsupp_skel;
	libbpf_print_fn_t saved_print_fn;

	/* Arm the message matcher before swapping the print callback.
	 * NOTE(review): presumably libbpf_debug_print sets `found` when a
	 * log line contains `err_str`; both live outside this view.
	 */
	found = false;
	err_str = "attach to unsupported member get_info";
	saved_print_fn = libbpf_set_print(libbpf_debug_print);

	unsupp_skel = tcp_ca_unsupp_cong_op__open_and_load();
	ASSERT_NULL(unsupp_skel, "open_and_load");
	ASSERT_EQ(found, true, "expected_err_msg");

	tcp_ca_unsupp_cong_op__destroy(unsupp_skel);

	/* Put the previous print callback back in place. */
	libbpf_set_print(saved_print_fn);
}
void test_bpf_tcp_ca(void) void test_bpf_tcp_ca(void)
{ {
if (test__start_subtest("dctcp")) if (test__start_subtest("dctcp"))
...@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void) ...@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
test_dctcp_fallback(); test_dctcp_fallback();
if (test__start_subtest("rel_setsockopt")) if (test__start_subtest("rel_setsockopt"))
test_rel_setsockopt(); test_rel_setsockopt();
if (test__start_subtest("write_sk_pacing"))
test_write_sk_pacing();
if (test__start_subtest("incompl_cong_ops"))
test_incompl_cong_ops();
if (test__start_subtest("unsupp_cong_op"))
test_unsupp_cong_op();
} }
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
/* View a struct sock pointer as a struct tcp_sock pointer. The cast also
 * drops the const qualifier carried by the argument.
 */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}
/* ssthresh callback: hands back the socket's current snd_ssthresh. */
SEC("struct_ops/incompl_cong_ops_ssthresh")
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->snd_ssthresh;
}
/* undo_cwnd callback: hands back the socket's current snd_cwnd. */
SEC("struct_ops/incompl_cong_ops_undo_cwnd")
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->snd_cwnd;
}
/* Deliberately incomplete congestion-control ops: neither of the required
 * cong_avoid() and cong_control() callbacks is provided.
 */
SEC(".struct_ops")
struct tcp_congestion_ops incompl_cong_ops = {
	.name = "bpf_incompl_ops",
	.undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
	.ssthresh = (void *)incompl_cong_ops_ssthresh,
};
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
/* get_info callback stub: ignores every argument and returns 0. It exists
 * only so that the struct_ops below populates the get_info member.
 */
SEC("struct_ops/unsupp_cong_op_get_info")
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
		union tcp_cc_info *info)
{
	const size_t ret = 0;

	return ret;
}
/* Congestion-control ops whose only callback is get_info(). */
SEC(".struct_ops")
struct tcp_congestion_ops unsupp_cong_op = {
	.name = "bpf_unsupp_op",
	.get_info = (void *)unsupp_cong_op_get_info,
};
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
/* Number of microseconds in one second, as an unsigned long constant. */
#define USEC_PER_SEC 1000000UL
/* Smaller of two values. This is a plain function-like macro: whichever
 * argument is selected is evaluated a second time, so only pass
 * expressions without side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
/* View a struct sock pointer as a struct tcp_sock pointer. The cast also
 * drops the const qualifier carried by the argument.
 */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}
/* init callback of the write_sk_pacing struct_ops: writes SK_PACING_NEEDED
 * into sk->sk_pacing_status. Which kind of write is emitted depends on
 * whether ENABLE_ATOMICS_TESTS is defined at build time.
 */
SEC("struct_ops/write_sk_pacing_init")
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
{
#ifdef ENABLE_ATOMICS_TESTS
/* Atomic variant: the status is replaced only if it currently equals
 * SK_PACING_NONE.
 */
__sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
SK_PACING_NEEDED);
#else
/* Plain store variant: the status is overwritten unconditionally. */
sk->sk_pacing_status = SK_PACING_NEEDED;
#endif
}
/* cong_control callback: computes a pacing rate from the congestion window,
 * the cached MSS and the smoothed RTT, then stores it in sk->sk_pacing_rate,
 * capped at sk->sk_max_pacing_rate. @rs is not used.
 */
SEC("struct_ops/write_sk_pacing_cong_control")
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
	      const struct rate_sample *rs)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* Substitute 1U << 3 when srtt_us is still zero; this also keeps the
	 * division below from dividing by zero. The parentheses only make
	 * the existing precedence explicit.
	 */
	unsigned long srtt = tp->srtt_us ?: (1U << 3);
	/* Widen before multiplying: snd_cwnd and mss_cache are 32-bit fields
	 * of struct tcp_sock, so their product would otherwise wrap in 32
	 * bits before being scaled by USEC_PER_SEC.
	 */
	unsigned long window_bytes = (unsigned long)tp->snd_cwnd * tp->mss_cache;
	unsigned long rate = ((window_bytes * USEC_PER_SEC) << 3) / srtt;

	sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
}
/* ssthresh callback: hands back the socket's current snd_ssthresh. */
SEC("struct_ops/write_sk_pacing_ssthresh")
__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->snd_ssthresh;
}
/* undo_cwnd callback: hands back the socket's current snd_cwnd. */
SEC("struct_ops/write_sk_pacing_undo_cwnd")
__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return tp->snd_cwnd;
}
/* Congestion-control ops built around cong_control() instead of
 * cong_avoid(); the init and cong_control callbacks write the sk_pacing_*
 * fields of the socket.
 */
SEC(".struct_ops")
struct tcp_congestion_ops write_sk_pacing = {
	.name = "bpf_w_sk_pacing",
	.undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
	.ssthresh = (void *)write_sk_pacing_ssthresh,
	.cong_control = (void *)write_sk_pacing_cong_control,
	.init = (void *)write_sk_pacing_init,
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment