Commit 0e32dfc8 authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov

bpf: Enable TCP congestion control kfunc from modules

This commit moves the BTF ID lookup into the newly added registration
helper, in such a way that the bbr, cubic, and dctcp implementations set
up their sets in the bpf_tcp_ca kfunc_btf_set list, while the ones not
dependent on modules are still looked up from the wrapper function.
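
For illustration, the registration pattern a congestion control module follows
after this change looks roughly like the sketch below. The tcp_foo/foo_* names
are placeholders for a hypothetical module; the real hunks for bbr, cubic, and
dctcp are in the diff that follows.

  #include <linux/btf.h>
  #include <linux/btf_ids.h>
  #include <linux/module.h>
  #include <net/tcp.h>

  /* foo_init(), foo_ssthresh(), and tcp_foo_cong_ops are assumed to be
   * defined elsewhere in this hypothetical module.
   */
  BTF_SET_START(tcp_foo_kfunc_ids)
  BTF_ID(func, foo_init)
  BTF_ID(func, foo_ssthresh)
  BTF_SET_END(tcp_foo_kfunc_ids)

  /* Bundles the set together with THIS_MODULE as its owner. */
  static DEFINE_KFUNC_BTF_ID_SET(&tcp_foo_kfunc_ids, tcp_foo_kfunc_btf_set);

  static int __init foo_register(void)
  {
  	int ret;

  	ret = tcp_register_congestion_control(&tcp_foo_cong_ops);
  	if (ret)
  		return ret;
  	/* Make the set visible to the bpf_tcp_ca verifier callback. */
  	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_foo_kfunc_btf_set);
  	return 0;
  }

  static void __exit foo_unregister(void)
  {
  	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_foo_kfunc_btf_set);
  	tcp_unregister_congestion_control(&tcp_foo_cong_ops);
  }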

This lifts the restriction that they be compiled as built-in objects;
they can now be loaded as modules if required. Also modify
Makefile.modfinal to call resolve_btfids for each module.

Note that since kernel kfunc_ids never overlap with module kfunc_ids, we
only match the owner for module btf id sets.
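
Conceptually, the module-set lookup backing this (bpf_check_mod_kfunc_call,
added earlier in this series) only considers sets registered by the module
that owns the kfunc's BTF. A rough sketch of that logic, not necessarily the
exact in-tree code:

  /* Sketch only: field names follow the kfunc_btf_id_list/kfunc_btf_id_set
   * initializers visible in the hunks below; locking and other details may
   * differ from the in-tree helper.
   */
  bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
  			      struct module *owner)
  {
  	struct kfunc_btf_id_set *s;
  	bool ret = false;

  	if (!owner)	/* vmlinux kfunc IDs are handled by the caller */
  		return false;

  	mutex_lock(&klist->mutex);
  	list_for_each_entry(s, &klist->list, list) {
  		if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
  			ret = true;
  			break;
  		}
  	}
  	mutex_unlock(&klist->mutex);
  	return ret;
  }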

See the following commits for background on the use of:

 CONFIG_X86 ifdef:
 569c484f (bpf: Limit static tcp-cc functions in the .BTF_ids list to x86)

 CONFIG_DYNAMIC_FTRACE ifdef:
 7aae231a (bpf: tcp: Limit calling some tcp cc functions to CONFIG_DYNAMIC_FTRACE)
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211002011757.311265-6-memxor@gmail.com
parent f614f2c7
@@ -274,4 +274,6 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
 	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \
 					 THIS_MODULE }
 
+extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
+
 #endif
@@ -6395,3 +6395,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 	struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
 					  __MUTEX_INITIALIZER(name.mutex) }; \
 	EXPORT_SYMBOL_GPL(name)
+
+DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
@@ -223,41 +223,13 @@ BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
-#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
-BTF_ID(func, cubictcp_init)
-BTF_ID(func, cubictcp_recalc_ssthresh)
-BTF_ID(func, cubictcp_cong_avoid)
-BTF_ID(func, cubictcp_state)
-BTF_ID(func, cubictcp_cwnd_event)
-BTF_ID(func, cubictcp_acked)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP)
-BTF_ID(func, dctcp_init)
-BTF_ID(func, dctcp_update_alpha)
-BTF_ID(func, dctcp_cwnd_event)
-BTF_ID(func, dctcp_ssthresh)
-BTF_ID(func, dctcp_cwnd_undo)
-BTF_ID(func, dctcp_state)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_BBR)
-BTF_ID(func, bbr_init)
-BTF_ID(func, bbr_main)
-BTF_ID(func, bbr_sndbuf_expand)
-BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_event)
-BTF_ID(func, bbr_ssthresh)
-BTF_ID(func, bbr_min_tso_segs)
-BTF_ID(func, bbr_set_state)
-#endif
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_X86 */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
 static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
 {
-	return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
+	if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
+		return true;
+	return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
 }
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
...
@@ -56,6 +56,8 @@
  * otherwise TCP stack falls back to an internal pacing using one high
  * resolution timer per TCP socket and may use more resources.
  */
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 #include <linux/inet_diag.h>
@@ -1152,14 +1154,38 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.set_state = bbr_set_state,
 };
 
+BTF_SET_START(tcp_bbr_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, bbr_init)
+BTF_ID(func, bbr_main)
+BTF_ID(func, bbr_sndbuf_expand)
+BTF_ID(func, bbr_undo_cwnd)
+BTF_ID(func, bbr_cwnd_event)
+BTF_ID(func, bbr_ssthresh)
+BTF_ID(func, bbr_min_tso_segs)
+BTF_ID(func, bbr_set_state)
+#endif
+#endif
+BTF_SET_END(tcp_bbr_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+
 static int __init bbr_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
+	ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit bbr_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
 	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
...
@@ -25,6 +25,8 @@
  */
 
 #include <linux/mm.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <linux/math64.h>
 #include <net/tcp.h>
@@ -482,8 +484,25 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.name = "cubic",
 };
 
+BTF_SET_START(tcp_cubic_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, cubictcp_init)
+BTF_ID(func, cubictcp_recalc_ssthresh)
+BTF_ID(func, cubictcp_cong_avoid)
+BTF_ID(func, cubictcp_state)
+BTF_ID(func, cubictcp_cwnd_event)
+BTF_ID(func, cubictcp_acked)
+#endif
+#endif
+BTF_SET_END(tcp_cubic_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+
 static int __init cubictcp_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 
 	/* Precompute a bunch of the scaling factors that are used per-packet
@@ -514,11 +533,16 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	return tcp_register_congestion_control(&cubictcp);
+	ret = tcp_register_congestion_control(&cubictcp);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit cubictcp_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
 	tcp_unregister_congestion_control(&cubictcp);
 }
...
@@ -36,6 +36,8 @@
  * Glenn Judd <glenn.judd@morganstanley.com>
  */
 
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <net/tcp.h>
@@ -236,14 +238,36 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
 	.name = "dctcp-reno",
 };
 
+BTF_SET_START(tcp_dctcp_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, dctcp_init)
+BTF_ID(func, dctcp_update_alpha)
+BTF_ID(func, dctcp_cwnd_event)
+BTF_ID(func, dctcp_ssthresh)
+BTF_ID(func, dctcp_cwnd_undo)
+BTF_ID(func, dctcp_state)
+#endif
+#endif
+BTF_SET_END(tcp_dctcp_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+
 static int __init dctcp_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-	return tcp_register_congestion_control(&dctcp);
+	ret = tcp_register_congestion_control(&dctcp);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit dctcp_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
 	tcp_unregister_congestion_control(&dctcp);
 }
...
@@ -41,6 +41,7 @@ quiet_cmd_btf_ko = BTF [M] $@
       cmd_btf_ko = 						\
 	if [ -f vmlinux ]; then					\
 		LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \
+		$(RESOLVE_BTFIDS) -b vmlinux $@; 		\
 	else							\
 		printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
 	fi;
...