Commit fc7566ad authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov

bpf: Introduce bpf_preempt_[disable,enable] kfuncs

Introduce two new BPF kfuncs, bpf_preempt_disable and
bpf_preempt_enable. These kfuncs allow disabling preemption in BPF
programs. Nesting is allowed, since the intended use cases include
building native BPF spin locks without kernel helper involvement. Apart
from that, this can be used to protect per-CPU data structures in cases
where programs (or userspace) may preempt one another. Currently, while
per-CPU access is stable, whether it will be consistent is not
guaranteed, as only migration is disabled for BPF programs.

Global functions are disallowed from being called, but support for them
will be added as a follow-up, not just for the preempt kfuncs but for the
rcu_read_lock kfuncs as well. Static subprog calls are permitted. Sleepable
helpers and kfuncs are disallowed in non-preemptible regions.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20240424031315.2757363-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent dc92febf
...@@ -421,6 +421,7 @@ struct bpf_verifier_state { ...@@ -421,6 +421,7 @@ struct bpf_verifier_state {
struct bpf_active_lock active_lock; struct bpf_active_lock active_lock;
bool speculative; bool speculative;
bool active_rcu_lock; bool active_rcu_lock;
u32 active_preempt_lock;
/* If this state was ever pointed-to by other state's loop_entry field /* If this state was ever pointed-to by other state's loop_entry field
* this flag would be set to true. Used to avoid freeing such states * this flag would be set to true. Used to avoid freeing such states
* while they are still in use. * while they are still in use.
......
...@@ -2734,6 +2734,16 @@ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq, ...@@ -2734,6 +2734,16 @@ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ); return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
} }
/* bpf_preempt_disable - BPF kfunc that disables preemption.
 *
 * Takes no arguments and returns nothing; it simply calls
 * preempt_disable(). Per the commit description, calls may nest and are
 * expected to be paired with bpf_preempt_enable().
 */
__bpf_kfunc void bpf_preempt_disable(void)
{
preempt_disable();
}
/* bpf_preempt_enable - BPF kfunc that re-enables preemption.
 *
 * Takes no arguments and returns nothing; it simply calls
 * preempt_enable(). Intended as the counterpart of
 * bpf_preempt_disable().
 */
__bpf_kfunc void bpf_preempt_enable(void)
{
preempt_enable();
}
__bpf_kfunc_end_defs(); __bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids) BTF_KFUNCS_START(generic_btf_ids)
...@@ -2814,6 +2824,8 @@ BTF_ID_FLAGS(func, bpf_modify_return_test_tp) ...@@ -2814,6 +2824,8 @@ BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
BTF_ID_FLAGS(func, bpf_wq_init) BTF_ID_FLAGS(func, bpf_wq_init)
BTF_ID_FLAGS(func, bpf_wq_set_callback_impl) BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
BTF_ID_FLAGS(func, bpf_wq_start) BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_KFUNCS_END(common_btf_ids) BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = { static const struct btf_kfunc_id_set common_kfunc_set = {
......
...@@ -1434,6 +1434,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, ...@@ -1434,6 +1434,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
} }
dst_state->speculative = src->speculative; dst_state->speculative = src->speculative;
dst_state->active_rcu_lock = src->active_rcu_lock; dst_state->active_rcu_lock = src->active_rcu_lock;
dst_state->active_preempt_lock = src->active_preempt_lock;
dst_state->in_sleepable = src->in_sleepable; dst_state->in_sleepable = src->in_sleepable;
dst_state->curframe = src->curframe; dst_state->curframe = src->curframe;
dst_state->active_lock.ptr = src->active_lock.ptr; dst_state->active_lock.ptr = src->active_lock.ptr;
...@@ -9599,6 +9600,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -9599,6 +9600,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL; return -EINVAL;
} }
/* Only global subprogs cannot be called with preemption disabled. */
if (env->cur_state->active_preempt_lock) {
verbose(env, "global function calls are not allowed with preemption disabled,\n"
"use static function instead\n");
return -EINVAL;
}
if (err) { if (err) {
verbose(env, "Caller passes invalid args into func#%d ('%s')\n", verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
subprog, sub_name); subprog, sub_name);
...@@ -10285,6 +10293,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -10285,6 +10293,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
env->insn_aux_data[insn_idx].storage_get_func_atomic = true; env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
} }
if (env->cur_state->active_preempt_lock) {
if (fn->might_sleep) {
verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
func_id_name(func_id), func_id);
return -EINVAL;
}
if (in_sleepable(env) && is_storage_get_function(func_id))
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
}
meta.func_id = func_id; meta.func_id = func_id;
/* check args */ /* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
...@@ -11027,6 +11046,8 @@ enum special_kfunc_type { ...@@ -11027,6 +11046,8 @@ enum special_kfunc_type {
KF_bpf_percpu_obj_drop_impl, KF_bpf_percpu_obj_drop_impl,
KF_bpf_throw, KF_bpf_throw,
KF_bpf_wq_set_callback_impl, KF_bpf_wq_set_callback_impl,
KF_bpf_preempt_disable,
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new, KF_bpf_iter_css_task_new,
}; };
...@@ -11081,6 +11102,8 @@ BTF_ID(func, bpf_percpu_obj_new_impl) ...@@ -11081,6 +11102,8 @@ BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw) BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback_impl) BTF_ID(func, bpf_wq_set_callback_impl)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS #ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new) BTF_ID(func, bpf_iter_css_task_new)
#else #else
...@@ -11107,6 +11130,16 @@ static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta) ...@@ -11107,6 +11130,16 @@ static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock]; return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
} }
/* Report whether the kfunc call described by @meta targets
 * bpf_preempt_disable, i.e. whether meta->func_id equals the
 * special_kfunc_list entry at index KF_bpf_preempt_disable.
 */
static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
{
	if (meta->func_id != special_kfunc_list[KF_bpf_preempt_disable])
		return false;

	return true;
}
/* Report whether the kfunc call described by @meta targets
 * bpf_preempt_enable, i.e. whether meta->func_id equals the
 * special_kfunc_list entry at index KF_bpf_preempt_enable.
 */
static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
{
	if (meta->func_id != special_kfunc_list[KF_bpf_preempt_enable])
		return false;

	return true;
}
static enum kfunc_ptr_arg_type static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta, struct bpf_kfunc_call_arg_meta *meta,
...@@ -12195,11 +12228,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char ...@@ -12195,11 +12228,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p) int *insn_idx_p)
{ {
const struct btf_type *t, *ptr_type; bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
u32 i, nargs, ptr_type_id, release_ref_obj_id; u32 i, nargs, ptr_type_id, release_ref_obj_id;
struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name; const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock; const struct btf_type *t, *ptr_type;
struct bpf_kfunc_call_arg_meta meta; struct bpf_kfunc_call_arg_meta meta;
struct bpf_insn_aux_data *insn_aux; struct bpf_insn_aux_data *insn_aux;
int err, insn_idx = *insn_idx_p; int err, insn_idx = *insn_idx_p;
...@@ -12260,6 +12293,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -12260,6 +12293,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
if (env->cur_state->active_rcu_lock) { if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state; struct bpf_func_state *state;
struct bpf_reg_state *reg; struct bpf_reg_state *reg;
...@@ -12292,6 +12328,22 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -12292,6 +12328,22 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL; return -EINVAL;
} }
if (env->cur_state->active_preempt_lock) {
if (preempt_disable) {
env->cur_state->active_preempt_lock++;
} else if (preempt_enable) {
env->cur_state->active_preempt_lock--;
} else if (sleepable) {
verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
return -EACCES;
}
} else if (preempt_disable) {
env->cur_state->active_preempt_lock++;
} else if (preempt_enable) {
verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
return -EINVAL;
}
/* In case of release function, we get register number of refcounted /* In case of release function, we get register number of refcounted
* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/ */
...@@ -15439,6 +15491,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) ...@@ -15439,6 +15491,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL; return -EINVAL;
} }
if (env->cur_state->active_preempt_lock) {
verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_preempt_disable-ed region\n");
return -EINVAL;
}
if (regs[ctx_reg].type != PTR_TO_CTX) { if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env, verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
...@@ -17006,6 +17063,9 @@ static bool states_equal(struct bpf_verifier_env *env, ...@@ -17006,6 +17063,9 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->active_rcu_lock != cur->active_rcu_lock) if (old->active_rcu_lock != cur->active_rcu_lock)
return false; return false;
if (old->active_preempt_lock != cur->active_preempt_lock)
return false;
if (old->in_sleepable != cur->in_sleepable) if (old->in_sleepable != cur->in_sleepable)
return false; return false;
...@@ -17957,6 +18017,13 @@ static int do_check(struct bpf_verifier_env *env) ...@@ -17957,6 +18017,13 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL; return -EINVAL;
} }
if (env->cur_state->active_preempt_lock && !env->cur_state->curframe) {
verbose(env, "%d bpf_preempt_enable%s missing\n",
env->cur_state->active_preempt_lock,
env->cur_state->active_preempt_lock == 1 ? " is" : "(s) are");
return -EINVAL;
}
/* We must do check_reference_leak here before /* We must do check_reference_leak here before
* prepare_func_exit to handle the case when * prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback * state->curframe > 0, it may be a callback
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment