Commit 9bb00b28 authored by Yonghong Song, committed by Alexei Starovoitov

bpf: Add kfunc bpf_rcu_read_lock/unlock()

Add two kfuncs, bpf_rcu_read_lock() and bpf_rcu_read_unlock(). These two kfuncs
can be used for all program types. The following is an example of how
rcu pointers are used w.r.t. bpf_rcu_read_lock()/bpf_rcu_read_unlock().

  struct task_struct {
    ...
    struct task_struct              *last_wakee;
    struct task_struct __rcu        *real_parent;
    ...
  };

Let us say the prog does 'task = bpf_get_current_task_btf()' to get a
'task' pointer. The basic rules are:
  - 'real_parent = task->real_parent' should be inside a bpf_rcu_read_lock
    region. This is to simulate the rcu_dereference() operation. The
    'real_parent' is marked as MEM_RCU only if (1) the load of
    task->real_parent is inside a bpf_rcu_read_lock region, and (2) task is
    a trusted ptr. So a MEM_RCU marked ptr can be 'trusted' inside the
    bpf_rcu_read_lock region.
  - 'last_wakee = real_parent->last_wakee' should be inside a bpf_rcu_read_lock
    region since it tries to access rcu protected memory.
  - the ptr 'last_wakee' will be marked as PTR_UNTRUSTED since in general
    it is not clear whether the object pointed to by 'last_wakee' is valid or
    not, even inside the bpf_rcu_read_lock region.

The verifier will reset all rcu pointer register states to untrusted
at the bpf_rcu_read_unlock() kfunc call site, so any such rcu pointer
won't be trusted any more outside the bpf_rcu_read_lock() region.

The current implementation does not support nested rcu read lock
region in the prog.
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20221124053217.2373910-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 01685c5b
...@@ -572,6 +572,9 @@ enum bpf_type_flag { ...@@ -572,6 +572,9 @@ enum bpf_type_flag {
*/ */
PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX, __BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
}; };
......
...@@ -344,6 +344,7 @@ struct bpf_verifier_state { ...@@ -344,6 +344,7 @@ struct bpf_verifier_state {
u32 id; u32 id;
} active_lock; } active_lock;
bool speculative; bool speculative;
bool active_rcu_lock;
/* first and last insn idx of this verifier state */ /* first and last insn idx of this verifier state */
u32 first_insn_idx; u32 first_insn_idx;
...@@ -445,6 +446,7 @@ struct bpf_insn_aux_data { ...@@ -445,6 +446,7 @@ struct bpf_insn_aux_data {
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */ bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
u8 alu_state; /* used in combination with alu_limit */ u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */ /* below fields are initialized once */
...@@ -534,6 +536,7 @@ struct bpf_verifier_env { ...@@ -534,6 +536,7 @@ struct bpf_verifier_env {
bool bypass_spec_v1; bool bypass_spec_v1;
bool bypass_spec_v4; bool bypass_spec_v4;
bool seen_direct_write; bool seen_direct_write;
bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo; const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log; struct bpf_verifier_log log;
...@@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) ...@@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
} }
} }
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) #define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
static inline bool bpf_type_has_unsafe_modifiers(u32 type) static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{ {
......
...@@ -6238,6 +6238,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, ...@@ -6238,6 +6238,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
/* check __percpu tag */ /* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0) if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU; tmp_flag = MEM_PERCPU;
/* check __rcu tag */
if (strcmp(tag_value, "rcu") == 0)
tmp_flag = MEM_RCU;
} }
stype = btf_type_skip_modifiers(btf, mtype->type, &id); stype = btf_type_skip_modifiers(btf, mtype->type, &id);
......
...@@ -1990,6 +1990,16 @@ void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) ...@@ -1990,6 +1990,16 @@ void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
return obj__ign; return obj__ign;
} }
/*
 * bpf_rcu_read_lock - kfunc that opens a bpf_rcu_read_lock region.
 *
 * Takes no arguments and returns nothing; the body is a direct call to
 * rcu_read_lock(). Per the commit description, nested rcu read lock
 * regions inside a prog are not supported by the current implementation.
 */
void bpf_rcu_read_lock(void)
{
rcu_read_lock();
}
/*
 * bpf_rcu_read_unlock - kfunc that closes a bpf_rcu_read_lock region.
 *
 * Takes no arguments and returns nothing; the body is a direct call to
 * rcu_read_unlock(). Per the commit description, the verifier resets all
 * rcu pointer register states to untrusted at this call site.
 */
void bpf_rcu_read_unlock(void)
{
rcu_read_unlock();
}
__diag_pop(); __diag_pop();
BTF_SET8_START(generic_btf_ids) BTF_SET8_START(generic_btf_ids)
...@@ -2031,6 +2041,8 @@ BTF_ID(func, bpf_cgroup_release) ...@@ -2031,6 +2041,8 @@ BTF_ID(func, bpf_cgroup_release)
BTF_SET8_START(common_btf_ids) BTF_SET8_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_SET8_END(common_btf_ids) BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = { static const struct btf_kfunc_id_set common_kfunc_set = {
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment