Commit 6a3cd331 authored by Dave Marchevsky's avatar Dave Marchevsky Committed by Alexei Starovoitov

bpf: Migrate release_on_unlock logic to non-owning ref semantics

This patch introduces non-owning reference semantics to the verifier,
specifically linked_list API kfunc handling. release_on_unlock logic for
refs is refactored - with small functional changes - to implement these
semantics, and bpf_list_push_{front,back} are migrated to use them.

When a list node is pushed to a list, the program still has a pointer to
the node:

  n = bpf_obj_new(typeof(*n));

  bpf_spin_lock(&l);
  bpf_list_push_back(&l, n);
  /* n still points to the just-added node */
  bpf_spin_unlock(&l);

What the verifier considers n to be after the push, and thus what can be
done with n, are changed by this patch.

Common properties both before/after this patch:
  * After push, n is only a valid reference to the node until end of
    critical section
  * After push, n cannot be pushed to any list
  * After push, the program can read the node's fields using n

Before:
  * After push, n retains the ref_obj_id which it received on
    bpf_obj_new, but the associated bpf_reference_state's
    release_on_unlock field is set to true
    * release_on_unlock field and associated logic is used to implement
      "n is only a valid ref until end of critical section"
  * After push, n cannot be written to, the node must be removed from
    the list before writing to its fields
  * After push, n is marked PTR_UNTRUSTED

After:
  * After push, n's ref is released and ref_obj_id set to 0. NON_OWN_REF
    type flag is added to reg's type, indicating that it's a non-owning
    reference.
    * NON_OWN_REF flag and logic is used to implement "n is only a
      valid ref until end of critical section"
  * n can be written to (except for special fields e.g. bpf_list_node,
    timer, ...)

Summary of specific implementation changes to achieve the above:

  * release_on_unlock field, ref_set_release_on_unlock helper, and logic
    to "release on unlock" based on that field are removed

  * The anonymous active_lock struct used by bpf_verifier_state is
    pulled out into a named struct bpf_active_lock.

  * NON_OWN_REF type flag is introduced along with verifier logic
    changes to handle non-owning refs

  * Helpers are added to use NON_OWN_REF flag to implement non-owning
    ref semantics as described above
    * invalidate_non_owning_refs - helper to clobber all non-owning refs
      matching a particular bpf_active_lock identity. Replaces
      release_on_unlock logic in process_spin_lock.
    * ref_set_non_owning - set NON_OWN_REF type flag after doing some
      sanity checking
    * ref_convert_owning_non_owning - convert owning reference w/
      specified ref_obj_id to non-owning references. Set NON_OWN_REF
      flag for each reg with that ref_obj_id and 0-out its ref_obj_id

  * Update linked_list selftests to account for minor semantic
    differences introduced by this patch
    * Writes to a release_on_unlock node ref are not allowed, while
      writes to non-owning reference pointees are. As a result the
      linked_list "write after push" failure tests are no longer scenarios
      that should fail.
    * The test##missing_lock##op and test##incorrect_lock##op
      macro-generated failure tests need to have a valid node argument in
      order to have the same error output as before. Otherwise
      verification will fail early and the expected error output won't be seen.
Signed-off-by: default avatarDave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230212092715.1422619-2-davemarchevsky@fb.comSigned-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 39c536ac
......@@ -181,6 +181,7 @@ enum btf_field_type {
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
BPF_LIST_HEAD = (1 << 4),
BPF_LIST_NODE = (1 << 5),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
};
struct btf_field_kptr {
......@@ -576,6 +577,11 @@ enum bpf_type_flag {
/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
/* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning.
* Currently only valid for linked-list and rbtree nodes.
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
......
......@@ -43,6 +43,22 @@ enum bpf_reg_liveness {
REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
};
/* For every reg representing a map value or allocated object pointer,
* we consider the tuple of (ptr, id) for them to be unique in verifier
* context and conside them to not alias each other for the purposes of
* tracking lock state.
*/
struct bpf_active_lock {
/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
* there's no active lock held, and other fields have no
* meaning. If non-NULL, it indicates that a lock is held and
* id member has the reg->id of the register which can be >= 0.
*/
void *ptr;
/* This will be reg->id */
u32 id;
};
struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
......@@ -226,11 +242,6 @@ struct bpf_reference_state {
* exiting a callback function.
*/
int callback_ref;
/* Mark the reference state to release the registers sharing the same id
* on bpf_spin_unlock (for nodes that we will lose ownership to but are
* safe to access inside the critical section).
*/
bool release_on_unlock;
};
/* state of the program:
......@@ -331,21 +342,8 @@ struct bpf_verifier_state {
u32 branches;
u32 insn_idx;
u32 curframe;
/* For every reg representing a map value or allocated object pointer,
* we consider the tuple of (ptr, id) for them to be unique in verifier
* context and conside them to not alias each other for the purposes of
* tracking lock state.
*/
struct {
/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
* there's no active lock held, and other fields have no
* meaning. If non-NULL, it indicates that a lock is held and
* id member has the reg->id of the register which can be >= 0.
*/
void *ptr;
/* This will be reg->id */
u32 id;
} active_lock;
struct bpf_active_lock active_lock;
bool speculative;
bool active_rcu_lock;
......
......@@ -190,6 +190,9 @@ struct bpf_verifier_stack_elem {
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
......@@ -457,6 +460,11 @@ static bool type_is_ptr_alloc_obj(u32 type)
return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
}
static bool type_is_non_owning_ref(u32 type)
{
return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
}
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
......@@ -1073,6 +1081,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose_a("id=%d", reg->id);
if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (type_is_non_owning_ref(reg->type))
verbose_a("%s", "non_own_ref");
if (t != SCALAR_VALUE)
verbose_a("off=%d", reg->off);
if (type_is_pkt_pointer(t))
......@@ -5052,7 +5062,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
!reg->ref_obj_id) {
verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
return -EFAULT;
}
......@@ -6042,9 +6053,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
cur->active_lock.ptr = btf;
cur->active_lock.id = reg->id;
} else {
struct bpf_func_state *fstate = cur_func(env);
void *ptr;
int i;
if (map)
ptr = map;
......@@ -6060,25 +6069,11 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
verbose(env, "bpf_spin_unlock of different lock\n");
return -EINVAL;
}
cur->active_lock.ptr = NULL;
cur->active_lock.id = 0;
for (i = fstate->acquired_refs - 1; i >= 0; i--) {
int err;
invalidate_non_owning_refs(env);
/* Complain on error because this reference state cannot
* be freed before this point, as bpf_spin_lock critical
* section does not allow functions that release the
* allocated object immediately.
*/
if (!fstate->refs[i].release_on_unlock)
continue;
err = release_reference(env, fstate->refs[i].id);
if (err) {
verbose(env, "failed to release release_on_unlock reference");
return err;
}
}
cur->active_lock.ptr = NULL;
cur->active_lock.id = 0;
}
return 0;
}
......@@ -6546,6 +6541,23 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
return 0;
}
static struct btf_field *
reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
{
struct btf_field *field;
struct btf_record *rec;
rec = reg_btf_record(reg);
if (!rec)
return NULL;
field = btf_record_find(rec, off, fields);
if (!field)
return NULL;
return field;
}
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type)
......@@ -6567,6 +6579,18 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
*/
if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
return 0;
if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
return __check_ptr_off_reg(env, reg, regno, true);
verbose(env, "R%d must have zero offset when passed to release func\n",
regno);
verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
kernel_type_name(reg->btf, reg->btf_id), reg->off);
return -EINVAL;
}
/* Doing check_ptr_off_reg check for the offset will catch this
* because fixed_off_ok is false, but checking here allows us
* to give the user a better error message.
......@@ -6601,6 +6625,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
* can be non-zero. This was already checked above. So pass
......@@ -7363,6 +7388,17 @@ static int release_reference(struct bpf_verifier_env *env,
return 0;
}
static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
{
struct bpf_func_state *unused;
struct bpf_reg_state *reg;
bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
if (type_is_non_owning_ref(reg->type))
__mark_reg_unknown(env, reg);
}));
}
static void clear_caller_saved_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *regs)
{
......@@ -8915,38 +8951,54 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
return 0;
}
static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = cur_func(env);
struct bpf_verifier_state *state = env->cur_state;
if (!state->active_lock.ptr) {
verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
return -EFAULT;
}
if (type_flag(reg->type) & NON_OWN_REF) {
verbose(env, "verifier internal error: NON_OWN_REF already set\n");
return -EFAULT;
}
reg->type |= NON_OWN_REF;
return 0;
}
static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
{
struct bpf_func_state *state, *unused;
struct bpf_reg_state *reg;
int i;
/* bpf_spin_lock only allows calling list_push and list_pop, no BPF
* subprogs, no global functions. This means that the references would
* not be released inside the critical section but they may be added to
* the reference state, and the acquired_refs are never copied out for a
* different frame as BPF to BPF calls don't work in bpf_spin_lock
* critical sections.
*/
state = cur_func(env);
if (!ref_obj_id) {
verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
verbose(env, "verifier internal error: ref_obj_id is zero for "
"owning -> non-owning conversion\n");
return -EFAULT;
}
for (i = 0; i < state->acquired_refs; i++) {
if (state->refs[i].id == ref_obj_id) {
if (state->refs[i].release_on_unlock) {
verbose(env, "verifier internal error: expected false release_on_unlock");
return -EFAULT;
if (state->refs[i].id != ref_obj_id)
continue;
/* Clear ref_obj_id here so release_reference doesn't clobber
* the whole reg
*/
bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
if (reg->ref_obj_id == ref_obj_id) {
reg->ref_obj_id = 0;
ref_set_non_owning(env, reg);
}
state->refs[i].release_on_unlock = true;
/* Now mark everyone sharing same ref_obj_id as untrusted */
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->ref_obj_id == ref_obj_id)
reg->type |= PTR_UNTRUSTED;
}));
return 0;
}
}
verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
return -EFAULT;
}
......@@ -9081,7 +9133,6 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
{
const struct btf_type *et, *t;
struct btf_field *field;
struct btf_record *rec;
u32 list_node_off;
if (meta->btf != btf_vmlinux ||
......@@ -9098,9 +9149,8 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
return -EINVAL;
}
rec = reg_btf_record(reg);
list_node_off = reg->off + reg->var_off.value;
field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
field = reg_find_field_offset(reg, list_node_off, BPF_LIST_NODE);
if (!field || field->offset != list_node_off) {
verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
return -EINVAL;
......@@ -9126,8 +9176,8 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
btf_name_by_offset(field->graph_root.btf, et->name_off));
return -EINVAL;
}
/* Set arg#1 for expiration after unlock */
return ref_set_release_on_unlock(env, reg->ref_obj_id);
return 0;
}
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
......@@ -9406,11 +9456,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock;
struct bpf_kfunc_call_arg_meta meta;
u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
const struct btf_type *ret_t;
......@@ -9505,6 +9555,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
meta.func_id == special_kfunc_list[KF_bpf_list_push_back]) {
release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
func_name, func_id);
return err;
}
err = release_reference(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
func_name, func_id);
return err;
}
}
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
......@@ -11825,8 +11893,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
*/
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
return;
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
WARN_ON_ONCE(reg->off))
return;
if (is_null) {
reg->type = SCALAR_VALUE;
/* We don't need id and ref_obj_id from this point
......
......@@ -78,8 +78,6 @@ static struct {
{ "direct_write_head", "direct access to bpf_list_head is disallowed" },
{ "direct_read_node", "direct access to bpf_list_node is disallowed" },
{ "direct_write_node", "direct access to bpf_list_node is disallowed" },
{ "write_after_push_front", "only read is supported" },
{ "write_after_push_back", "only read is supported" },
{ "use_after_unlock_push_front", "invalid mem access 'scalar'" },
{ "use_after_unlock_push_back", "invalid mem access 'scalar'" },
{ "double_push_front", "arg#1 expected pointer to allocated object" },
......
......@@ -260,7 +260,7 @@ int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head
{
int ret;
ret = list_push_pop_multiple(lock ,head, false);
ret = list_push_pop_multiple(lock, head, false);
if (ret)
return ret;
return list_push_pop_multiple(lock, head, true);
......
......@@ -54,28 +54,44 @@
return 0; \
}
CHECK(kptr, push_front, &f->head);
CHECK(kptr, push_back, &f->head);
CHECK(kptr, pop_front, &f->head);
CHECK(kptr, pop_back, &f->head);
CHECK(global, push_front, &ghead);
CHECK(global, push_back, &ghead);
CHECK(global, pop_front, &ghead);
CHECK(global, pop_back, &ghead);
CHECK(map, push_front, &v->head);
CHECK(map, push_back, &v->head);
CHECK(map, pop_front, &v->head);
CHECK(map, pop_back, &v->head);
CHECK(inner_map, push_front, &iv->head);
CHECK(inner_map, push_back, &iv->head);
CHECK(inner_map, pop_front, &iv->head);
CHECK(inner_map, pop_back, &iv->head);
#undef CHECK
#define CHECK(test, op, hexpr, nexpr) \
SEC("?tc") \
int test##_missing_lock_##op(void *ctx) \
{ \
INIT; \
void (*p)(void *, void *) = (void *)&bpf_list_##op; \
p(hexpr, nexpr); \
return 0; \
}
CHECK(kptr, push_front, &f->head, b);
CHECK(kptr, push_back, &f->head, b);
CHECK(global, push_front, &ghead, f);
CHECK(global, push_back, &ghead, f);
CHECK(map, push_front, &v->head, f);
CHECK(map, push_back, &v->head, f);
CHECK(inner_map, push_front, &iv->head, f);
CHECK(inner_map, push_back, &iv->head, f);
#undef CHECK
#define CHECK(test, op, lexpr, hexpr) \
SEC("?tc") \
int test##_incorrect_lock_##op(void *ctx) \
......@@ -108,11 +124,47 @@ CHECK(inner_map, pop_back, &iv->head);
CHECK(inner_map_global, op, &iv->lock, &ghead); \
CHECK(inner_map_map, op, &iv->lock, &v->head);
CHECK_OP(push_front);
CHECK_OP(push_back);
CHECK_OP(pop_front);
CHECK_OP(pop_back);
#undef CHECK
#undef CHECK_OP
#define CHECK(test, op, lexpr, hexpr, nexpr) \
SEC("?tc") \
int test##_incorrect_lock_##op(void *ctx) \
{ \
INIT; \
void (*p)(void *, void*) = (void *)&bpf_list_##op; \
bpf_spin_lock(lexpr); \
p(hexpr, nexpr); \
return 0; \
}
#define CHECK_OP(op) \
CHECK(kptr_kptr, op, &f1->lock, &f2->head, b); \
CHECK(kptr_global, op, &f1->lock, &ghead, f); \
CHECK(kptr_map, op, &f1->lock, &v->head, f); \
CHECK(kptr_inner_map, op, &f1->lock, &iv->head, f); \
\
CHECK(global_global, op, &glock2, &ghead, f); \
CHECK(global_kptr, op, &glock, &f1->head, b); \
CHECK(global_map, op, &glock, &v->head, f); \
CHECK(global_inner_map, op, &glock, &iv->head, f); \
\
CHECK(map_map, op, &v->lock, &v2->head, f); \
CHECK(map_kptr, op, &v->lock, &f2->head, b); \
CHECK(map_global, op, &v->lock, &ghead, f); \
CHECK(map_inner_map, op, &v->lock, &iv->head, f); \
\
CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, f); \
CHECK(inner_map_kptr, op, &iv->lock, &f2->head, b); \
CHECK(inner_map_global, op, &iv->lock, &ghead, f); \
CHECK(inner_map_map, op, &iv->lock, &v->head, f);
CHECK_OP(push_front);
CHECK_OP(push_back);
#undef CHECK
#undef CHECK_OP
#undef INIT
......@@ -303,34 +355,6 @@ int direct_write_node(void *ctx)
return 0;
}
static __always_inline
int write_after_op(void (*push_op)(void *head, void *node))
{
struct foo *f;
f = bpf_obj_new(typeof(*f));
if (!f)
return 0;
bpf_spin_lock(&glock);
push_op(&ghead, &f->node);
f->data = 42;
bpf_spin_unlock(&glock);
return 0;
}
SEC("?tc")
int write_after_push_front(void *ctx)
{
return write_after_op((void *)bpf_list_push_front);
}
SEC("?tc")
int write_after_push_back(void *ctx)
{
return write_after_op((void *)bpf_list_push_back);
}
static __always_inline
int use_after_unlock(void (*op)(void *head, void *node))
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment