Commit 6a3cd331 authored by Dave Marchevsky, committed by Alexei Starovoitov

bpf: Migrate release_on_unlock logic to non-owning ref semantics

This patch introduces non-owning reference semantics to the verifier,
specifically linked_list API kfunc handling. release_on_unlock logic for
refs is refactored - with small functional changes - to implement these
semantics, and bpf_list_push_{front,back} are migrated to use them.

When a list node is pushed to a list, the program still has a pointer to
the node:

  n = bpf_obj_new(typeof(*n));

  bpf_spin_lock(&l);
  bpf_list_push_back(&l, n);
  /* n still points to the just-added node */
  bpf_spin_unlock(&l);

What the verifier considers n to be after the push, and thus what can be
done with n, are changed by this patch.

Common properties both before/after this patch:
  * After push, n is only a valid reference to the node until end of
    critical section
  * After push, n cannot be pushed to any list
  * After push, the program can read the node's fields using n

Before:
  * After push, n retains the ref_obj_id which it received on
    bpf_obj_new, but the associated bpf_reference_state's
    release_on_unlock field is set to true
    * release_on_unlock field and associated logic are used to implement
      "n is only a valid ref until end of critical section"
  * After push, n cannot be written to; the node must be removed from
    the list before writing to its fields
  * After push, n is marked PTR_UNTRUSTED

After:
  * After push, n's ref is released and ref_obj_id set to 0. NON_OWN_REF
    type flag is added to reg's type, indicating that it's a non-owning
    reference.
    * NON_OWN_REF flag and logic are used to implement "n is only a
      valid ref until end of critical section"
  * After push, n can be written to (except for special fields, e.g.
    bpf_list_node, timer, ...) - see the example below
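
To make the new behavior concrete, the example above extends as
follows (a sketch; like the selftests' struct foo, the node type is
assumed to have a plain data field):

  n = bpf_obj_new(typeof(*n));
  if (!n)
      return 0;

  bpf_spin_lock(&l);
  bpf_list_push_back(&l, n);
  x = n->data;  /* read through n: allowed both before and after */
  n->data = 42; /* write through n: rejected before, allowed after */
  bpf_spin_unlock(&l);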

Summary of specific implementation changes to achieve the above:

  * release_on_unlock field, ref_set_release_on_unlock helper, and logic
    to "release on unlock" based on that field are removed

  * The anonymous active_lock struct used by bpf_verifier_state is
    pulled out into a named struct bpf_active_lock.

  * NON_OWN_REF type flag is introduced along with verifier logic
    changes to handle non-owning refs

  * Helpers are added to use NON_OWN_REF flag to implement non-owning
    ref semantics as described above
    * invalidate_non_owning_refs - helper to clobber all non-owning refs
      matching a particular bpf_active_lock identity. Replaces
      release_on_unlock logic in process_spin_lock.
    * ref_set_non_owning - set NON_OWN_REF type flag after doing some
      sanity checking
    * ref_convert_owning_non_owning - convert an owning reference with
      the specified ref_obj_id to non-owning: set the NON_OWN_REF flag
      for each reg with that ref_obj_id and zero out its ref_obj_id
      (see the toy model after this summary)

  * Update linked_list selftests to account for minor semantic
    differences introduced by this patch
    * Writes to a release_on_unlock node ref are not allowed, while
      writes to non-owning reference pointees are. As a result, the
      linked_list "write after push" failure tests are no longer scenarios
      that should fail.
    * The test##missing_lock##op and test##incorrect_lock##op
      macro-generated failure tests need to have a valid node argument in
      order to have the same error output as before. Otherwise
      verification will fail early and the expected error output won't be seen.
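
A minimal, self-contained toy model of the owning-to-non-owning
conversion step (illustration only: toy_reg and its fields are
simplified stand-ins for bpf_reg_state, not the verifier's actual
data structures):

  #include <stdio.h>

  /* stand-in flag value; the kernel computes BIT(14 + BPF_BASE_TYPE_BITS) */
  #define NON_OWN_REF (1u << 14)

  struct toy_reg {
          unsigned int type;       /* type flags, e.g. NON_OWN_REF */
          unsigned int ref_obj_id; /* nonzero while the reg owns a ref */
  };

  /* Every reg sharing ref_obj_id loses ownership and is marked
   * non-owning, mirroring ref_convert_owning_non_owning's effect.
   */
  static void convert_owning_non_owning(struct toy_reg *regs, int n,
                                        unsigned int ref_obj_id)
  {
          for (int i = 0; i < n; i++) {
                  if (regs[i].ref_obj_id == ref_obj_id) {
                          regs[i].ref_obj_id = 0;
                          regs[i].type |= NON_OWN_REF;
                  }
          }
  }

  int main(void)
  {
          /* two regs alias the same owning ref (ref_obj_id 7) */
          struct toy_reg regs[2] = { { 0, 7 }, { 0, 7 } };

          convert_owning_non_owning(regs, 2, 7);
          printf("type=%#x ref_obj_id=%u\n", regs[0].type, regs[0].ref_obj_id);
          return 0;
  }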
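
By contrast, the use_after_unlock tests still fail as before:
bpf_spin_unlock calls invalidate_non_owning_refs, which clobbers every
non-owning ref tied to the lock's bpf_active_lock identity, so a later
access through the ref is rejected. Roughly (a sketch assuming the
selftests' struct foo):

  bpf_spin_lock(&glock);
  bpf_list_push_front(&ghead, &f->node); /* f becomes non-owning */
  bpf_spin_unlock(&glock);  /* non-owning refs clobbered to scalar */

  return f->data;           /* rejected: "invalid mem access 'scalar'" */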
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230212092715.1422619-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 39c536ac
@@ -181,6 +181,7 @@ enum btf_field_type {
 	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
 	BPF_LIST_HEAD  = (1 << 4),
 	BPF_LIST_NODE  = (1 << 5),
+	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
 };
 
 struct btf_field_kptr {
@@ -576,6 +577,11 @@ enum bpf_type_flag {
 	/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
 	MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
 
+	/* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning.
+	 * Currently only valid for linked-list and rbtree nodes.
+	 */
+	NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
+
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
 };
@@ -43,6 +43,22 @@ enum bpf_reg_liveness {
 	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
+/* For every reg representing a map value or allocated object pointer,
+ * we consider the tuple of (ptr, id) for them to be unique in verifier
+ * context and consider them to not alias each other for the purposes of
+ * tracking lock state.
+ */
+struct bpf_active_lock {
+	/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
+	 * there's no active lock held, and other fields have no
+	 * meaning. If non-NULL, it indicates that a lock is held and
+	 * id member has the reg->id of the register which can be >= 0.
+	 */
+	void *ptr;
+	/* This will be reg->id */
+	u32 id;
+};
+
 struct bpf_reg_state {
 	/* Ordering of fields matters. See states_equal() */
 	enum bpf_reg_type type;
@@ -226,11 +242,6 @@ struct bpf_reference_state {
 	 * exiting a callback function.
 	 */
 	int callback_ref;
-	/* Mark the reference state to release the registers sharing the same id
-	 * on bpf_spin_unlock (for nodes that we will lose ownership to but are
-	 * safe to access inside the critical section).
-	 */
-	bool release_on_unlock;
 };
 
 /* state of the program:
@@ -331,21 +342,8 @@ struct bpf_verifier_state {
 	u32 branches;
 	u32 insn_idx;
 	u32 curframe;
-	/* For every reg representing a map value or allocated object pointer,
-	 * we consider the tuple of (ptr, id) for them to be unique in verifier
-	 * context and consider them to not alias each other for the purposes of
-	 * tracking lock state.
-	 */
-	struct {
-		/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
-		 * there's no active lock held, and other fields have no
-		 * meaning. If non-NULL, it indicates that a lock is held and
-		 * id member has the reg->id of the register which can be >= 0.
-		 */
-		void *ptr;
-		/* This will be reg->id */
-		u32 id;
-	} active_lock;
+	struct bpf_active_lock active_lock;
 	bool speculative;
 	bool active_rcu_lock;
[diff collapsed - additional changes in this commit are not shown]
@@ -78,8 +78,6 @@ static struct {
 	{ "direct_write_head", "direct access to bpf_list_head is disallowed" },
 	{ "direct_read_node", "direct access to bpf_list_node is disallowed" },
 	{ "direct_write_node", "direct access to bpf_list_node is disallowed" },
-	{ "write_after_push_front", "only read is supported" },
-	{ "write_after_push_back", "only read is supported" },
 	{ "use_after_unlock_push_front", "invalid mem access 'scalar'" },
 	{ "use_after_unlock_push_back", "invalid mem access 'scalar'" },
 	{ "double_push_front", "arg#1 expected pointer to allocated object" },
@@ -260,7 +260,7 @@ int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head
 {
 	int ret;
 
-	ret = list_push_pop_multiple(lock ,head, false);
+	ret = list_push_pop_multiple(lock, head, false);
 	if (ret)
 		return ret;
 	return list_push_pop_multiple(lock, head, true);
@@ -54,28 +54,44 @@
 		return 0;                                   \
 	}
 
-CHECK(kptr, push_front, &f->head);
-CHECK(kptr, push_back, &f->head);
 CHECK(kptr, pop_front, &f->head);
 CHECK(kptr, pop_back, &f->head);
 
-CHECK(global, push_front, &ghead);
-CHECK(global, push_back, &ghead);
 CHECK(global, pop_front, &ghead);
 CHECK(global, pop_back, &ghead);
 
-CHECK(map, push_front, &v->head);
-CHECK(map, push_back, &v->head);
 CHECK(map, pop_front, &v->head);
 CHECK(map, pop_back, &v->head);
 
-CHECK(inner_map, push_front, &iv->head);
-CHECK(inner_map, push_back, &iv->head);
 CHECK(inner_map, pop_front, &iv->head);
 CHECK(inner_map, pop_back, &iv->head);
 
 #undef CHECK
+#define CHECK(test, op, hexpr, nexpr)                       \
+	SEC("?tc")                                          \
+	int test##_missing_lock_##op(void *ctx)             \
+	{                                                   \
+		INIT;                                       \
+		void (*p)(void *, void *) = (void *)&bpf_list_##op; \
+		p(hexpr, nexpr);                            \
+		return 0;                                   \
+	}
+
+CHECK(kptr, push_front, &f->head, b);
+CHECK(kptr, push_back, &f->head, b);
+
+CHECK(global, push_front, &ghead, f);
+CHECK(global, push_back, &ghead, f);
+
+CHECK(map, push_front, &v->head, f);
+CHECK(map, push_back, &v->head, f);
+
+CHECK(inner_map, push_front, &iv->head, f);
+CHECK(inner_map, push_back, &iv->head, f);
+
+#undef CHECK
 #define CHECK(test, op, lexpr, hexpr)                       \
 	SEC("?tc")                                          \
 	int test##_incorrect_lock_##op(void *ctx)           \
@@ -108,11 +124,47 @@ CHECK(inner_map, pop_back, &iv->head);
 	CHECK(inner_map_global, op, &iv->lock, &ghead);     \
 	CHECK(inner_map_map, op, &iv->lock, &v->head);
 
-CHECK_OP(push_front);
-CHECK_OP(push_back);
 CHECK_OP(pop_front);
 CHECK_OP(pop_back);
 
 #undef CHECK
 #undef CHECK_OP
+
+#define CHECK(test, op, lexpr, hexpr, nexpr)                \
+	SEC("?tc")                                          \
+	int test##_incorrect_lock_##op(void *ctx)           \
+	{                                                   \
+		INIT;                                       \
+		void (*p)(void *, void *) = (void *)&bpf_list_##op; \
+		bpf_spin_lock(lexpr);                       \
+		p(hexpr, nexpr);                            \
+		return 0;                                   \
+	}
+
+#define CHECK_OP(op)                                        \
+	CHECK(kptr_kptr, op, &f1->lock, &f2->head, b);      \
+	CHECK(kptr_global, op, &f1->lock, &ghead, f);       \
+	CHECK(kptr_map, op, &f1->lock, &v->head, f);        \
+	CHECK(kptr_inner_map, op, &f1->lock, &iv->head, f); \
+                                                            \
+	CHECK(global_global, op, &glock2, &ghead, f);       \
+	CHECK(global_kptr, op, &glock, &f1->head, b);       \
+	CHECK(global_map, op, &glock, &v->head, f);         \
+	CHECK(global_inner_map, op, &glock, &iv->head, f);  \
+                                                            \
+	CHECK(map_map, op, &v->lock, &v2->head, f);         \
+	CHECK(map_kptr, op, &v->lock, &f2->head, b);        \
+	CHECK(map_global, op, &v->lock, &ghead, f);         \
+	CHECK(map_inner_map, op, &v->lock, &iv->head, f);   \
+                                                            \
+	CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, f); \
+	CHECK(inner_map_kptr, op, &iv->lock, &f2->head, b); \
+	CHECK(inner_map_global, op, &iv->lock, &ghead, f);  \
+	CHECK(inner_map_map, op, &iv->lock, &v->head, f);
+
+CHECK_OP(push_front);
+CHECK_OP(push_back);
+
+#undef CHECK
+#undef CHECK_OP
 #undef INIT
@@ -303,34 +355,6 @@ int direct_write_node(void *ctx)
 	return 0;
 }
 
-static __always_inline
-int write_after_op(void (*push_op)(void *head, void *node))
-{
-	struct foo *f;
-
-	f = bpf_obj_new(typeof(*f));
-	if (!f)
-		return 0;
-
-	bpf_spin_lock(&glock);
-	push_op(&ghead, &f->node);
-	f->data = 42;
-	bpf_spin_unlock(&glock);
-
-	return 0;
-}
-
-SEC("?tc")
-int write_after_push_front(void *ctx)
-{
-	return write_after_op((void *)bpf_list_push_front);
-}
-
-SEC("?tc")
-int write_after_push_back(void *ctx)
-{
-	return write_after_op((void *)bpf_list_push_back);
-}
-
 static __always_inline
 int use_after_unlock(void (*op)(void *head, void *node))
 {