Commit 97e03f52, authored by Joanne Koong and committed by Andrii Nakryiko

bpf: Add verifier support for dynptrs

This patch adds the bulk of the verifier work for supporting dynamic
pointers (dynptrs) in bpf.

A bpf_dynptr is opaque to the bpf program. It is a 16-byte structure
defined internally as:

struct bpf_dynptr_kern {
    void *data;
    u32 size;
    u32 offset;
} __aligned(8);

The upper 8 bits of *size* are reserved (they contain extra metadata about
read-only status and dynptr type). Consequently, a dynptr only supports
memory less than 16 MB.

There are different types of dynptrs (eg malloc, ringbuf, ...). In this
patchset, the most basic one, dynptrs to a bpf program's local memory,
is added. For now only local memory that is of reg type PTR_TO_MAP_VALUE
is supported.

In the verifier, dynptr state information will be tracked in stack
slots. When the program passes in an uninitialized dynptr
(ARG_PTR_TO_DYNPTR | MEM_UNINIT), the stack slots (addressed relative
to the frame pointer) in which the dynptr resides are marked
STACK_DYNPTR. For helper functions that take in initialized dynptrs (e.g.
bpf_dynptr_read + bpf_dynptr_write which are added later in this
patchset), the verifier enforces that the dynptr has been initialized
properly by checking that its corresponding stack slots have been
marked as STACK_DYNPTR.

The 6th patch in this patchset adds test cases that the verifier should
successfully reject, for example attempting to use a dynptr
after doing a direct write into it inside the bpf program.
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-2-joannelkoong@gmail.com
parent 1ec5ee8c
...@@ -392,10 +392,15 @@ enum bpf_type_flag { ...@@ -392,10 +392,15 @@ enum bpf_type_flag {
MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to memory local to the bpf program. */
DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX, __BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
}; };
#define DYNPTR_TYPE_FLAG_MASK DYNPTR_TYPE_LOCAL
/* Max number of base types. */ /* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
...@@ -438,6 +443,7 @@ enum bpf_arg_type { ...@@ -438,6 +443,7 @@ enum bpf_arg_type {
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
__BPF_ARG_TYPE_MAX, __BPF_ARG_TYPE_MAX,
/* Extended arg_types. */ /* Extended arg_types. */
...@@ -2376,4 +2382,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, ...@@ -2376,4 +2382,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 **bin_buf, u32 num_args); u32 **bin_buf, u32 num_args);
void bpf_bprintf_cleanup(void); void bpf_bprintf_cleanup(void);
/* Kernel-internal implementation of the opaque uapi struct bpf_dynptr.
 * The uapi struct only reserves 16 bytes of storage; the fields below give
 * those bytes their meaning inside the kernel.
 */
struct bpf_dynptr_kern {
/* NOTE(review): presumably the start of the memory the dynptr refers to;
 * confirm against the helpers that initialize a dynptr.
 */
void *data;
/* Number of usable bytes of dynptr data.
 * For example, if the offset is 4 for a local dynptr whose data is
 * of type u64, the number of usable bytes is 4.
 *
 * Only the low 24 bits hold the size; the upper 8 bits are reserved
 * for metadata. The full layout is:
 * Bits 0 - 23 = size
 * Bits 24 - 30 = dynptr type
 * Bit 31 = whether dynptr is read-only
 */
u32 size;
/* NOTE(review): judging by the example in the size comment, a byte offset
 * into data at which the usable region starts; confirm against users.
 */
u32 offset;
} __aligned(8);
/* Kinds of dynptr. BPF_DYNPTR_TYPE_INVALID is the first enumerator and
 * therefore has the value 0.
 */
enum bpf_dynptr_type {
/* Not a valid dynptr type */
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
};
#endif /* _LINUX_BPF_H */ #endif /* _LINUX_BPF_H */
...@@ -72,6 +72,18 @@ struct bpf_reg_state { ...@@ -72,6 +72,18 @@ struct bpf_reg_state {
u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
/* For dynptr stack slots */
struct {
enum bpf_dynptr_type type;
/* A dynptr is 16 bytes so it takes up 2 stack slots.
* We need to track which slot is the first slot
* to protect against cases where the user may try to
* pass in an address starting at the second slot of the
* dynptr.
*/
bool first_slot;
} dynptr;
/* Max size from any of the above. */ /* Max size from any of the above. */
struct { struct {
unsigned long raw1; unsigned long raw1;
...@@ -174,9 +186,15 @@ enum bpf_stack_slot_type { ...@@ -174,9 +186,15 @@ enum bpf_stack_slot_type {
STACK_SPILL, /* register spilled into stack */ STACK_SPILL, /* register spilled into stack */
STACK_MISC, /* BPF program wrote some data into this slot */ STACK_MISC, /* BPF program wrote some data into this slot */
STACK_ZERO, /* BPF program wrote constant zero */ STACK_ZERO, /* BPF program wrote constant zero */
/* A dynptr is stored in this stack slot. The type of dynptr
* is stored in bpf_stack_state->spilled_ptr.dynptr.type
*/
STACK_DYNPTR,
}; };
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern)
#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE)
struct bpf_stack_state { struct bpf_stack_state {
struct bpf_reg_state spilled_ptr; struct bpf_reg_state spilled_ptr;
......
...@@ -6528,6 +6528,11 @@ struct bpf_timer { ...@@ -6528,6 +6528,11 @@ struct bpf_timer {
__u64 :64; __u64 :64;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* Opaque dynptr storage. The two unnamed 64-bit bit-fields only reserve
 * 16 bytes with 8-byte alignment; no member is accessible by name.
 */
struct bpf_dynptr {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_sysctl { struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1). __u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write. * Allows 1,2,4-byte read, but no write.
......
...@@ -259,6 +259,7 @@ struct bpf_call_arg_meta { ...@@ -259,6 +259,7 @@ struct bpf_call_arg_meta {
u32 ret_btf_id; u32 ret_btf_id;
u32 subprogno; u32 subprogno;
struct bpf_map_value_off_desc *kptr_off_desc; struct bpf_map_value_off_desc *kptr_off_desc;
u8 uninit_dynptr_regno;
}; };
struct btf *btf_vmlinux; struct btf *btf_vmlinux;
...@@ -581,6 +582,7 @@ static char slot_type_char[] = { ...@@ -581,6 +582,7 @@ static char slot_type_char[] = {
[STACK_SPILL] = 'r', [STACK_SPILL] = 'r',
[STACK_MISC] = 'm', [STACK_MISC] = 'm',
[STACK_ZERO] = '0', [STACK_ZERO] = '0',
[STACK_DYNPTR] = 'd',
}; };
static void print_liveness(struct bpf_verifier_env *env, static void print_liveness(struct bpf_verifier_env *env,
...@@ -596,6 +598,25 @@ static void print_liveness(struct bpf_verifier_env *env, ...@@ -596,6 +598,25 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "D"); verbose(env, "D");
} }
/* Convert a stack offset into a stack slot index (spi).
 *
 * Each slot covers BPF_REG_SIZE bytes: offsets -1 .. -BPF_REG_SIZE map to
 * slot 0, the next BPF_REG_SIZE bytes below that map to slot 1, and so on.
 */
static int get_spi(s32 off)
{
	s32 byte_idx = -off - 1;

	return byte_idx / BPF_REG_SIZE;
}
/* Check that the @nr_slots stack slots ending at index @spi all lie inside
 * the stack currently allocated for @state.
 *
 * The slot index grows downwards: an object spanning several slots has its
 * first slot at @spi and its last slot at @spi - @nr_slots + 1 (a dynptr
 * takes two slots, so it occupies spi and spi - 1). Every slot in
 * [spi - nr_slots + 1, spi] must therefore fall within
 * [0, allocated_stack / BPF_REG_SIZE).
 *
 * Returns true when the whole range is in bounds, false otherwise.
 */
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
	int lowest_spi = spi - nr_slots + 1;
	int nr_allocated = state->allocated_stack / BPF_REG_SIZE;

	if (lowest_spi < 0)
		return false;

	return spi < nr_allocated;
}
static struct bpf_func_state *func(struct bpf_verifier_env *env, static struct bpf_func_state *func(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg) const struct bpf_reg_state *reg)
{ {
...@@ -647,6 +668,108 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) ...@@ -647,6 +668,108 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL; env->scratched_stack_slots = ~0ULL;
} }
/* Map the DYNPTR_TYPE_* flag carried in @arg_type to the matching
 * enum bpf_dynptr_type value.
 *
 * Returns BPF_DYNPTR_TYPE_INVALID when no recognized dynptr type flag is set.
 */
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
	if ((arg_type & DYNPTR_TYPE_FLAG_MASK) == DYNPTR_TYPE_LOCAL)
		return BPF_DYNPTR_TYPE_LOCAL;

	return BPF_DYNPTR_TYPE_INVALID;
}
/* Record in the verifier's stack state that a dynptr occupies the two stack
 * slots addressed by @reg.
 *
 * @env:      verifier environment
 * @reg:      register holding the stack pointer to the dynptr
 * @arg_type: helper argument type; its DYNPTR_TYPE_* flag selects the
 *            dynptr type stored in the slots
 * @insn_idx: index of the calling instruction (currently unused)
 *
 * Both slots are marked STACK_DYNPTR and tagged with the dynptr type; only
 * the slot at spi is flagged as the first slot.
 *
 * Returns 0 on success, -EINVAL if the slots are outside the allocated stack
 * or @arg_type carries no valid dynptr type. On failure the stack state is
 * left unmodified.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i;

	spi = get_spi(reg->off);

	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
		return -EINVAL;

	/* Validate the dynptr type before touching any stack state, so that a
	 * rejected call cannot leave slots marked STACK_DYNPTR without a
	 * recorded type.
	 */
	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	state->stack[spi].spilled_ptr.dynptr.first_slot = true;
	state->stack[spi].spilled_ptr.dynptr.type = type;
	state->stack[spi - 1].spilled_ptr.dynptr.type = type;

	return 0;
}
/* Undo the dynptr marking of the two stack slots addressed by @reg.
 *
 * Clears the per-slot dynptr metadata and resets every byte of both slots
 * to STACK_INVALID.
 *
 * Returns 0 on success, or -EINVAL if the slots are outside the allocated
 * stack.
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *cur = func(env, reg);
	int first = get_spi(reg->off);
	int second = first - 1;
	int byte;

	if (!is_spi_bounds_valid(cur, first, BPF_DYNPTR_NR_SLOTS))
		return -EINVAL;

	/* Drop the dynptr bookkeeping; type 0 is BPF_DYNPTR_TYPE_INVALID. */
	cur->stack[first].spilled_ptr.dynptr.first_slot = false;
	cur->stack[first].spilled_ptr.dynptr.type = 0;
	cur->stack[second].spilled_ptr.dynptr.type = 0;

	for (byte = 0; byte < BPF_REG_SIZE; byte++) {
		cur->stack[first].slot_type[byte] = STACK_INVALID;
		cur->stack[second].slot_type[byte] = STACK_INVALID;
	}

	return 0;
}
/* Check that the two stack slots addressed by @reg do not already hold a
 * dynptr, i.e. that they may be used for a new, uninitialized one.
 *
 * A slot range that is not within the allocated stack is reported as
 * uninitialized (true). Otherwise the result is false as soon as any byte
 * of either slot is marked STACK_DYNPTR.
 */
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *cur = func(env, reg);
	int first = get_spi(reg->off);
	int byte;

	if (is_spi_bounds_valid(cur, first, BPF_DYNPTR_NR_SLOTS)) {
		for (byte = 0; byte < BPF_REG_SIZE; byte++) {
			if (cur->stack[first].slot_type[byte] == STACK_DYNPTR)
				return false;
			if (cur->stack[first - 1].slot_type[byte] == STACK_DYNPTR)
				return false;
		}
	}

	return true;
}
/* Check that @reg points at the first slot of a properly initialized dynptr
 * whose type is acceptable for @arg_type.
 *
 * The check fails when:
 *   - the two slots are not within the allocated stack,
 *   - the slot at spi is not flagged as the first slot of a dynptr,
 *   - any byte of either slot is not marked STACK_DYNPTR, or
 *   - @arg_type names a specific dynptr type that differs from the one
 *     recorded in the slot.
 */
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				     enum bpf_arg_type arg_type)
{
	struct bpf_func_state *cur = func(env, reg);
	int first = get_spi(reg->off);
	int byte;

	if (!is_spi_bounds_valid(cur, first, BPF_DYNPTR_NR_SLOTS))
		return false;

	if (!cur->stack[first].spilled_ptr.dynptr.first_slot)
		return false;

	for (byte = 0; byte < BPF_REG_SIZE; byte++) {
		if (cur->stack[first].slot_type[byte] != STACK_DYNPTR)
			return false;
		if (cur->stack[first - 1].slot_type[byte] != STACK_DYNPTR)
			return false;
	}

	/* A specific dynptr type flag must match the recorded type; plain
	 * ARG_PTR_TO_DYNPTR accepts any type of dynptr.
	 */
	if (arg_type != ARG_PTR_TO_DYNPTR)
		return cur->stack[first].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);

	return true;
}
/* The reg state of a pointer or a bounded scalar was saved when /* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack. * it was spilled to the stack.
*/ */
...@@ -5400,6 +5523,11 @@ static bool arg_type_is_release(enum bpf_arg_type type) ...@@ -5400,6 +5523,11 @@ static bool arg_type_is_release(enum bpf_arg_type type)
return type & OBJ_RELEASE; return type & OBJ_RELEASE;
} }
/* Report whether the base type of @type, as computed by base_type(), is
 * ARG_PTR_TO_DYNPTR.
 */
static bool arg_type_is_dynptr(enum bpf_arg_type type)
{
	if (base_type(type) != ARG_PTR_TO_DYNPTR)
		return false;

	return true;
}
static int int_ptr_type_to_size(enum bpf_arg_type type) static int int_ptr_type_to_size(enum bpf_arg_type type)
{ {
if (type == ARG_PTR_TO_INT) if (type == ARG_PTR_TO_INT)
...@@ -5539,6 +5667,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { ...@@ -5539,6 +5667,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_TIMER] = &timer_types,
[ARG_PTR_TO_KPTR] = &kptr_types, [ARG_PTR_TO_KPTR] = &kptr_types,
[ARG_PTR_TO_DYNPTR] = &stack_ptr_types,
}; };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno, static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
...@@ -5628,8 +5757,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, ...@@ -5628,8 +5757,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
bool fixed_off_ok = false; bool fixed_off_ok = false;
switch ((u32)type) { switch ((u32)type) {
case SCALAR_VALUE:
/* Pointer types where reg offset is explicitly allowed: */ /* Pointer types where reg offset is explicitly allowed: */
case PTR_TO_STACK:
if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
verbose(env, "cannot pass in dynptr at an offset\n");
return -EINVAL;
}
fallthrough;
case PTR_TO_PACKET: case PTR_TO_PACKET:
case PTR_TO_PACKET_META: case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY: case PTR_TO_MAP_KEY:
...@@ -5639,7 +5773,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, ...@@ -5639,7 +5773,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_MEM | MEM_ALLOC: case PTR_TO_MEM | MEM_ALLOC:
case PTR_TO_BUF: case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY: case PTR_TO_BUF | MEM_RDONLY:
case PTR_TO_STACK: case SCALAR_VALUE:
/* Some of the argument types nevertheless require a /* Some of the argument types nevertheless require a
* zero register offset. * zero register offset.
*/ */
...@@ -5837,6 +5971,36 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -5837,6 +5971,36 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
} else if (arg_type_is_dynptr(arg_type)) {
if (arg_type & MEM_UNINIT) {
if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
return -EINVAL;
}
/* We only support one dynptr being uninitialized at the moment,
* which is sufficient for the helper functions we have right now.
*/
if (meta->uninit_dynptr_regno) {
verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
return -EFAULT;
}
meta->uninit_dynptr_regno = regno;
} else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
const char *err_extra = "";
switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
case DYNPTR_TYPE_LOCAL:
err_extra = "local ";
break;
default:
break;
}
verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
err_extra, arg + 1);
return -EINVAL;
}
} else if (arg_type_is_alloc_size(arg_type)) { } else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) { if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n", verbose(env, "R%d is not a known constant'\n",
...@@ -6970,9 +7134,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -6970,9 +7134,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env); regs = cur_regs(env);
if (meta.uninit_dynptr_regno) {
/* we write BPF_DW bits (8 bytes) at a time */
for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
i, BPF_DW, BPF_WRITE, -1, false);
if (err)
return err;
}
err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
insn_idx);
if (err)
return err;
}
if (meta.release_regno) { if (meta.release_regno) {
err = -EINVAL; err = -EINVAL;
if (meta.ref_obj_id) if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
else if (meta.ref_obj_id)
err = release_reference(env, meta.ref_obj_id); err = release_reference(env, meta.ref_obj_id);
/* meta.ref_obj_id can only be 0 if register that is meant to be /* meta.ref_obj_id can only be 0 if register that is meant to be
* released is NULL, which must be > R0. * released is NULL, which must be > R0.
......
...@@ -634,6 +634,7 @@ class PrinterHelpers(Printer): ...@@ -634,6 +634,7 @@ class PrinterHelpers(Printer):
'struct file', 'struct file',
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr',
] ]
known_types = { known_types = {
'...', '...',
...@@ -684,6 +685,7 @@ class PrinterHelpers(Printer): ...@@ -684,6 +685,7 @@ class PrinterHelpers(Printer):
'struct file', 'struct file',
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr',
} }
mapped_types = { mapped_types = {
'u8': '__u8', 'u8': '__u8',
......
...@@ -6528,6 +6528,11 @@ struct bpf_timer { ...@@ -6528,6 +6528,11 @@ struct bpf_timer {
__u64 :64; __u64 :64;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* Opaque dynptr storage. The two unnamed 64-bit bit-fields only reserve
 * 16 bytes with 8-byte alignment; no member is accessible by name.
 */
struct bpf_dynptr {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_sysctl { struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1). __u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write. * Allows 1,2,4-byte read, but no write.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment