Commit acb12c85 authored by Alexei Starovoitov

Merge branch 'verify-callbacks-as-if-they-are-called-unknown-number-of-times'

Eduard Zingerman says:

====================
verify callbacks as if they are called unknown number of times

This series updates the verifier logic for callback function handling.
Current master simulates callback body execution exactly once,
which prevents the verifier from detecting unsafe programs like the one
below:

    static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
    {
        ctx->i = 0;
        return 0;
    }

    SEC("?raw_tp")
    int unsafe_on_zero_iter(void *unused)
    {
        struct num_context loop_ctx = { .i = 32 };
        __u8 choice_arr[2] = { 0, 1 };

        bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
        return choice_arr[loop_ctx.i];
    }

This was reported previously in [0].
The basic idea of the fix is to keep scheduling the callback entry
state for verification in env->head until a state identical to some
previously visited state in the current DFS state traversal is found.
This is the same logic as for open coded iterators and builds on top of
the recent fixes [1] for those.
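
Verifying the callback body only once also hides bugs that manifest
only on the second or later invocation, because state changes made by
one invocation are never fed back into the next. One of the test cases
added by this series (quoted from the new verifier_iterating_callbacks
selftest) covers exactly that; with this series it is rejected with
"R1 type=scalar expected=fp":

    struct buf_context {
        char *buf;
    };

    static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
    {
        if (idx == 0) {
            ctx->buf = (char *)(0xDEAD);
            return 0;
        }

        if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
            return 1;

        return 0;
    }

    SEC("?raw_tp")
    int unsafe_on_2nd_iter(void *unused)
    {
        char buf[4];
        struct buf_context loop_ctx = { .buf = buf };

        bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
        return 0;
    }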

The series is structured as follows:
- patches #1,2,3 update the strobemeta and xdp_synproxy selftests and
  the bpf_loop_bench benchmark to allow convergence of the bpf_loop
  callback states;
- patches #4,5 just shuffle the code a bit;
- patch #6 is the main part of the series;
- patch #7 adds test cases for #6;
- patch #8 extends patch #6 with the same speculative scalar widening
  logic as used for open coded iterators (see the example after this
  list);
- patch #9 adds test cases for #8;
- patch #10 extends patch #6 to track the maximal number of callback
  executions specifically for bpf_loop();
- patch #11 adds test cases for #10.
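
For reference, the speculative widening from patch #8 is what lets
programs like the one below verify (quoted from the new
verifier_iterating_callbacks selftest added in patch #9, where
choice_arr is a global two-element array): the field .i modified by the
callback is widened to an unknown scalar so the iteration converges,
while the untouched field .j keeps its known value:

    struct num_context {
        __u64 i;
        __u64 j;
    };

    static int widening_cb(__u32 idx, struct num_context *ctx)
    {
        ++ctx->i;
        return 0;
    }

    SEC("?raw_tp")
    int widening(void *unused)
    {
        struct num_context loop_ctx = { .i = 0, .j = 1 };

        bpf_loop(100, widening_cb, &loop_ctx, 0);
        /* loop_ctx.j is not changed during callback iteration,
         * verifier should not apply widening to it.
         */
        return choice_arr[loop_ctx.j];
    }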

Veristat results comparing this series to master+patches #1,2,3 using selftests
show the following differences:

File                       Program        States (A)  States (B)  States (DIFF)
-------------------------  -------------  ----------  ----------  -------------
bpf_loop_bench.bpf.o       benchmark               1           2  +1 (+100.00%)
pyperf600_bpf_loop.bpf.o   on_event              322         407  +85 (+26.40%)
strobemeta_bpf_loop.bpf.o  on_event              113         151  +38 (+33.63%)
xdp_synproxy_kern.bpf.o    syncookie_tc          341         291  -50 (-14.66%)
xdp_synproxy_kern.bpf.o    syncookie_xdp         344         301  -43 (-12.50%)

Veristat results comparing this series to master using Tetragon BPF
files [2] also show some differences: the states count grows by
+2% to +15% on 23 programs out of 186, with no new failures.

Changelog:
- V3 [5] -> V4, changes suggested by Andrii:
  - validate mark_chain_precision() result in patch #10;
  - renaming s/cumulative_callback_depth/callback_unroll_depth/.
- V2 [4] -> V3:
  - fixes in expected log messages for test cases:
    - callback_result_precise;
    - parent_callee_saved_reg_precise_with_callback;
    - parent_stack_slot_precise_with_callback;
  - renamings (suggested by Alexei):
    - s/callback_iter_depth/cumulative_callback_depth/
    - s/is_callback_iter_next/calls_callback/
    - s/mark_callback_iter_next/mark_calls_callback/
  - prepare_func_exit() updated to exit with -EFAULT when
    callee->in_callback_fn is true but calls_callback() is not true
    for callsite;
  - test case 'bpf_loop_iter_limit_nested' rewritten to use return
    value check instead of verifier log message checks
    (suggested by Alexei).
- V1 [3] -> V2, changes suggested by Andrii:
  - small changes for error handling code in __check_func_call();
  - callback body processing log is now matched in relevant
    verifier_subprog_precision.c tests;
  - R1 passed to bpf_loop() is now always marked as precise;
  - log level 2 message for bpf_loop() iteration termination instead of
    iteration depth messages;
  - __no_msg macro removed;
  - bpf_loop_iter_limit_nested updated to avoid using __no_msg;
  - commit message for patch #3 updated according to Alexei's request.

[0] https://lore.kernel.org/bpf/CA+vRuzPChFNXmouzGG+wsy=6eMcfr1mFG0F3g7rbg-sedGKW3w@mail.gmail.com/
[1] https://lore.kernel.org/bpf/20231024000917.12153-1-eddyz87@gmail.com/
[2] git@github.com:cilium/tetragon.git
[3] https://lore.kernel.org/bpf/20231116021803.9982-1-eddyz87@gmail.com/T/#t
[4] https://lore.kernel.org/bpf/20231118013355.7943-1-eddyz87@gmail.com/T/#t
[5] https://lore.kernel.org/bpf/20231120225945.11741-1-eddyz87@gmail.com/T/#t
====================

Link: https://lore.kernel.org/r/20231121020701.26440-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents fcb905d8 57e2a52d
...@@ -301,6 +301,17 @@ struct bpf_func_state { ...@@ -301,6 +301,17 @@ struct bpf_func_state {
struct tnum callback_ret_range; struct tnum callback_ret_range;
bool in_async_callback_fn; bool in_async_callback_fn;
bool in_exception_callback_fn; bool in_exception_callback_fn;
/* For callback calling functions that limit number of possible
* callback executions (e.g. bpf_loop) keeps track of current
* simulated iteration number.
* Value in frame N refers to number of times callback with frame
* N+1 was simulated, e.g. for the following call:
*
* bpf_loop(..., fn, ...); | suppose current frame is N
* | fn would be simulated in frame N+1
* | number of simulations is tracked in frame N
*/
u32 callback_depth;
/* The following fields should be last. See copy_func_state() */ /* The following fields should be last. See copy_func_state() */
int acquired_refs; int acquired_refs;
...@@ -400,6 +411,7 @@ struct bpf_verifier_state { ...@@ -400,6 +411,7 @@ struct bpf_verifier_state {
struct bpf_idx_pair *jmp_history; struct bpf_idx_pair *jmp_history;
u32 jmp_history_cnt; u32 jmp_history_cnt;
u32 dfs_depth; u32 dfs_depth;
u32 callback_unroll_depth;
}; };
#define bpf_get_spilled_reg(slot, frame, mask) \ #define bpf_get_spilled_reg(slot, frame, mask) \
...@@ -511,6 +523,10 @@ struct bpf_insn_aux_data { ...@@ -511,6 +523,10 @@ struct bpf_insn_aux_data {
* this instruction, regardless of any heuristics * this instruction, regardless of any heuristics
*/ */
bool force_checkpoint; bool force_checkpoint;
/* true if instruction is a call to a helper function that
* accepts callback function as a parameter.
*/
bool calls_callback;
}; };
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
......
...@@ -547,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) ...@@ -547,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
return func_id == BPF_FUNC_dynptr_data; return func_id == BPF_FUNC_dynptr_data;
} }
static bool is_callback_calling_kfunc(u32 btf_id); static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn); static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
static bool is_callback_calling_function(enum bpf_func_id func_id) static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
{ {
return func_id == BPF_FUNC_for_each_map_elem || return func_id == BPF_FUNC_for_each_map_elem ||
func_id == BPF_FUNC_timer_set_callback ||
func_id == BPF_FUNC_find_vma || func_id == BPF_FUNC_find_vma ||
func_id == BPF_FUNC_loop || func_id == BPF_FUNC_loop ||
func_id == BPF_FUNC_user_ringbuf_drain; func_id == BPF_FUNC_user_ringbuf_drain;
...@@ -564,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id) ...@@ -564,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id)
return func_id == BPF_FUNC_timer_set_callback; return func_id == BPF_FUNC_timer_set_callback;
} }
static bool is_callback_calling_function(enum bpf_func_id func_id)
{
return is_sync_callback_calling_function(func_id) ||
is_async_callback_calling_function(func_id);
}
static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
{
return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
(bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
}
static bool is_storage_get_function(enum bpf_func_id func_id) static bool is_storage_get_function(enum bpf_func_id func_id)
{ {
return func_id == BPF_FUNC_sk_storage_get || return func_id == BPF_FUNC_sk_storage_get ||
...@@ -1808,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, ...@@ -1808,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->first_insn_idx = src->first_insn_idx; dst_state->first_insn_idx = src->first_insn_idx;
dst_state->last_insn_idx = src->last_insn_idx; dst_state->last_insn_idx = src->last_insn_idx;
dst_state->dfs_depth = src->dfs_depth; dst_state->dfs_depth = src->dfs_depth;
dst_state->callback_unroll_depth = src->callback_unroll_depth;
dst_state->used_as_loop_entry = src->used_as_loop_entry; dst_state->used_as_loop_entry = src->used_as_loop_entry;
for (i = 0; i <= src->curframe; i++) { for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i]; dst = dst_state->frame[i];
...@@ -3439,13 +3451,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env, ...@@ -3439,13 +3451,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env,
reg->subreg_def = DEF_NOT_SUBREG; reg->subreg_def = DEF_NOT_SUBREG;
} }
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
enum reg_arg_type t) enum reg_arg_type t)
{ {
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
struct bpf_reg_state *reg, *regs = state->regs; struct bpf_reg_state *reg;
bool rw64; bool rw64;
if (regno >= MAX_BPF_REG) { if (regno >= MAX_BPF_REG) {
...@@ -3486,6 +3496,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, ...@@ -3486,6 +3496,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return 0; return 0;
} }
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
enum reg_arg_type t)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
return __check_reg_arg(env, state->regs, regno, t);
}
static void mark_jmp_point(struct bpf_verifier_env *env, int idx) static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
{ {
env->insn_aux_data[idx].jmp_point = true; env->insn_aux_data[idx].jmp_point = true;
...@@ -3724,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) ...@@ -3724,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
} }
} }
static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
/* For given verifier state backtrack_insn() is called from the last insn to /* For given verifier state backtrack_insn() is called from the last insn to
* the first insn. Its purpose is to compute a bitmask of registers and * the first insn. Its purpose is to compute a bitmask of registers and
* stack slots that needs precision in the parent verifier state. * stack slots that needs precision in the parent verifier state.
...@@ -3899,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, ...@@ -3899,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
return -EFAULT; return -EFAULT;
return 0; return 0;
} }
} else if ((bpf_helper_call(insn) && } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
is_callback_calling_function(insn->imm) && /* exit from callback subprog to callback-calling helper or
!is_async_callback_calling_function(insn->imm)) || * kfunc call. Use idx/subseq_idx check to discern it from
(bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { * straight line code backtracking.
/* callback-calling helper or kfunc call, which means * Unlike the subprog call handling above, we shouldn't
* we are exiting from subprog, but unlike the subprog * propagate precision of r1-r5 (if any requested), as they are
* call handling above, we shouldn't propagate * not actually arguments passed directly to callback subprogs
* precision of r1-r5 (if any requested), as they are
* not actually arguments passed directly to callback
* subprogs
*/ */
if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
...@@ -3943,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, ...@@ -3943,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
} else if (opcode == BPF_EXIT) { } else if (opcode == BPF_EXIT) {
bool r0_precise; bool r0_precise;
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { /* Backtracking to a nested function call, 'idx' is a part of
/* if backtracing was looking for registers R1-R5 * the inner frame 'subseq_idx' is a part of the outer frame.
* they should have been found already. * In case of a regular function call, instructions giving
* precision to registers R1-R5 should have been found already.
* In case of a callback, it is ok to have R1-R5 marked for
* backtracking, as these registers are set by the function
* invoking callback.
*/ */
if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
WARN_ONCE(1, "verifier backtracking bug"); WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT; return -EFAULT;
...@@ -9350,7 +9376,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env, ...@@ -9350,7 +9376,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
/* after the call registers r0 - r5 were scratched */ /* after the call registers r0 - r5 were scratched */
for (i = 0; i < CALLER_SAVED_REGS; i++) { for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]); mark_reg_not_init(env, regs, caller_saved[i]);
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
} }
} }
...@@ -9363,11 +9389,10 @@ static int set_callee_state(struct bpf_verifier_env *env, ...@@ -9363,11 +9389,10 @@ static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller, struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx); struct bpf_func_state *callee, int insn_idx);
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
int *insn_idx, int subprog, set_callee_state_fn set_callee_state_cb,
set_callee_state_fn set_callee_state_cb) struct bpf_verifier_state *state)
{ {
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller, *callee; struct bpf_func_state *caller, *callee;
int err; int err;
...@@ -9377,45 +9402,64 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -9377,45 +9402,64 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -E2BIG; return -E2BIG;
} }
caller = state->frame[state->curframe];
if (state->frame[state->curframe + 1]) { if (state->frame[state->curframe + 1]) {
verbose(env, "verifier bug. Frame %d already allocated\n", verbose(env, "verifier bug. Frame %d already allocated\n",
state->curframe + 1); state->curframe + 1);
return -EFAULT; return -EFAULT;
} }
err = btf_check_subprog_call(env, subprog, caller->regs); caller = state->frame[state->curframe];
if (err == -EFAULT) callee = kzalloc(sizeof(*callee), GFP_KERNEL);
return err; if (!callee)
if (subprog_is_global(env, subprog)) { return -ENOMEM;
if (err) { state->frame[state->curframe + 1] = callee;
verbose(env, "Caller passes invalid args into func#%d\n",
subprog);
return err;
} else {
if (env->log.level & BPF_LOG_LEVEL)
verbose(env,
"Func#%d is global and valid. Skipping.\n",
subprog);
clear_caller_saved_regs(env, caller->regs);
/* All global functions return a 64-bit SCALAR_VALUE */ /* callee cannot access r0, r6 - r9 for reading and has to write
mark_reg_unknown(env, caller->regs, BPF_REG_0); * into its own stack before reading from it.
caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; * callee can read/write into caller's stack
*/
init_func_state(env, callee,
/* remember the callsite, it will be used by bpf_exit */
callsite,
state->curframe + 1 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
/* Transfer references to the callee */
err = copy_reference_state(callee, caller);
err = err ?: set_callee_state_cb(env, caller, callee, callsite);
if (err)
goto err_out;
/* only increment it after check_reg_arg() finished */
state->curframe++;
/* continue with next insn after call */
return 0; return 0;
}
} err_out:
free_func_state(callee);
state->frame[state->curframe + 1] = NULL;
return err;
}
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
{
struct bpf_verifier_state *state = env->cur_state, *callback_state;
struct bpf_func_state *caller, *callee;
int err;
caller = state->frame[state->curframe];
err = btf_check_subprog_call(env, subprog, caller->regs);
if (err == -EFAULT)
return err;
/* set_callee_state is used for direct subprog calls, but we are /* set_callee_state is used for direct subprog calls, but we are
* interested in validating only BPF helpers that can call subprogs as * interested in validating only BPF helpers that can call subprogs as
* callbacks * callbacks
*/ */
if (set_callee_state_cb != set_callee_state) {
env->subprog_info[subprog].is_cb = true; env->subprog_info[subprog].is_cb = true;
if (bpf_pseudo_kfunc_call(insn) && if (bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_kfunc(insn->imm)) { !is_sync_callback_calling_kfunc(insn->imm)) {
verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
func_id_name(insn->imm), insn->imm); func_id_name(insn->imm), insn->imm);
return -EFAULT; return -EFAULT;
...@@ -9425,7 +9469,6 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -9425,7 +9469,6 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
func_id_name(insn->imm), insn->imm); func_id_name(insn->imm), insn->imm);
return -EFAULT; return -EFAULT;
} }
}
if (insn->code == (BPF_JMP | BPF_CALL) && if (insn->code == (BPF_JMP | BPF_CALL) &&
insn->src_reg == 0 && insn->src_reg == 0 &&
...@@ -9435,53 +9478,83 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -9435,53 +9478,83 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* there is no real recursion here. timer callbacks are async */ /* there is no real recursion here. timer callbacks are async */
env->subprog_info[subprog].is_async_cb = true; env->subprog_info[subprog].is_async_cb = true;
async_cb = push_async_cb(env, env->subprog_info[subprog].start, async_cb = push_async_cb(env, env->subprog_info[subprog].start,
*insn_idx, subprog); insn_idx, subprog);
if (!async_cb) if (!async_cb)
return -EFAULT; return -EFAULT;
callee = async_cb->frame[0]; callee = async_cb->frame[0];
callee->async_entry_cnt = caller->async_entry_cnt + 1; callee->async_entry_cnt = caller->async_entry_cnt + 1;
/* Convert bpf_timer_set_callback() args into timer callback args */ /* Convert bpf_timer_set_callback() args into timer callback args */
err = set_callee_state_cb(env, caller, callee, *insn_idx); err = set_callee_state_cb(env, caller, callee, insn_idx);
if (err)
return err;
return 0;
}
/* for callback functions enqueue entry to callback and
* proceed with next instruction within current frame.
*/
callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
if (!callback_state)
return -ENOMEM;
err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
callback_state);
if (err) if (err)
return err; return err;
callback_state->callback_unroll_depth++;
callback_state->frame[callback_state->curframe - 1]->callback_depth++;
caller->callback_depth = 0;
return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_state *caller;
int err, subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
return -EFAULT;
}
caller = state->frame[state->curframe];
err = btf_check_subprog_call(env, subprog, caller->regs);
if (err == -EFAULT)
return err;
if (subprog_is_global(env, subprog)) {
if (err) {
verbose(env, "Caller passes invalid args into func#%d\n", subprog);
return err;
}
if (env->log.level & BPF_LOG_LEVEL)
verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
clear_caller_saved_regs(env, caller->regs); clear_caller_saved_regs(env, caller->regs);
/* All global functions return a 64-bit SCALAR_VALUE */
mark_reg_unknown(env, caller->regs, BPF_REG_0); mark_reg_unknown(env, caller->regs, BPF_REG_0);
caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
/* continue with next insn after call */ /* continue with next insn after call */
return 0; return 0;
} }
callee = kzalloc(sizeof(*callee), GFP_KERNEL); /* for regular function entry setup new frame and continue
if (!callee) * from that frame.
return -ENOMEM;
state->frame[state->curframe + 1] = callee;
/* callee cannot access r0, r6 - r9 for reading and has to write
* into its own stack before reading from it.
* callee can read/write into caller's stack
*/ */
init_func_state(env, callee, err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
/* remember the callsite, it will be used by bpf_exit */
*insn_idx /* callsite */,
state->curframe + 1 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
/* Transfer references to the callee */
err = copy_reference_state(callee, caller);
if (err)
goto err_out;
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err) if (err)
goto err_out; return err;
clear_caller_saved_regs(env, caller->regs); clear_caller_saved_regs(env, caller->regs);
/* only increment it after check_reg_arg() finished */
state->curframe++;
/* and go analyze first insn of the callee */ /* and go analyze first insn of the callee */
*insn_idx = env->subprog_info[subprog].start - 1; *insn_idx = env->subprog_info[subprog].start - 1;
...@@ -9489,14 +9562,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -9489,14 +9562,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
verbose(env, "caller:\n"); verbose(env, "caller:\n");
print_verifier_state(env, caller, true); print_verifier_state(env, caller, true);
verbose(env, "callee:\n"); verbose(env, "callee:\n");
print_verifier_state(env, callee, true); print_verifier_state(env, state->frame[state->curframe], true);
} }
return 0;
err_out: return 0;
free_func_state(callee);
state->frame[state->curframe + 1] = NULL;
return err;
} }
int map_set_for_each_callback_args(struct bpf_verifier_env *env, int map_set_for_each_callback_args(struct bpf_verifier_env *env,
...@@ -9540,22 +9609,6 @@ static int set_callee_state(struct bpf_verifier_env *env, ...@@ -9540,22 +9609,6 @@ static int set_callee_state(struct bpf_verifier_env *env,
return 0; return 0;
} }
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
int subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn);
return -EFAULT;
}
return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
}
static int set_map_elem_callback_state(struct bpf_verifier_env *env, static int set_map_elem_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller, struct bpf_func_state *caller,
struct bpf_func_state *callee, struct bpf_func_state *callee,
...@@ -9748,9 +9801,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) ...@@ -9748,9 +9801,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{ {
struct bpf_verifier_state *state = env->cur_state; struct bpf_verifier_state *state = env->cur_state, *prev_st;
struct bpf_func_state *caller, *callee; struct bpf_func_state *caller, *callee;
struct bpf_reg_state *r0; struct bpf_reg_state *r0;
bool in_callback_fn;
int err; int err;
callee = state->frame[state->curframe]; callee = state->frame[state->curframe];
...@@ -9779,6 +9833,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) ...@@ -9779,6 +9833,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
return -EINVAL; return -EINVAL;
} }
if (!calls_callback(env, callee->callsite)) {
verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
*insn_idx, callee->callsite);
return -EFAULT;
}
} else { } else {
/* return to the caller whatever r0 had in the callee */ /* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0; caller->regs[BPF_REG_0] = *r0;
...@@ -9796,7 +9855,16 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) ...@@ -9796,7 +9855,16 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return err; return err;
} }
/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
* there function call logic would reschedule callback visit. If iteration
* converges is_state_visited() would prune that visit eventually.
*/
in_callback_fn = callee->in_callback_fn;
if (in_callback_fn)
*insn_idx = callee->callsite;
else
*insn_idx = callee->callsite + 1; *insn_idx = callee->callsite + 1;
if (env->log.level & BPF_LOG_LEVEL) { if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "returning from callee:\n"); verbose(env, "returning from callee:\n");
print_verifier_state(env, callee, true); print_verifier_state(env, callee, true);
...@@ -9807,6 +9875,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) ...@@ -9807,6 +9875,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
* bpf_throw, this will be done by copy_verifier_state for extra frames. */ * bpf_throw, this will be done by copy_verifier_state for extra frames. */
free_func_state(callee); free_func_state(callee);
state->frame[state->curframe--] = NULL; state->frame[state->curframe--] = NULL;
/* for callbacks widen imprecise scalars to make programs like below verify:
*
* struct ctx { int i; }
* void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
* ...
* struct ctx = { .i = 0; }
* bpf_loop(100, cb, &ctx, 0);
*
* This is similar to what is done in process_iter_next_call() for open
* coded iterators.
*/
prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
if (prev_st) {
err = widen_imprecise_scalars(env, prev_st, state);
if (err)
return err;
}
return 0; return 0;
} }
...@@ -10209,15 +10295,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -10209,15 +10295,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
} }
break; break;
case BPF_FUNC_for_each_map_elem: case BPF_FUNC_for_each_map_elem:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_map_elem_callback_state); set_map_elem_callback_state);
break; break;
case BPF_FUNC_timer_set_callback: case BPF_FUNC_timer_set_callback:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_timer_callback_state); set_timer_callback_state);
break; break;
case BPF_FUNC_find_vma: case BPF_FUNC_find_vma:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_find_vma_callback_state); set_find_vma_callback_state);
break; break;
case BPF_FUNC_snprintf: case BPF_FUNC_snprintf:
...@@ -10225,8 +10311,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -10225,8 +10311,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
break; break;
case BPF_FUNC_loop: case BPF_FUNC_loop:
update_loop_inline_state(env, meta.subprogno); update_loop_inline_state(env, meta.subprogno);
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, /* Verifier relies on R1 value to determine if bpf_loop() iteration
* is finished, thus mark it precise.
*/
err = mark_chain_precision(env, BPF_REG_1);
if (err)
return err;
if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_loop_callback_state); set_loop_callback_state);
} else {
cur_func(env)->callback_depth = 0;
if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "frame%d bpf_loop iteration limit reached\n",
env->cur_state->curframe);
}
break; break;
case BPF_FUNC_dynptr_from_mem: case BPF_FUNC_dynptr_from_mem:
if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
...@@ -10322,7 +10421,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ...@@ -10322,7 +10421,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
break; break;
} }
case BPF_FUNC_user_ringbuf_drain: case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_user_ringbuf_callback_state); set_user_ringbuf_callback_state);
break; break;
} }
...@@ -11211,7 +11310,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id) ...@@ -11211,7 +11310,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
} }
static bool is_callback_calling_kfunc(u32 btf_id) static bool is_sync_callback_calling_kfunc(u32 btf_id)
{ {
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
} }
...@@ -11963,6 +12062,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -11963,6 +12062,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EACCES; return -EACCES;
} }
/* Check the arguments */
err = check_kfunc_args(env, &meta, insn_idx);
if (err < 0)
return err;
if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_rbtree_add_callback_state);
if (err) {
verbose(env, "kfunc %s#%d failed callback verification\n",
func_name, meta.func_id);
return err;
}
}
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
...@@ -11998,10 +12112,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -11998,10 +12112,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL; return -EINVAL;
} }
/* Check the arguments */
err = check_kfunc_args(env, &meta, insn_idx);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted /* In case of release function, we get register number of refcounted
* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/ */
...@@ -12035,16 +12145,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -12035,16 +12145,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
} }
} }
if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_rbtree_add_callback_state);
if (err) {
verbose(env, "kfunc %s#%d failed callback verification\n",
func_name, meta.func_id);
return err;
}
}
if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
if (!bpf_jit_supports_exceptions()) { if (!bpf_jit_supports_exceptions()) {
verbose(env, "JIT does not support calling kfunc %s#%d\n", verbose(env, "JIT does not support calling kfunc %s#%d\n",
...@@ -15408,6 +15508,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) ...@@ -15408,6 +15508,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
return env->insn_aux_data[insn_idx].force_checkpoint; return env->insn_aux_data[insn_idx].force_checkpoint;
} }
static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].calls_callback = true;
}
static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
{
return env->insn_aux_data[insn_idx].calls_callback;
}
enum { enum {
DONE_EXPLORING = 0, DONE_EXPLORING = 0,
...@@ -15521,6 +15630,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) ...@@ -15521,6 +15630,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
* async state will be pushed for further exploration. * async state will be pushed for further exploration.
*/ */
mark_prune_point(env, t); mark_prune_point(env, t);
/* For functions that invoke callbacks it is not known how many times
* callback would be called. Verifier models callback calling functions
* by repeatedly visiting callback bodies and returning to origin call
* instruction.
* In order to stop such iteration verifier needs to identify when a
* state identical some state from a previous iteration is reached.
* Check below forces creation of checkpoint before callback calling
* instruction to allow search for such identical states.
*/
if (is_sync_callback_calling_insn(insn)) {
mark_calls_callback(env, t);
mark_force_checkpoint(env, t);
mark_prune_point(env, t);
mark_jmp_point(env, t);
}
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
struct bpf_kfunc_call_arg_meta meta; struct bpf_kfunc_call_arg_meta meta;
...@@ -16990,10 +17114,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) ...@@ -16990,10 +17114,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
} }
goto skip_inf_loop_check; goto skip_inf_loop_check;
} }
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, true))
goto hit;
goto skip_inf_loop_check;
}
/* attempt to detect infinite loop to avoid unnecessary doomed work */ /* attempt to detect infinite loop to avoid unnecessary doomed work */
if (states_maybe_looping(&sl->state, cur) && if (states_maybe_looping(&sl->state, cur) &&
states_equal(env, &sl->state, cur, false) && states_equal(env, &sl->state, cur, false) &&
!iter_active_depths_differ(&sl->state, cur)) { !iter_active_depths_differ(&sl->state, cur) &&
sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
verbose_linfo(env, insn_idx, "; "); verbose_linfo(env, insn_idx, "; ");
verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "infinite loop detected at insn %d\n", insn_idx);
verbose(env, "cur state:"); verbose(env, "cur state:");
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "verifier_helper_restricted.skel.h" #include "verifier_helper_restricted.skel.h"
#include "verifier_helper_value_access.skel.h" #include "verifier_helper_value_access.skel.h"
#include "verifier_int_ptr.skel.h" #include "verifier_int_ptr.skel.h"
#include "verifier_iterating_callbacks.skel.h"
#include "verifier_jeq_infer_not_null.skel.h" #include "verifier_jeq_infer_not_null.skel.h"
#include "verifier_ld_ind.skel.h" #include "verifier_ld_ind.skel.h"
#include "verifier_ldsx.skel.h" #include "verifier_ldsx.skel.h"
...@@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces ...@@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
......
...@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) ...@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
return 0; return 0;
} }
SEC("fentry/" SYS_PREFIX "sys_getpgid") static int outer_loop(__u32 index, void *data)
int benchmark(void *ctx)
{ {
for (int i = 0; i < 1000; i++) {
bpf_loop(nr_loops, empty_callback, NULL, 0); bpf_loop(nr_loops, empty_callback, NULL, 0);
__sync_add_and_fetch(&hits, nr_loops); __sync_add_and_fetch(&hits, nr_loops);
} return 0;
}
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
bpf_loop(1000, outer_loop, NULL, 0);
return 0; return 0;
} }
...@@ -33,6 +33,7 @@ int underflow_prog(void *ctx) ...@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
if (!p) if (!p)
return 0; return 0;
bpf_for_each_map_elem(&array_map, cb1, &p, 0); bpf_for_each_map_elem(&array_map, cb1, &p, 0);
bpf_kfunc_call_test_release(p);
return 0; return 0;
} }
......
...@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) ...@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
return 0; return 0;
bpf_spin_lock(&lock); bpf_spin_lock(&lock);
bpf_rbtree_add(&rbtree, &f->node, rbless); bpf_rbtree_add(&rbtree, &f->node, rbless);
bpf_spin_unlock(&lock);
return 0; return 0;
} }
...@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) ...@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
if (!f) if (!f)
return 0; return 0;
bpf_loop(5, subprog_cb_ref, NULL, 0); bpf_loop(5, subprog_cb_ref, NULL, 0);
bpf_obj_drop(f);
return 0; return 0;
} }
......
...@@ -24,9 +24,11 @@ struct task_struct {}; ...@@ -24,9 +24,11 @@ struct task_struct {};
#define STACK_TABLE_EPOCH_SHIFT 20 #define STACK_TABLE_EPOCH_SHIFT 20
#define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_STR_LEN 1
#define STROBE_MAX_CFGS 32 #define STROBE_MAX_CFGS 32
#define READ_MAP_VAR_PAYLOAD_CAP \
((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
#define STROBE_MAX_PAYLOAD \ #define STROBE_MAX_PAYLOAD \
(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
struct strobe_value_header { struct strobe_value_header {
/* /*
...@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, ...@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base, size_t idx, void *tls_base,
struct strobe_value_generic *value, struct strobe_value_generic *value,
struct strobemeta_payload *data, struct strobemeta_payload *data,
void *payload) size_t off)
{ {
void *location; void *location;
uint64_t len; uint64_t len;
...@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, ...@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0; return 0;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
/* /*
* if bpf_probe_read_user_str returns error (<0), due to casting to * if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is * unsinged int, it will become big number, so next check is
...@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, ...@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0; return 0;
data->str_lens[idx] = len; data->str_lens[idx] = len;
return len; return off + len;
} }
static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base, size_t idx, void *tls_base,
struct strobe_value_generic *value, struct strobe_value_generic *value,
struct strobemeta_payload *data, struct strobemeta_payload *data,
void *payload) size_t off)
{ {
struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map; struct strobe_map_raw map;
...@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
location = calc_location(&cfg->map_locs[idx], tls_base); location = calc_location(&cfg->map_locs[idx], tls_base);
if (!location) if (!location)
return payload; return off;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
return payload; return off;
descr->id = map.id; descr->id = map.id;
descr->cnt = map.cnt; descr->cnt = map.cnt;
...@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1; data->req_meta_valid = 1;
} }
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len; descr->tag_len = len;
payload += len; off += len;
} }
#ifdef NO_UNROLL #ifdef NO_UNROLL
...@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
break; break;
descr->key_lens[i] = 0; descr->key_lens[i] = 0;
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].key); map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len; descr->key_lens[i] = len;
payload += len; off += len;
} }
descr->val_lens[i] = 0; descr->val_lens[i] = 0;
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].val); map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len; descr->val_lens[i] = len;
payload += len; off += len;
} }
} }
return payload; return off;
} }
#ifdef USE_BPF_LOOP #ifdef USE_BPF_LOOP
...@@ -455,14 +457,20 @@ struct read_var_ctx { ...@@ -455,14 +457,20 @@ struct read_var_ctx {
struct strobemeta_payload *data; struct strobemeta_payload *data;
void *tls_base; void *tls_base;
struct strobemeta_cfg *cfg; struct strobemeta_cfg *cfg;
void *payload; size_t payload_off;
/* value gets mutated */ /* value gets mutated */
struct strobe_value_generic *value; struct strobe_value_generic *value;
enum read_type type; enum read_type type;
}; };
static int read_var_callback(__u32 index, struct read_var_ctx *ctx) static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
{ {
/* lose precision info for ctx->payload_off, verifier won't track
* double xor, barrier_var() is needed to force clang keep both xors.
*/
ctx->payload_off ^= index;
barrier_var(ctx->payload_off);
ctx->payload_off ^= index;
switch (ctx->type) { switch (ctx->type) {
case READ_INT_VAR: case READ_INT_VAR:
if (index >= STROBE_MAX_INTS) if (index >= STROBE_MAX_INTS)
...@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) ...@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
case READ_MAP_VAR: case READ_MAP_VAR:
if (index >= STROBE_MAX_MAPS) if (index >= STROBE_MAX_MAPS)
return 1; return 1;
ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
ctx->value, ctx->data, ctx->payload); return 1;
ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
ctx->value, ctx->data, ctx->payload_off);
break; break;
case READ_STR_VAR: case READ_STR_VAR:
if (index >= STROBE_MAX_STRS) if (index >= STROBE_MAX_STRS)
return 1; return 1;
ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
ctx->value, ctx->data, ctx->payload); return 1;
ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
ctx->value, ctx->data, ctx->payload_off);
break; break;
} }
return 0; return 0;
...@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
pid_t pid = bpf_get_current_pid_tgid() >> 32; pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0}; struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg; struct strobemeta_cfg *cfg;
void *tls_base, *payload; size_t payload_off;
void *tls_base;
cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
if (!cfg) if (!cfg)
...@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
data->int_vals_set_mask = 0; data->int_vals_set_mask = 0;
data->req_meta_valid = 0; data->req_meta_valid = 0;
payload = data->payload; payload_off = 0;
/* /*
* we don't have struct task_struct definition, it should be: * we don't have struct task_struct definition, it should be:
* tls_base = (void *)task->thread.fsbase; * tls_base = (void *)task->thread.fsbase;
...@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
.tls_base = tls_base, .tls_base = tls_base,
.value = &value, .value = &value,
.data = data, .data = data,
.payload = payload, .payload_off = 0,
}; };
int err; int err;
...@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_MAPS) if (err != STROBE_MAX_MAPS)
return NULL; return NULL;
payload_off = ctx.payload_off;
/* this should not really happen, here only to satisfy verifier */
if (payload_off > sizeof(data->payload))
payload_off = sizeof(data->payload);
#else #else
#ifdef NO_UNROLL #ifdef NO_UNROLL
#pragma clang loop unroll(disable) #pragma clang loop unroll(disable)
...@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll #pragma unroll
#endif /* NO_UNROLL */ #endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) { for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload += read_str_var(cfg, i, tls_base, &value, data, payload); payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
} }
#ifdef NO_UNROLL #ifdef NO_UNROLL
#pragma clang loop unroll(disable) #pragma clang loop unroll(disable)
...@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll #pragma unroll
#endif /* NO_UNROLL */ #endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) { for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload = read_map_var(cfg, i, tls_base, &value, data, payload); payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
} }
#endif /* USE_BPF_LOOP */ #endif /* USE_BPF_LOOP */
...@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, ...@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
* return pointer right after end of payload, so it's possible to * return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent * calculate exact amount of useful data that needs to be sent
*/ */
return payload; return &data->payload[payload_off];
} }
SEC("raw_tracepoint/kfree_skb") SEC("raw_tracepoint/kfree_skb")
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 8);
__type(key, __u32);
__type(value, __u64);
} map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
__uint(max_entries, 8);
} ringbuf SEC(".maps");
struct vm_area_struct;
struct bpf_map;
struct buf_context {
char *buf;
};
struct num_context {
__u64 i;
__u64 j;
};
__u8 choice_arr[2] = { 0, 1 };
static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
{
if (idx == 0) {
ctx->buf = (char *)(0xDEAD);
return 0;
}
if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
return 1;
return 0;
}
SEC("?raw_tp")
__failure __msg("R1 type=scalar expected=fp")
int unsafe_on_2nd_iter(void *unused)
{
char buf[4];
struct buf_context loop_ctx = { .buf = buf };
bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
return 0;
}
static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
{
ctx->i = 0;
return 0;
}
SEC("?raw_tp")
__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
int unsafe_on_zero_iter(void *unused)
{
struct num_context loop_ctx = { .i = 32 };
bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
return choice_arr[loop_ctx.i];
}
static int widening_cb(__u32 idx, struct num_context *ctx)
{
++ctx->i;
return 0;
}
SEC("?raw_tp")
__success
int widening(void *unused)
{
struct num_context loop_ctx = { .i = 0, .j = 1 };
bpf_loop(100, widening_cb, &loop_ctx, 0);
/* loop_ctx.j is not changed during callback iteration,
* verifier should not apply widening to it.
*/
return choice_arr[loop_ctx.j];
}
static int loop_detection_cb(__u32 idx, struct num_context *ctx)
{
for (;;) {}
return 0;
}
SEC("?raw_tp")
__failure __msg("infinite loop detected")
int loop_detection(void *unused)
{
struct num_context loop_ctx = { .i = 0 };
bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
return 0;
}
static __always_inline __u64 oob_state_machine(struct num_context *ctx)
{
switch (ctx->i) {
case 0:
ctx->i = 1;
break;
case 1:
ctx->i = 32;
break;
}
return 0;
}
static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
{
return oob_state_machine(data);
}
SEC("?raw_tp")
__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
int unsafe_for_each_map_elem(void *unused)
{
struct num_context loop_ctx = { .i = 0 };
bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
return choice_arr[loop_ctx.i];
}
static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
{
return oob_state_machine(data);
}
SEC("?raw_tp")
__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
int unsafe_ringbuf_drain(void *unused)
{
struct num_context loop_ctx = { .i = 0 };
bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
return choice_arr[loop_ctx.i];
}
static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
{
return oob_state_machine(data);
}
SEC("?raw_tp")
__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
int unsafe_find_vma(void *unused)
{
struct task_struct *task = bpf_get_current_task_btf();
struct num_context loop_ctx = { .i = 0 };
bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
return choice_arr[loop_ctx.i];
}
static int iter_limit_cb(__u32 idx, struct num_context *ctx)
{
ctx->i++;
return 0;
}
SEC("?raw_tp")
__success
int bpf_loop_iter_limit_ok(void *unused)
{
struct num_context ctx = { .i = 0 };
bpf_loop(1, iter_limit_cb, &ctx, 0);
return choice_arr[ctx.i];
}
SEC("?raw_tp")
__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
int bpf_loop_iter_limit_overflow(void *unused)
{
struct num_context ctx = { .i = 0 };
bpf_loop(2, iter_limit_cb, &ctx, 0);
return choice_arr[ctx.i];
}
static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
{
ctx->i += 100;
return 0;
}
static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
{
ctx->i += 10;
return 0;
}
static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
{
ctx->i += 1;
bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
return 0;
}
/* Check that path visiting every callback function once had been
* reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
* with each decimal digit corresponding to a callback visit marker.
*/
SEC("socket")
__success __retval(111111)
int bpf_loop_iter_limit_nested(void *unused)
{
struct num_context ctx1 = { .i = 0 };
struct num_context ctx2 = { .i = 0 };
__u64 a, b, c;
bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
a = ctx1.i;
b = ctx2.i;
/* Force 'ctx1.i' and 'ctx2.i' precise. */
c = choice_arr[(a + b) % 2];
/* This makes 'c' zero, but neither clang nor verifier know it. */
c /= 10;
/* Make sure that verifier does not visit 'impossible' states:
* enumerate all possible callback visit masks.
*/
if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
asm volatile ("r0 /= 0;" ::: "r0");
return 1000 * a + b + c;
}
char _license[] SEC("license") = "GPL";
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
 __naked int callback_result_precise(void)
 {
 	asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
 __naked int parent_callee_saved_reg_precise_with_callback(void)
 {
 	asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
 __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
 __naked int parent_stack_slot_precise_with_callback(void)
 {
 	asm volatile (
...
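For readers decoding the expected log sequences above: callback_result_precise stores the bpf_loop() return value and later uses it as an array index, which is what forces r0 (copied into r6) to be marked precise across the callback boundary. The real tests are written in __naked asm so instruction indices stay stable; a rough, purely illustrative C equivalent (the 'vals' array and function names below are hypothetical, not taken from the patch) would look like:

static int empty_cb(__u32 idx, void *unused)
{
	return 0;
}

__u32 vals[4]; /* hypothetical global, stands in for the array the asm indexes */

SEC("?raw_tp")
int callback_result_precise_c_shape(void *unused)
{
	__u64 r6;

	r6 = bpf_loop(3, empty_cb, NULL, 0);	/* r0 -> r6 */
	if (r6 > 3)
		return 0;
	return vals[r6];	/* indexing by r6 forces it (and r0) precise */
}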
@@ -53,6 +53,8 @@
 #define DEFAULT_TTL 64
 #define MAX_ALLOWED_PORTS 8
 
+#define MAX_PACKET_OFF 0xffff
+
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void)
 }
 
 struct tcpopt_context {
-	__u8 *ptr;
-	__u8 *end;
+	void *data;
 	void *data_end;
 	__be32 *tsecr;
 	__u8 wscale;
 	bool option_timestamp;
 	bool option_sack;
+	__u32 off;
 };
 
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
 {
-	__u8 opcode, opsize;
+	__u64 off = ctx->off;
+	__u8 *data;
 
-	if (ctx->ptr >= ctx->end)
-		return 1;
-	if (ctx->ptr >= ctx->data_end)
-		return 1;
+	/* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+	if (off > MAX_PACKET_OFF - sz)
+		return NULL;
 
-	opcode = ctx->ptr[0];
+	data = ctx->data + off;
+	barrier_var(data);
+	if (data + sz >= ctx->data_end)
+		return NULL;
 
-	if (opcode == TCPOPT_EOL)
-		return 1;
-	if (opcode == TCPOPT_NOP) {
-		++ctx->ptr;
-		return 0;
-	}
+	ctx->off += sz;
+	return data;
+}
 
-	if (ctx->ptr + 1 >= ctx->end)
-		return 1;
-	if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+	__u8 *opcode, *opsize, *wscale, *tsecr;
+	__u32 off = ctx->off;
+
+	opcode = next(ctx, 1);
+	if (!opcode)
 		return 1;
-	opsize = ctx->ptr[1];
-	if (opsize < 2)
+
+	if (*opcode == TCPOPT_EOL)
 		return 1;
+	if (*opcode == TCPOPT_NOP)
+		return 0;
 
-	if (ctx->ptr + opsize > ctx->end)
+	opsize = next(ctx, 1);
+	if (!opsize || *opsize < 2)
 		return 1;
 
-	switch (opcode) {
+	switch (*opcode) {
 	case TCPOPT_WINDOW:
-		if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
-			ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+		wscale = next(ctx, 1);
+		if (!wscale)
+			return 1;
+		if (*opsize == TCPOLEN_WINDOW)
+			ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
 		break;
 	case TCPOPT_TIMESTAMP:
-		if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+		tsecr = next(ctx, 4);
+		if (!tsecr)
+			return 1;
+		if (*opsize == TCPOLEN_TIMESTAMP) {
 			ctx->option_timestamp = true;
 			/* Client's tsval becomes our tsecr. */
-			*ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+			*ctx->tsecr = get_unaligned((__be32 *)tsecr);
 		}
 		break;
 	case TCPOPT_SACK_PERM:
-		if (opsize == TCPOLEN_SACK_PERM)
+		if (*opsize == TCPOLEN_SACK_PERM)
 			ctx->option_sack = true;
 		break;
 	}
 
-	ctx->ptr += opsize;
+	ctx->off = off + *opsize;
 
 	return 0;
 }
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
 static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
 					  __u16 tcp_len, __be32 *tsval,
-					  __be32 *tsecr, void *data_end)
+					  __be32 *tsecr, void *data, void *data_end)
 {
 	struct tcpopt_context loop_ctx = {
-		.ptr = (__u8 *)(tcp_header + 1),
-		.end = (__u8 *)tcp_header + tcp_len,
+		.data = data,
 		.data_end = data_end,
 		.tsecr = tsecr,
 		.wscale = TS_OPT_WSCALE_MASK,
 		.option_timestamp = false,
 		.option_sack = false,
+		/* Note: currently verifier would track .off as unbound scalar.
+		 * In case if verifier would at some point get smarter and
+		 * compute bounded value for this var, beware that it might
+		 * hinder bpf_loop() convergence validation.
+		 */
+		.off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
 	};
 	u32 cookie;
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
 	cookie = (__u32)value;
 
 	if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
-			  &tsopt_buf[0], &tsopt_buf[1], data_end))
+			  &tsopt_buf[0], &tsopt_buf[1], data, data_end))
 		tsopt = tsopt_buf;
 
 	/* Check that there is enough space for a SYNACK. It also covers
...
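Stepping back from the diff: the xdp_synproxy change keeps a scalar offset (bounded against the constant MAX_PACKET_OFF) in the loop context instead of a moving packet pointer, so that bpf_loop() callback states can converge; from the verifier's point of view the offset stays an unknown scalar, which compares equal across iterations. A minimal sketch of that access pattern is shown below, with hypothetical names (walk_ctx, walk_cb, MAX_OFF) and not taken from the patch; it assumes the usual libbpf barrier_var() helper from bpf_helpers.h.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define MAX_OFF 0xffff	/* constant bound, mirrors MAX_PACKET_OFF */

struct walk_ctx {
	void *data;
	void *data_end;
	__u64 off;	/* scalar offset instead of a moving packet pointer */
	__u32 sum;
};

static int walk_cb(__u32 idx, void *raw)
{
	struct walk_ctx *c = raw;
	__u64 off = c->off;
	__u8 *p;

	if (off > MAX_OFF - 1)	/* bound the offset with a constant */
		return 1;	/* non-zero return stops bpf_loop() */
	p = c->data + off;
	barrier_var(p);		/* keep the compiler from folding the checks */
	if (p + 1 >= c->data_end)
		return 1;
	c->sum += *p;
	c->off = off + 1;
	return 0;
}

A program would then call bpf_loop(nr, walk_cb, &ctx, 0) with data/data_end taken from its XDP or TC context, exactly as tscookie_init() does above.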