Commit e384c7b7 authored by Kui-Feng Lee, committed by Andrii Nakryiko

bpf, x86: Create bpf_tramp_run_ctx on the caller thread's stack

BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the
caller thread's stack and set/reset the current bpf_run_ctx before/after
calling a bpf_prog.
Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-3-kuifeng@fb.com
parent f7e0beaf
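
The heart of the change is a save/restore protocol around every trampoline-dispatched program run: the JITed trampoline reserves a struct bpf_tramp_run_ctx in its own stack frame, __bpf_prog_enter() publishes it as the task's current run context while saving the previous pointer into saved_run_ctx, and __bpf_prog_exit() restores that pointer. Below is a minimal user-space model of the protocol, not kernel code: the thread-local cur_ctx stands in for current->bpf_ctx, and set_run_ctx() plays the role of the kernel's bpf_set_run_ctx()/bpf_reset_run_ctx() helpers.

#include <stdio.h>

struct bpf_run_ctx {};	/* empty struct, a GNU C extension the kernel relies on */

struct bpf_tramp_run_ctx {
	struct bpf_run_ctx run_ctx;
	unsigned long long bpf_cookie;
	struct bpf_run_ctx *saved_run_ctx;
};

static _Thread_local struct bpf_run_ctx *cur_ctx;	/* stand-in for current->bpf_ctx */

static struct bpf_run_ctx *set_run_ctx(struct bpf_run_ctx *new_ctx)
{
	struct bpf_run_ctx *old = cur_ctx;

	cur_ctx = new_ctx;
	return old;
}

static void run_prog(void)
{
	/* a helper reading the attach cookie would start from cur_ctx here */
	printf("prog runs under ctx %p\n", (void *)cur_ctx);
}

int main(void)
{
	struct bpf_tramp_run_ctx run_ctx = { .bpf_cookie = 0 };

	/* __bpf_prog_enter(): install our context, remember the old one */
	run_ctx.saved_run_ctx = set_run_ctx(&run_ctx.run_ctx);
	run_prog();
	/* __bpf_prog_exit(): put the previous context back */
	set_run_ctx(run_ctx.saved_run_ctx);
	return 0;
}
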
arch/x86/net/bpf_jit_comp.c
@@ -1763,14 +1763,30 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_tramp_link *l, int stack_size,
-			   bool save_ret)
+			   int run_ctx_off, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
+	int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
 	struct bpf_prog *p = l->link.prog;

+	/* mov rdi, 0 */
+	emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
+
+	/* Prepare struct bpf_tramp_run_ctx.
+	 *
+	 * bpf_tramp_run_ctx is already preserved by
+	 * arch_prepare_bpf_trampoline().
+	 *
+	 * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi
+	 */
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+	/* arg2: lea rsi, [rbp - run_ctx_off] */
+	EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1816,6 +1832,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+	/* arg3: lea rdx, [rbp - run_ctx_off] */
+	EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
 		      __bpf_prog_exit, prog))
@@ -1853,14 +1871,14 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 		      struct bpf_tramp_links *tl, int stack_size,
-		      bool save_ret)
+		      int run_ctx_off, bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;

 	for (i = 0; i < tl->nr_links; i++) {
 		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
-				    save_ret))
+				    run_ctx_off, save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1869,7 +1887,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 			      struct bpf_tramp_links *tl, int stack_size,
-			      u8 **branches)
+			      int run_ctx_off, u8 **branches)
 {
 	u8 *prog = *pprog;
 	int i;
@@ -1880,7 +1898,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 	for (i = 0; i < tl->nr_links; i++) {
-		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, true))
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
 			return -EINVAL;

 		/* mod_ret prog stored return value into [rbp - 8]. Emit:
@@ -1986,7 +2004,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 				void *orig_call)
 {
 	int ret, i, nr_args = m->nr_args;
-	int regs_off, ip_off, args_off, stack_size = nr_args * 8;
+	int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2016,6 +2034,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	 * RBP - args_off  [ args count      ]  always
 	 *
 	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
+	 *
+	 * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
 	 */

 	/* room for return value of orig_call or fentry prog */
@@ -2034,6 +2054,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i

 	ip_off = stack_size;

+	stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
+	run_ctx_off = stack_size;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
@@ -2081,7 +2104,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}

 	if (fentry->nr_links)
-		if (invoke_bpf(m, &prog, fentry, regs_off,
+		if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
 			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
@@ -2092,7 +2115,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			return -ENOMEM;

 		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
-				       branches)) {
+				       run_ctx_off, branches)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2129,7 +2152,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}

 	if (fexit->nr_links)
-		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
+		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
...
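
Two details in the hunks above are easy to miss. First, the frame grows by sizeof(struct bpf_tramp_run_ctx) rounded up to the next multiple of 8, and run_ctx_off is the distance from RBP down to that slot. Second, EMIT4(0x48, 0x8D, 0x75, -run_ctx_off) and EMIT4(0x48, 0x8D, 0x55, -run_ctx_off) encode lea rsi, [rbp + disp8] and lea rdx, [rbp + disp8], so -run_ctx_off must fit in a signed byte. A hedged recomputation of the offsets for one concrete case (two 8-byte arguments, a saved return-value slot, no BPF_TRAMP_F_IP_ARG; the 16-byte run-ctx size assumes struct bpf_run_ctx is empty, as in this kernel version):

#include <stdio.h>

int main(void)
{
	int nr_args = 2;
	int run_ctx_sz = 16;			/* assumed: u64 cookie + saved pointer */
	int stack_size = nr_args * 8;		/* saved argument registers */

	stack_size += 8;			/* return value of orig_call or fentry prog */
	int regs_off = stack_size;

	stack_size += 8;			/* args count */
	int args_off = stack_size;

	int ip_off = stack_size;		/* BPF_TRAMP_F_IP_ARG not set */

	stack_size += (run_ctx_sz + 7) & ~0x7;	/* 8-byte round-up, as in the patch */
	int run_ctx_off = stack_size;

	printf("regs_off=%d args_off=%d ip_off=%d run_ctx_off=%d\n",
	       regs_off, args_off, ip_off, run_ctx_off);
	/* prints: regs_off=24 args_off=32 ip_off=32 run_ctx_off=48 */
	return 0;
}
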
include/linux/bpf.h
@@ -730,6 +730,8 @@ struct bpf_tramp_links {
 	int nr_links;
 };

+struct bpf_tramp_run_ctx;
+
 /* Different use cases for BPF trampoline:
  * 1. replace nop at the function entry (kprobe equivalent)
  *    flags = BPF_TRAMP_F_RESTORE_REGS
@@ -756,10 +758,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
 			       struct bpf_tramp_links *tlinks,
 			       void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
@@ -1351,6 +1354,12 @@ struct bpf_trace_run_ctx {
 	u64 bpf_cookie;
 };

+struct bpf_tramp_run_ctx {
+	struct bpf_run_ctx run_ctx;
+	u64 bpf_cookie;
+	struct bpf_run_ctx *saved_run_ctx;
+};
+
 static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
 {
 	struct bpf_run_ctx *old_ctx = NULL;
...
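
Making struct bpf_run_ctx the first member is what lets generic code get back to the trampoline-specific context: any helper that sees current->bpf_ctx can recover the enclosing bpf_tramp_run_ctx with container_of(). A sketch of such a lookup; tramp_attach_cookie() is a hypothetical name used for illustration only (a real cookie-reading helper for trampoline programs arrives later in this series):

/* illustrative only: map the installed run context back to the
 * trampoline context and read the cookie the JITed code stored there
 */
static inline u64 tramp_attach_cookie(struct bpf_run_ctx *ctx)
{
	struct bpf_tramp_run_ctx *run_ctx;

	run_ctx = container_of(ctx, struct bpf_tramp_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}
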
kernel/bpf/syscall.c
@@ -5020,6 +5020,7 @@ static bool syscall_prog_is_valid_access(int off, int size,
 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 {
 	struct bpf_prog * __maybe_unused prog;
+	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

 	switch (cmd) {
 	case BPF_MAP_CREATE:
@@ -5047,13 +5048,15 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 		return -EINVAL;
 	}

-	if (!__bpf_prog_enter_sleepable(prog)) {
+	run_ctx.bpf_cookie = 0;
+	run_ctx.saved_run_ctx = NULL;
+	if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
 		/* recursion detected */
 		bpf_prog_put(prog);
 		return -EBUSY;
 	}
 	attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
-	__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
+	__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
 	bpf_prog_put(prog);
 	return 0;
 #endif
...
kernel/bpf/trampoline.c
@@ -568,11 +568,14 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
  * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
 	rcu_read_lock();
 	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
@@ -602,29 +605,38 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
 	}
 }

-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
 	__releases(RCU)
 {
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }

-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 {
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
 	}
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	return bpf_prog_start_time();
 }

-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx)
 {
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
...
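
A property worth calling out: each invocation keeps its bpf_tramp_run_ctx on its own stack frame and chains to the previous context through saved_run_ctx, so nested invocations (a traced function hit while another BPF program is already running) save and restore in strict LIFO order. A small user-space model of that chaining, with enter()/exit_ctx() standing in for the bpf_set_run_ctx()/bpf_reset_run_ctx() calls made from __bpf_prog_enter()/__bpf_prog_exit():

#include <assert.h>
#include <stddef.h>

struct run_ctx { struct run_ctx *saved; };

static _Thread_local struct run_ctx *cur;	/* stand-in for current->bpf_ctx */

static void enter(struct run_ctx *c)    { c->saved = cur; cur = c; }
static void exit_ctx(struct run_ctx *c) { cur = c->saved; }

int main(void)
{
	struct run_ctx outer = { 0 }, inner = { 0 };

	enter(&outer);			/* first trampoline fires */
	enter(&inner);			/* nested traced call fires */
	assert(cur == &inner);
	exit_ctx(&inner);		/* inner trampoline returns */
	assert(cur == &outer);		/* outer context restored: LIFO */
	exit_ctx(&outer);
	assert(cur == NULL);
	return 0;
}
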