Commit ae240823 authored by KP Singh's avatar KP Singh Committed by Alexei Starovoitov

bpf: Introduce BPF_MODIFY_RETURN

When multiple programs are attached, each program receives the return
value from the previous program on the stack and the last program
provides the return value to the attached function.

The fmod_ret bpf programs are run after the fentry programs and before
the fexit programs. The original function is only called if all the
fmod_ret programs return 0 to avoid any unintended side-effects. The
success value, i.e. 0 is not currently configurable but can be made so
where user-space can specify it at load time.

For example:

int func_to_be_attached(int a, int b)
{  <--- do_fentry

do_fmod_ret:
   <update ret by calling fmod_ret>
   if (ret != 0)
        goto do_fexit;

original_function:

    <side_effects_happen_here>

}  <--- do_fexit

The fmod_ret program attached to this function can be defined as:

SEC("fmod_ret/func_to_be_attached")
int BPF_PROG(func_name, int a, int b, int ret)
{
        // This will skip the original function logic.
        return 1;
}

The first fmod_ret program is passed 0 in its return argument.
Signed-off-by: default avatarKP Singh <kpsingh@google.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
Acked-by: default avatarAndrii Nakryiko <andriin@fb.com>
Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200304191853.1529-4-kpsingh@chromium.org
parent 7e639208
...@@ -1362,7 +1362,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, ...@@ -1362,7 +1362,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
} }
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_prog *p, int stack_size) struct bpf_prog *p, int stack_size, bool mod_ret)
{ {
u8 *prog = *pprog; u8 *prog = *pprog;
int cnt = 0; int cnt = 0;
...@@ -1383,6 +1383,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, ...@@ -1383,6 +1383,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (emit_call(&prog, p->bpf_func, prog)) if (emit_call(&prog, p->bpf_func, prog))
return -EINVAL; return -EINVAL;
/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
* of the previous call which is then passed on the stack to
* the next BPF program.
*/
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
/* arg1: mov rdi, progs[i] */ /* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
(u32) (long) p); (u32) (long) p);
...@@ -1442,6 +1449,23 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) ...@@ -1442,6 +1449,23 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
return 0; return 0;
} }
static int emit_mod_ret_check_imm8(u8 **pprog, int value)
{
u8 *prog = *pprog;
int cnt = 0;
if (!is_imm8(value))
return -EINVAL;
if (value == 0)
EMIT2(0x85, add_2reg(0xC0, BPF_REG_0, BPF_REG_0));
else
EMIT3(0x83, add_1reg(0xF8, BPF_REG_0), value);
*pprog = prog;
return 0;
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size) struct bpf_tramp_progs *tp, int stack_size)
{ {
...@@ -1449,9 +1473,49 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, ...@@ -1449,9 +1473,49 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog; u8 *prog = *pprog;
for (i = 0; i < tp->nr_progs; i++) { for (i = 0; i < tp->nr_progs; i++) {
if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size)) if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
return -EINVAL;
}
*pprog = prog;
return 0;
}
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size,
u8 **branches)
{
u8 *prog = *pprog;
int i;
/* The first fmod_ret program will receive a garbage return value.
* Set this to 0 to avoid confusing the program.
*/
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tp->nr_progs; i++) {
if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
return -EINVAL; return -EINVAL;
/* Generate a branch:
*
* if (ret != 0)
* goto do_fexit;
*
* If needed this can be extended to any integer value which can
* be passed by user-space when the program is loaded.
*/
if (emit_mod_ret_check_imm8(&prog, 0))
return -EINVAL;
/* Save the location of the branch and Generate 6 nops
* (4 bytes for an offset and 2 bytes for the jump) These nops
* are replaced with a conditional jump once do_fexit (i.e. the
* start of the fexit invocation) is finalized.
*/
branches[i] = prog;
emit_nops(&prog, 4 + 2);
} }
*pprog = prog; *pprog = prog;
return 0; return 0;
} }
...@@ -1521,10 +1585,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, ...@@ -1521,10 +1585,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
struct bpf_tramp_progs *tprogs, struct bpf_tramp_progs *tprogs,
void *orig_call) void *orig_call)
{ {
int cnt = 0, nr_args = m->nr_args; int ret, i, cnt = 0, nr_args = m->nr_args;
int stack_size = nr_args * 8; int stack_size = nr_args * 8;
struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
u8 **branches = NULL;
u8 *prog; u8 *prog;
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
...@@ -1557,24 +1623,60 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, ...@@ -1557,24 +1623,60 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
if (invoke_bpf(m, &prog, fentry, stack_size)) if (invoke_bpf(m, &prog, fentry, stack_size))
return -EINVAL; return -EINVAL;
if (fmod_ret->nr_progs) {
branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
branches)) {
ret = -EINVAL;
goto cleanup;
}
}
if (flags & BPF_TRAMP_F_CALL_ORIG) { if (flags & BPF_TRAMP_F_CALL_ORIG) {
if (fentry->nr_progs) if (fentry->nr_progs || fmod_ret->nr_progs)
restore_regs(m, &prog, nr_args, stack_size); restore_regs(m, &prog, nr_args, stack_size);
/* call original function */ /* call original function */
if (emit_call(&prog, orig_call, prog)) if (emit_call(&prog, orig_call, prog)) {
return -EINVAL; ret = -EINVAL;
goto cleanup;
}
/* remember return value in a stack for bpf prog to access */ /* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
} }
if (fmod_ret->nr_progs) {
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
* aligned.
*/
emit_align(&prog, 16);
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
for (i = 0; i < fmod_ret->nr_progs; i++)
emit_cond_near_jump(&branches[i], prog, branches[i],
X86_JNE);
}
if (fexit->nr_progs) if (fexit->nr_progs)
if (invoke_bpf(m, &prog, fexit, stack_size)) if (invoke_bpf(m, &prog, fexit, stack_size)) {
return -EINVAL; ret = -EINVAL;
goto cleanup;
}
if (flags & BPF_TRAMP_F_RESTORE_REGS) if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_regs(m, &prog, nr_args, stack_size); restore_regs(m, &prog, nr_args, stack_size);
/* This needs to be done regardless. If there were fmod_ret programs,
* the return value is only updated on the stack and still needs to be
* restored to R0.
*/
if (flags & BPF_TRAMP_F_CALL_ORIG) if (flags & BPF_TRAMP_F_CALL_ORIG)
/* restore original return value back into RAX */ /* restore original return value back into RAX */
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
...@@ -1586,9 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, ...@@ -1586,9 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
EMIT1(0xC3); /* ret */ EMIT1(0xC3); /* ret */
/* Make sure the trampoline generation logic doesn't overflow */ /* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
return -EFAULT; ret = -EFAULT;
return prog - (u8 *)image; goto cleanup;
}
ret = prog - (u8 *)image;
cleanup:
kfree(branches);
return ret;
} }
static int emit_fallback_jump(u8 **pprog) static int emit_fallback_jump(u8 **pprog)
......
...@@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); ...@@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
enum bpf_tramp_prog_type { enum bpf_tramp_prog_type {
BPF_TRAMP_FENTRY, BPF_TRAMP_FENTRY,
BPF_TRAMP_FEXIT, BPF_TRAMP_FEXIT,
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX, BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */ BPF_TRAMP_REPLACE, /* more than MAX */
}; };
......
...@@ -210,6 +210,7 @@ enum bpf_attach_type { ...@@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP, BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY, BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT, BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
......
...@@ -3710,7 +3710,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, ...@@ -3710,7 +3710,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
nr_args--; nr_args--;
} }
if (prog->expected_attach_type == BPF_TRACE_FEXIT && if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
arg == nr_args) { arg == nr_args) {
if (!t) if (!t)
/* Default prog with 5 args. 6th arg is retval. */ /* Default prog with 5 args. 6th arg is retval. */
......
...@@ -2324,6 +2324,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) ...@@ -2324,6 +2324,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
if (prog->expected_attach_type != BPF_TRACE_FENTRY && if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT && prog->expected_attach_type != BPF_TRACE_FEXIT &&
prog->expected_attach_type != BPF_MODIFY_RETURN &&
prog->type != BPF_PROG_TYPE_EXT) { prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL; err = -EINVAL;
goto out_put_prog; goto out_put_prog;
......
...@@ -232,7 +232,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) ...@@ -232,7 +232,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out; goto out;
} }
if (tprogs[BPF_TRAMP_FEXIT].nr_progs) if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
/* Though the second half of trampoline page is unused a task could be /* Though the second half of trampoline page is unused a task could be
...@@ -269,6 +270,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) ...@@ -269,6 +270,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
switch (t) { switch (t) {
case BPF_TRACE_FENTRY: case BPF_TRACE_FENTRY:
return BPF_TRAMP_FENTRY; return BPF_TRAMP_FENTRY;
case BPF_MODIFY_RETURN:
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT: case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT; return BPF_TRAMP_FEXIT;
default: default:
......
...@@ -9950,6 +9950,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ...@@ -9950,6 +9950,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (!prog_extension) if (!prog_extension)
return -EINVAL; return -EINVAL;
/* fallthrough */ /* fallthrough */
case BPF_MODIFY_RETURN:
case BPF_TRACE_FENTRY: case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT: case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) { if (!btf_type_is_func(t)) {
......
...@@ -210,6 +210,7 @@ enum bpf_attach_type { ...@@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP, BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY, BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT, BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment