Commit 6c21a2a6 authored by David S. Miller

Merge branch 'bpf-stack-tracker'

Alexei Starovoitov says:

====================
bpf: stack depth tracking

Introduce tracking of bpf program stack depth in the verifier and use that
info to reduce bpf program stack consumption in the interpreter and x64 JIT.
Other JITs can take advantage of it as well in the future.
Most programs consume very little stack, so this is a good optimization in
general, and it is the first step toward bpf-to-bpf function calls.

Also use an internal opcode for bpf_tail_call() marking to make it clear
that the jmp|call|x opcode is not uapi and may be used for an actual
indirect call opcode in the future.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d2e0ef49 2960ae48
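
A brief illustration of the mechanism described above may help before the diff: the verifier records the deepest stack offset a program touches in prog->aux->stack_depth, the x64 JIT sizes its frame from that value, and the interpreter picks one of sixteen fixed-size __bpf_prog_run variants. The following standalone C sketch is illustrative only (the constants and rounding mirror the hunks below, but main(), the printf calls and the simplified round_up/round_down macros are not kernel code):

    #include <stdio.h>

    #define MAX_BPF_STACK   512
    #define AUX_STACK_SPACE (32 /* rbx, r13, r14, r15 */ + 8 /* skb_copy_bits() buffer */)

    /* simplified stand-ins for the kernel's round_up()/round_down() helpers */
    #define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
    #define round_down(x, y) (((x) / (y)) * (y))

    int main(void)
    {
            /* example: the verifier's deepest recorded access was at fp-40 */
            unsigned int stack_depth = 40;

            /* interpreter: index into the interpreters[] table built from the
             * DEFINE_BPF_PROG_RUN variants; a depth up to 31 selects the
             * 32-byte variant, 32..63 the 64-byte variant, and so on
             */
            unsigned int idx = round_down(stack_depth, 32) / 32;

            /* x64 JIT: frame is the depth rounded up to 8 bytes plus the fixed
             * scratch area, instead of the former MAX_BPF_STACK-sized frame
             */
            unsigned int frame = round_up(stack_depth, 8) + AUX_STACK_SPACE;

            printf("interpreter variant: __bpf_prog_run%u (interpreters[%u])\n",
                   (idx + 1) * 32, idx);
            printf("x64 JIT frame: %u bytes (previously %u)\n",
                   frame, MAX_BPF_STACK + AUX_STACK_SPACE);
            return 0;
    }

With the depth of 40 used here, the sketch reports the 64-byte interpreter variant and an 80-byte JIT frame, versus the flat 552 bytes the x64 JIT reserved before this series.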
......@@ -586,7 +586,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
/* tail call */
case BPF_JMP | BPF_CALL | BPF_X:
case BPF_JMP | BPF_TAIL_CALL:
if (emit_bpf_tail_call(ctx))
return -EFAULT;
break;
......
......@@ -938,7 +938,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
/*
* Tail call
*/
case BPF_JMP | BPF_CALL | BPF_X:
case BPF_JMP | BPF_TAIL_CALL:
ctx->seen |= SEEN_TAILCALL;
bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
break;
......
......@@ -991,7 +991,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
}
break;
}
case BPF_JMP | BPF_CALL | BPF_X:
case BPF_JMP | BPF_TAIL_CALL:
/*
* Implicit input:
* B1: pointer to ctx
......
......@@ -1217,7 +1217,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}
/* tail call */
case BPF_JMP | BPF_CALL |BPF_X:
case BPF_JMP | BPF_TAIL_CALL:
emit_tail_call(ctx);
break;
......
......@@ -19,9 +19,6 @@
*/
#define SKBDATA %r10
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
#define MAX_BPF_STACK (512 /* from filter.h */ + \
32 /* space for rbx,r13,r14,r15 */ + \
8 /* space for skb_copy_bits */)
#define FUNC(name) \
.globl name; \
......@@ -66,7 +63,7 @@ FUNC(sk_load_byte_positive_offset)
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
lea -MAX_BPF_STACK + 32(%rbp), %rdx;\
lea 32(%rbp), %rdx;\
FRAME_BEGIN; \
mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
......@@ -83,14 +80,14 @@ FUNC(sk_load_byte_positive_offset)
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
mov - MAX_BPF_STACK + 32(%rbp),%eax
mov 32(%rbp),%eax
bswap %eax
ret
bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
mov - MAX_BPF_STACK + 32(%rbp),%ax
mov 32(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret
......@@ -98,7 +95,7 @@ bpf_slow_path_half:
bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
movzbl - MAX_BPF_STACK + 32(%rbp),%eax
movzbl 32(%rbp),%eax
ret
#define sk_negative_common(SIZE) \
......@@ -148,9 +145,10 @@ FUNC(sk_load_byte_negative_offset)
bpf_error:
# force a return 0 from jit handler
xor %eax,%eax
mov - MAX_BPF_STACK(%rbp),%rbx
mov - MAX_BPF_STACK + 8(%rbp),%r13
mov - MAX_BPF_STACK + 16(%rbp),%r14
mov - MAX_BPF_STACK + 24(%rbp),%r15
mov (%rbp),%rbx
mov 8(%rbp),%r13
mov 16(%rbp),%r14
mov 24(%rbp),%r15
add $40, %rbp
leaveq
ret
......@@ -197,17 +197,16 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
#define STACKSIZE \
(MAX_BPF_STACK + \
32 /* space for rbx, r13, r14, r15 */ + \
#define AUX_STACK_SPACE \
(32 /* space for rbx, r13, r14, r15 */ + \
8 /* space for skb_copy_bits() buffer */)
#define PROLOGUE_SIZE 48
#define PROLOGUE_SIZE 37
/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
static void emit_prologue(u8 **pprog)
static void emit_prologue(u8 **pprog, u32 stack_depth)
{
u8 *prog = *pprog;
int cnt = 0;
......@@ -215,13 +214,17 @@ static void emit_prologue(u8 **pprog)
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
/* sub rsp, STACKSIZE */
EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE);
/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
EMIT3_off32(0x48, 0x81, 0xEC,
round_up(stack_depth, 8) + AUX_STACK_SPACE);
/* sub rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
/* all classic BPF filters use R6(rbx) save it */
/* mov qword ptr [rbp-X],rbx */
EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE);
/* mov qword ptr [rbp+0],rbx */
EMIT4(0x48, 0x89, 0x5D, 0);
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
......@@ -231,12 +234,12 @@ static void emit_prologue(u8 **pprog)
* than synthetic ones. Therefore not worth adding complexity.
*/
/* mov qword ptr [rbp-X],r13 */
EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8);
/* mov qword ptr [rbp-X],r14 */
EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16);
/* mov qword ptr [rbp-X],r15 */
EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
/* mov qword ptr [rbp+8],r13 */
EMIT4(0x4C, 0x89, 0x6D, 8);
/* mov qword ptr [rbp+16],r14 */
EMIT4(0x4C, 0x89, 0x75, 16);
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
* we need to reset the counter to 0. It's done in two instructions,
......@@ -246,8 +249,8 @@ static void emit_prologue(u8 **pprog)
/* xor eax, eax */
EMIT2(0x31, 0xc0);
/* mov qword ptr [rbp-X], rax */
EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
/* mov qword ptr [rbp+32], rax */
EMIT4(0x48, 0x89, 0x45, 32);
BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
......@@ -289,13 +292,13 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 36
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
......@@ -361,7 +364,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
emit_prologue(&prog);
emit_prologue(&prog, bpf_prog->aux->stack_depth);
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
......@@ -877,7 +880,7 @@ xadd: if (is_imm8(insn->off))
}
break;
case BPF_JMP | BPF_CALL | BPF_X:
case BPF_JMP | BPF_TAIL_CALL:
emit_bpf_tail_call(&prog);
break;
......@@ -1036,15 +1039,17 @@ xadd: if (is_imm8(insn->off))
seen_exit = true;
/* update cleanup_addr */
ctx->cleanup_addr = proglen;
/* mov rbx, qword ptr [rbp-X] */
EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
/* mov r13, qword ptr [rbp-X] */
EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
/* mov r14, qword ptr [rbp-X] */
EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
/* mov r15, qword ptr [rbp-X] */
EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
/* mov rbx, qword ptr [rbp+0] */
EMIT4(0x48, 0x8B, 0x5D, 0);
/* mov r13, qword ptr [rbp+8] */
EMIT4(0x4C, 0x8B, 0x6D, 8);
/* mov r14, qword ptr [rbp+16] */
EMIT4(0x4C, 0x8B, 0x75, 16);
/* mov r15, qword ptr [rbp+24] */
EMIT4(0x4C, 0x8B, 0x7D, 24);
/* add rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
......
......@@ -171,6 +171,7 @@ struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
u32 stack_depth;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
const struct bpf_verifier_ops *ops;
......
......@@ -57,6 +57,9 @@ struct bpf_prog_aux;
#define BPF_REG_AX MAX_BPF_REG
#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1)
/* unused opcode to mark special call to bpf_tail_call() helper */
#define BPF_TAIL_CALL 0xf0
/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
......
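
A note on the opcode value above: BPF_TAIL_CALL (0xf0) lives in the BPF_OP() field of the instruction code, and at the time of this series the uapi JMP-class operations ended at BPF_EXIT (0x90), so BPF_JMP | BPF_TAIL_CALL (0xf5) cannot collide with anything userspace can load. A tiny standalone check, with the relevant encodings copied inline rather than pulled from the uapi headers, purely for illustration:

    #include <stdio.h>

    /* field extractors as defined in the uapi BPF headers */
    #define BPF_CLASS(code) ((code) & 0x07)
    #define BPF_OP(code)    ((code) & 0xf0)

    #define BPF_JMP       0x05
    #define BPF_EXIT      0x90 /* highest uapi BPF_OP() in the JMP class at the time */
    #define BPF_TAIL_CALL 0xf0 /* internal-only marker introduced by this series */

    int main(void)
    {
            unsigned char code = BPF_JMP | BPF_TAIL_CALL; /* 0xf5 */

            printf("code=0x%02x class=0x%02x op=0x%02x (uapi JMP ops end at 0x%02x)\n",
                   code, BPF_CLASS(code), BPF_OP(code), BPF_EXIT);
            return 0;
    }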
......@@ -763,10 +763,10 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
*
* Decode and execute eBPF instructions.
*/
static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
u64 *stack)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
u64 tmp;
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
......@@ -824,7 +824,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
......@@ -874,9 +874,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
#define CONT ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
ARG1 = (u64) (unsigned long) ctx;
select_insn:
goto *jumptable[insn->code];
......@@ -1219,7 +1216,39 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
}
STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
#define DEFINE_BPF_PROG_RUN(stack_size) \
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
u64 regs[MAX_BPF_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
return ___bpf_prog_run(regs, insn, stack); \
}
#define EVAL1(FN, X) FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
static unsigned int (*interpreters[])(const void *ctx,
const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
......@@ -1268,7 +1297,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
fp->bpf_func = (void *) __bpf_prog_run;
fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32];
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
......
......@@ -926,6 +926,10 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
verbose("invalid stack off=%d size=%d\n", off, size);
return -EACCES;
}
if (env->prog->aux->stack_depth < -off)
env->prog->aux->stack_depth = -off;
if (t == BPF_WRITE) {
if (!env->allow_ptr_leaks &&
state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
......@@ -1032,6 +1036,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return -EACCES;
}
if (env->prog->aux->stack_depth < -off)
env->prog->aux->stack_depth = -off;
if (meta && meta->raw_mode) {
meta->access_size = access_size;
meta->regno = regno;
......@@ -3167,7 +3174,8 @@ static int do_check(struct bpf_verifier_env *env)
insn_idx++;
}
verbose("processed %d insns\n", insn_processed);
verbose("processed %d insns, stack depth %d\n",
insn_processed, env->prog->aux->stack_depth);
return 0;
}
......@@ -3462,6 +3470,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
env->prog->aux->stack_depth = MAX_BPF_STACK;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
......@@ -3469,7 +3478,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* that doesn't support bpf_tail_call yet
*/
insn->imm = 0;
insn->code |= BPF_X;
insn->code = BPF_JMP | BPF_TAIL_CALL;
continue;
}
......
......@@ -84,6 +84,7 @@ struct bpf_test {
} test[MAX_SUBTESTS];
int (*fill_helper)(struct bpf_test *self);
__u8 frag_data[MAX_DATA];
int stack_depth; /* for eBPF only, since tests don't call verifier */
};
/* Large test cases need separate allocation and fill handler. */
......@@ -455,6 +456,7 @@ static int __bpf_fill_stxdw(struct bpf_test *self, int size)
self->u.ptr.insns = insn;
self->u.ptr.len = len;
self->stack_depth = 40;
return 0;
}
......@@ -2317,7 +2319,8 @@ static struct bpf_test tests[] = {
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6},
{ { 38, 256 } }
{ { 38, 256 } },
.stack_depth = 64,
},
/* BPF_ALU | BPF_MOV | BPF_X */
{
......@@ -4169,6 +4172,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xff } },
.stack_depth = 40,
},
{
"ST_MEM_B: Store/Load byte: max positive",
......@@ -4181,6 +4185,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x7f } },
.stack_depth = 40,
},
{
"STX_MEM_B: Store/Load byte: max negative",
......@@ -4194,6 +4199,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xff } },
.stack_depth = 40,
},
{
"ST_MEM_H: Store/Load half word: max negative",
......@@ -4206,6 +4212,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffff } },
.stack_depth = 40,
},
{
"ST_MEM_H: Store/Load half word: max positive",
......@@ -4218,6 +4225,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x7fff } },
.stack_depth = 40,
},
{
"STX_MEM_H: Store/Load half word: max negative",
......@@ -4231,6 +4239,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffff } },
.stack_depth = 40,
},
{
"ST_MEM_W: Store/Load word: max negative",
......@@ -4243,6 +4252,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffffffff } },
.stack_depth = 40,
},
{
"ST_MEM_W: Store/Load word: max positive",
......@@ -4255,6 +4265,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x7fffffff } },
.stack_depth = 40,
},
{
"STX_MEM_W: Store/Load word: max negative",
......@@ -4268,6 +4279,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffffffff } },
.stack_depth = 40,
},
{
"ST_MEM_DW: Store/Load double word: max negative",
......@@ -4280,6 +4292,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffffffff } },
.stack_depth = 40,
},
{
"ST_MEM_DW: Store/Load double word: max negative 2",
......@@ -4297,6 +4310,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x1 } },
.stack_depth = 40,
},
{
"ST_MEM_DW: Store/Load double word: max positive",
......@@ -4309,6 +4323,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x7fffffff } },
.stack_depth = 40,
},
{
"STX_MEM_DW: Store/Load double word: max negative",
......@@ -4322,6 +4337,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0xffffffff } },
.stack_depth = 40,
},
/* BPF_STX | BPF_XADD | BPF_W/DW */
{
......@@ -4336,6 +4352,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x22 } },
.stack_depth = 40,
},
{
"STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
......@@ -4351,6 +4368,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0 } },
.stack_depth = 40,
},
{
"STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
......@@ -4363,6 +4381,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x12 } },
.stack_depth = 40,
},
{
"STX_XADD_W: X + 1 + 1 + 1 + ...",
......@@ -4384,6 +4403,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x22 } },
.stack_depth = 40,
},
{
"STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
......@@ -4399,6 +4419,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0 } },
.stack_depth = 40,
},
{
"STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
......@@ -4411,6 +4432,7 @@ static struct bpf_test tests[] = {
INTERNAL,
{ },
{ { 0, 0x12 } },
.stack_depth = 40,
},
{
"STX_XADD_DW: X + 1 + 1 + 1 + ...",
......@@ -5809,6 +5831,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
/* Type doesn't really matter here as long as it's not unspec. */
fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
fp->aux->stack_depth = tests[which].stack_depth;
/* We cannot error here as we don't need type compatibility
* checks.
......
......@@ -352,7 +352,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* bpf_convert_filter - convert filter program
* @prog: the user passed filter program
* @len: the length of the user passed filter program
* @new_prog: buffer where converted program will be stored
* @new_prog: allocated 'struct bpf_prog' or NULL
* @new_len: pointer to store length of converted program
*
* Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
......@@ -364,14 +364,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*
* 2) 2nd pass to remap in two passes: 1st pass finds new
* jump offsets, 2nd pass remapping:
* new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
* bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
*/
static int bpf_convert_filter(struct sock_filter *prog, int len,
struct bpf_insn *new_prog, int *new_len)
struct bpf_prog *new_prog, int *new_len)
{
int new_flen = 0, pass = 0, target, i;
struct bpf_insn *new_insn;
int new_flen = 0, pass = 0, target, i, stack_off;
struct bpf_insn *new_insn, *first_insn = NULL;
struct sock_filter *fp;
int *addrs = NULL;
u8 bpf_src;
......@@ -383,6 +382,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
return -EINVAL;
if (new_prog) {
first_insn = new_prog->insnsi;
addrs = kcalloc(len, sizeof(*addrs),
GFP_KERNEL | __GFP_NOWARN);
if (!addrs)
......@@ -390,11 +390,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
}
do_pass:
new_insn = new_prog;
new_insn = first_insn;
fp = prog;
/* Classic BPF related prologue emission. */
if (new_insn) {
if (new_prog) {
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
......@@ -415,7 +415,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
struct bpf_insn *insn = tmp_insns;
if (addrs)
addrs[i] = new_insn - new_prog;
addrs[i] = new_insn - first_insn;
switch (fp->code) {
/* All arithmetic insns and skb loads map as-is. */
......@@ -561,17 +561,25 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
/* Store to stack. */
case BPF_ST:
case BPF_STX:
stack_off = fp->k * 4 + 4;
*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
BPF_ST ? BPF_REG_A : BPF_REG_X,
-(BPF_MEMWORDS - fp->k) * 4);
-stack_off);
/* check_load_and_stores() verifies that classic BPF can
* load from stack only after write, so tracking
* stack_depth for ST|STX insns is enough
*/
if (new_prog && new_prog->aux->stack_depth < stack_off)
new_prog->aux->stack_depth = stack_off;
break;
/* Load from stack. */
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
stack_off = fp->k * 4 + 4;
*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
BPF_REG_A : BPF_REG_X, BPF_REG_FP,
-(BPF_MEMWORDS - fp->k) * 4);
-stack_off);
break;
/* A = K or X = K */
......@@ -619,13 +627,13 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
if (!new_prog) {
/* Only calculating new length. */
*new_len = new_insn - new_prog;
*new_len = new_insn - first_insn;
return 0;
}
pass++;
if (new_flen != new_insn - new_prog) {
new_flen = new_insn - new_prog;
if (new_flen != new_insn - first_insn) {
new_flen = new_insn - first_insn;
if (pass > 2)
goto err;
goto do_pass;
......@@ -1017,7 +1025,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
if (err)
/* 2nd bpf_convert_filter() can fail only if it fails
* to allocate memory, remapping must succeed. Note,
......