Commit 156d0e29 authored by Naveen N. Rao's avatar Naveen N. Rao Committed by Michael Ellerman

powerpc/ebpf/jit: Implement JIT compiler for extended BPF

PPC64 eBPF JIT compiler.

Enable with:
  echo 1 > /proc/sys/net/core/bpf_jit_enable
or
  echo 2 > /proc/sys/net/core/bpf_jit_enable

... to see the generated JIT code. This can further be processed with
tools/net/bpf_jit_disasm.

With CONFIG_TEST_BPF=m and 'modprobe test_bpf':

 test_bpf: Summary: 305 PASSED, 0 FAILED, [297/297 JIT'ed]

... on both ppc64 BE and LE.

The details of the approach are documented through various comments in
the code.
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarNaveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent 6ac0ba5a
......@@ -128,7 +128,8 @@ config PPC
select IRQ_FORCED_THREADING
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_CBPF_JIT
select HAVE_CBPF_JIT if !PPC64
select HAVE_EBPF_JIT if PPC64
select HAVE_ARCH_JUMP_LABEL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_HAS_GCOV_PROFILE_ALL
......
......@@ -36,11 +36,13 @@
#define PPC_MIN_STKFRM 112
#ifdef __BIG_ENDIAN__
#define LHZX_BE stringify_in_c(lhzx)
#define LWZX_BE stringify_in_c(lwzx)
#define LDX_BE stringify_in_c(ldx)
#define STWX_BE stringify_in_c(stwx)
#define STDX_BE stringify_in_c(stdx)
#else
#define LHZX_BE stringify_in_c(lhbrx)
#define LWZX_BE stringify_in_c(lwbrx)
#define LDX_BE stringify_in_c(ldbrx)
#define STWX_BE stringify_in_c(stwbrx)
......
......@@ -142,9 +142,11 @@
#define PPC_INST_ISEL 0x7c00001e
#define PPC_INST_ISEL_MASK 0xfc00003e
#define PPC_INST_LDARX 0x7c0000a8
#define PPC_INST_STDCX 0x7c0001ad
#define PPC_INST_LSWI 0x7c0004aa
#define PPC_INST_LSWX 0x7c00042a
#define PPC_INST_LWARX 0x7c000028
#define PPC_INST_STWCX 0x7c00012d
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
......@@ -211,8 +213,11 @@
#define PPC_INST_LBZ 0x88000000
#define PPC_INST_LD 0xe8000000
#define PPC_INST_LHZ 0xa0000000
#define PPC_INST_LHBRX 0x7c00062c
#define PPC_INST_LWZ 0x80000000
#define PPC_INST_LHBRX 0x7c00062c
#define PPC_INST_LDBRX 0x7c000428
#define PPC_INST_STB 0x98000000
#define PPC_INST_STH 0xb0000000
#define PPC_INST_STD 0xf8000000
#define PPC_INST_STDU 0xf8000001
#define PPC_INST_STW 0x90000000
......@@ -221,22 +226,34 @@
#define PPC_INST_MTLR 0x7c0803a6
#define PPC_INST_CMPWI 0x2c000000
#define PPC_INST_CMPDI 0x2c200000
#define PPC_INST_CMPW 0x7c000000
#define PPC_INST_CMPD 0x7c200000
#define PPC_INST_CMPLW 0x7c000040
#define PPC_INST_CMPLD 0x7c200040
#define PPC_INST_CMPLWI 0x28000000
#define PPC_INST_CMPLDI 0x28200000
#define PPC_INST_ADDI 0x38000000
#define PPC_INST_ADDIS 0x3c000000
#define PPC_INST_ADD 0x7c000214
#define PPC_INST_SUB 0x7c000050
#define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021
#define PPC_INST_MULLD 0x7c0001d2
#define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016
#define PPC_INST_MULLI 0x1c000000
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004
#define PPC_INST_SLW 0x7c000030
#define PPC_INST_SLD 0x7c000036
#define PPC_INST_SRW 0x7c000430
#define PPC_INST_SRD 0x7c000436
#define PPC_INST_SRAD 0x7c000634
#define PPC_INST_SRADI 0x7c000674
#define PPC_INST_AND 0x7c000038
#define PPC_INST_ANDDOT 0x7c000039
#define PPC_INST_OR 0x7c000378
......@@ -247,6 +264,7 @@
#define PPC_INST_XORI 0x68000000
#define PPC_INST_XORIS 0x6c000000
#define PPC_INST_NEG 0x7c0000d0
#define PPC_INST_EXTSW 0x7c0007b4
#define PPC_INST_BRANCH 0x48000000
#define PPC_INST_BRANCH_COND 0x40800000
#define PPC_INST_LBZCIX 0x7c0006aa
......
#
# Arch-specific network modules
#
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
else
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
endif
......@@ -2,6 +2,7 @@
* bpf_jit.h: BPF JIT compiler for PPC
*
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
* 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -13,7 +14,9 @@
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC64
#include <asm/types.h>
#ifdef PPC64_ELF_ABI_v1
#define FUNCTION_DESCR_SIZE 24
#else
#define FUNCTION_DESCR_SIZE 0
......@@ -52,6 +55,10 @@
___PPC_RA(base) | IMM_L(i))
#define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
......@@ -63,6 +70,19 @@
___PPC_RA(base) | IMM_L(i))
#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \
___PPC_RA(base) | ___PPC_RB(b))
#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \
___PPC_RA(base) | ___PPC_RB(b))
#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \
___PPC_RA(a) | ___PPC_RB(b) | \
__PPC_EH(eh))
#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \
___PPC_RA(a) | ___PPC_RB(b) | \
__PPC_EH(eh))
#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \
___PPC_RA(a) | ___PPC_RB(b))
#ifdef CONFIG_PPC64
#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
......@@ -76,14 +96,23 @@
#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \
___PPC_RB(b))
#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \
___PPC_RB(b))
#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \
___PPC_RB(b))
#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \
___PPC_RB(b))
#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \
___PPC_RB(a) | ___PPC_RA(b))
#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \
......@@ -92,6 +121,8 @@
___PPC_RA(a) | IMM_L(i))
#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(b))
#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \
......@@ -100,6 +131,7 @@
___PPC_RS(a) | ___PPC_RB(b))
#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(b))
#define PPC_MR(d, a) PPC_OR(d, a, a)
#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \
___PPC_RS(a) | IMM_L(i))
#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \
......@@ -110,13 +142,30 @@
___PPC_RS(a) | IMM_L(i))
#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \
___PPC_RS(a) | IMM_L(i))
#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \
___PPC_RS(a))
#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
(((i) & 0x20) >> 4))
#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB64(mb) | (((i) & 0x20) >> 4))
#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_ME64(me) | (((i) & 0x20) >> 4))
......@@ -127,6 +176,8 @@
#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31)
/* sldi = rldicr Rx, Ry, n, 63-n */
#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i))
/* sldi = rldicl Rx, Ry, 64-n, n */
#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i)
#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))
......
/*
* bpf_jit64.h: BPF JIT compiler for PPC64
*
* Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
* IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#ifndef _BPF_JIT64_H
#define _BPF_JIT64_H
#include "bpf_jit.h"
/*
* Stack layout:
*
* [ prev sp ] <-------------
* [ nv gpr save area ] 8*8 |
* fp (r31) --> [ ebpf stack space ] 512 |
* [ local/tmp var space ] 16 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 16
/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
#define BPF_PPC_STACK_SAVE (8*8)
/* Ensure this is quadword aligned */
#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \
MAX_BPF_STACK + BPF_PPC_STACK_SAVE)
#ifndef __ASSEMBLY__
/* BPF register usage */
#define SKB_HLEN_REG (MAX_BPF_REG + 0)
#define SKB_DATA_REG (MAX_BPF_REG + 1)
#define TMP_REG_1 (MAX_BPF_REG + 2)
#define TMP_REG_2 (MAX_BPF_REG + 3)
/* BPF to ppc register mappings */
static const int b2p[] = {
/* function return value */
[BPF_REG_0] = 8,
/* function arguments */
[BPF_REG_1] = 3,
[BPF_REG_2] = 4,
[BPF_REG_3] = 5,
[BPF_REG_4] = 6,
[BPF_REG_5] = 7,
/* non volatile registers */
[BPF_REG_6] = 27,
[BPF_REG_7] = 28,
[BPF_REG_8] = 29,
[BPF_REG_9] = 30,
/* frame pointer aka BPF_REG_10 */
[BPF_REG_FP] = 31,
/* eBPF jit internal registers */
[SKB_HLEN_REG] = 25,
[SKB_DATA_REG] = 26,
[TMP_REG_1] = 9,
[TMP_REG_2] = 10
};
/* Assembly helpers */
#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \
u64 func##_negative_offset(u64 r3, u64 r4); \
u64 func##_positive_offset(u64 r3, u64 r4);
DECLARE_LOAD_FUNC(sk_load_word);
DECLARE_LOAD_FUNC(sk_load_half);
DECLARE_LOAD_FUNC(sk_load_byte);
#define CHOOSE_LOAD_FUNC(imm, func) \
(imm < 0 ? \
(imm >= SKF_LL_OFF ? func##_negative_offset : func) : \
func##_positive_offset)
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_SKB 0x4000 /* uses sk_buff */
struct codegen_context {
/*
* This is used to track register usage as well
* as calls to external helpers.
* - register usage is tracked with corresponding
* bits (r3-r10 and r25-r31)
* - rest of the bits can be used to track other
* things -- for now, we use bits 16 to 23
* encoded in SEEN_* macros above
*/
unsigned int seen;
unsigned int idx;
};
#endif /* !__ASSEMBLY__ */
#endif
/*
* bpf_jit_asm64.S: Packet/header access helper functions
* for PPC64 BPF compiler.
*
* Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
* IBM Corporation
*
* Based on bpf_jit_asm.S by Matt Evans
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <asm/ppc_asm.h>
#include <asm/ptrace.h>
#include "bpf_jit64.h"
/*
* All of these routines are called directly from generated code,
* with the below register usage:
* r27 skb pointer (ctx)
* r25 skb header length
* r26 skb->data pointer
* r4 offset
*
* Result is passed back in:
* r8 data read in host endian format (accumulator)
*
* r9 is used as a temporary register
*/
#define r_skb r27
#define r_hlen r25
#define r_data r26
#define r_off r4
#define r_val r8
#define r_tmp r9
_GLOBAL_TOC(sk_load_word)
cmpdi r_off, 0
blt bpf_slow_path_word_neg
b sk_load_word_positive_offset
_GLOBAL_TOC(sk_load_word_positive_offset)
/* Are we accessing past headlen? */
subi r_tmp, r_hlen, 4
cmpd r_tmp, r_off
blt bpf_slow_path_word
/* Nope, just hitting the header. cr0 here is eq or gt! */
LWZX_BE r_val, r_data, r_off
blr /* Return success, cr0 != LT */
_GLOBAL_TOC(sk_load_half)
cmpdi r_off, 0
blt bpf_slow_path_half_neg
b sk_load_half_positive_offset
_GLOBAL_TOC(sk_load_half_positive_offset)
subi r_tmp, r_hlen, 2
cmpd r_tmp, r_off
blt bpf_slow_path_half
LHZX_BE r_val, r_data, r_off
blr
_GLOBAL_TOC(sk_load_byte)
cmpdi r_off, 0
blt bpf_slow_path_byte_neg
b sk_load_byte_positive_offset
_GLOBAL_TOC(sk_load_byte_positive_offset)
cmpd r_hlen, r_off
ble bpf_slow_path_byte
lbzx r_val, r_data, r_off
blr
/*
* Call out to skb_copy_bits:
* Allocate a new stack frame here to remain ABI-compliant in
* stashing LR.
*/
#define bpf_slow_path_common(SIZE) \
mflr r0; \
std r0, PPC_LR_STKOFF(r1); \
stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
mr r3, r_skb; \
/* r4 = r_off as passed */ \
addi r5, r1, STACK_FRAME_MIN_SIZE; \
li r6, SIZE; \
bl skb_copy_bits; \
nop; \
/* save r5 */ \
addi r5, r1, STACK_FRAME_MIN_SIZE; \
/* r3 = 0 on success */ \
addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \
ld r0, PPC_LR_STKOFF(r1); \
mtlr r0; \
cmpdi r3, 0; \
blt bpf_error; /* cr0 = LT */
bpf_slow_path_word:
bpf_slow_path_common(4)
/* Data value is on stack, and cr0 != LT */
LWZX_BE r_val, 0, r5
blr
bpf_slow_path_half:
bpf_slow_path_common(2)
LHZX_BE r_val, 0, r5
blr
bpf_slow_path_byte:
bpf_slow_path_common(1)
lbzx r_val, 0, r5
blr
/*
* Call out to bpf_internal_load_pointer_neg_helper
*/
#define sk_negative_common(SIZE) \
mflr r0; \
std r0, PPC_LR_STKOFF(r1); \
stdu r1, -STACK_FRAME_MIN_SIZE(r1); \
mr r3, r_skb; \
/* r4 = r_off, as passed */ \
li r5, SIZE; \
bl bpf_internal_load_pointer_neg_helper; \
nop; \
addi r1, r1, STACK_FRAME_MIN_SIZE; \
ld r0, PPC_LR_STKOFF(r1); \
mtlr r0; \
/* R3 != 0 on success */ \
cmpldi r3, 0; \
beq bpf_error_slow; /* cr0 = EQ */
bpf_slow_path_word_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_word_negative_offset
_GLOBAL_TOC(sk_load_word_negative_offset)
sk_negative_common(4)
LWZX_BE r_val, 0, r3
blr
bpf_slow_path_half_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_half_negative_offset
_GLOBAL_TOC(sk_load_half_negative_offset)
sk_negative_common(2)
LHZX_BE r_val, 0, r3
blr
bpf_slow_path_byte_neg:
lis r_tmp, -32 /* SKF_LL_OFF */
cmpd r_off, r_tmp /* addr < SKF_* */
blt bpf_error /* cr0 = LT */
b sk_load_byte_negative_offset
_GLOBAL_TOC(sk_load_byte_negative_offset)
sk_negative_common(1)
lbzx r_val, 0, r3
blr
bpf_error_slow:
/* fabricate a cr0 = lt */
li r_tmp, -1
cmpdi r_tmp, 0
bpf_error:
/*
* Entered with cr0 = lt
* Generated code will 'blt epilogue', returning 0.
*/
li r_val, 0
blr
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment