Commit c3d9b9f3 authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-hw-offload'

Jakub Kicinski says:

====================
BPF hardware offload (cls_bpf for now)

Rebased and improved.

v7:
 - fix patch 4.
v6 (patch 8 only):
 - explicitly check for registers >= MAX_BPF_REG;
 - fix leaky error path.
v5:
 - fix names of guard defines in bpf_verfier.h.
v4:
 - rename parser -> analyzer;
 - reorganize the analyzer patches a bit;
 - use bitfield.h directly.

--- merge blurb:
In the last year a lot of progress have been made on offloading
simpler TC classifiers.  There is also growing interest in using
BPF for generic high-speed packet processing in the kernel.
It seems beneficial to tie those two trends together and think
about hardware offloads of BPF programs.  This patch set presents
such offload to Netronome smart NICs.  cls_bpf is extended with
hardware offload capabilities and NFP driver gets a JIT translator
which in presence of capable firmware can be used to offload
the BPF program onto the card.

BPF JIT implementation is not 100% complete (e.g. missing instructions)
but it is functional.  Encouragingly it should be possible to
offload most (if not all) advanced BPF features onto the NIC -
including packet modification, maps, tunnel encap/decap etc.

Example of basic tests I used:
  __section_cls_entry
  int cls_entry(struct __sk_buff *skb)
  {
	if (load_byte(skb, 0) != 0x0)
		return 0;

	if (load_byte(skb, 4) != 0x1)
		return 0;

	skb->mark = 0xcafe;

	if (load_byte(skb, 50) != 0xff)
		return 0;

	return ~0U;
  }

Above code can be compiled with Clang and loaded like this:

	ethtool -K p1p1 hw-tc-offload on
	tc qdisc add dev p1p1 ingress
	tc filter add dev p1p1 parent ffff:  bpf obj prog.o action drop

This set implements the basic transparent offload, the skip_{sw,hw}
flags and reporting statistics for cls_bpf.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 2d7a8926 e3b8baf0
......@@ -3,6 +3,13 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o
nfp_netvf-objs := \
nfp_net_common.o \
nfp_net_ethtool.o \
nfp_net_offload.o \
nfp_netvf_main.o
ifeq ($(CONFIG_BPF_SYSCALL),y)
nfp_netvf-objs += \
nfp_bpf_verifier.o \
nfp_bpf_jit.o
endif
nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NFP_ASM_H__
#define __NFP_ASM_H__ 1
#include "nfp_bpf.h"
#define REG_NONE 0
#define RE_REG_NO_DST 0x020
#define RE_REG_IMM 0x020
#define RE_REG_IMM_encode(x) \
(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
#define RE_REG_IMM_MAX 0x07fULL
#define RE_REG_XFR 0x080
#define UR_REG_XFR 0x180
#define UR_REG_NN 0x280
#define UR_REG_NO_DST 0x300
#define UR_REG_IMM UR_REG_NO_DST
#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
#define UR_REG_IMM_MAX 0x0ffULL
#define OP_BR_BASE 0x0d800000020ULL
#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL
#define OP_BR_MASK 0x0000000001fULL
#define OP_BR_EV_PIP 0x00000000300ULL
#define OP_BR_CSS 0x0000003c000ULL
#define OP_BR_DEFBR 0x00000300000ULL
#define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
enum br_mask {
BR_BEQ = 0x00,
BR_BNE = 0x01,
BR_BHS = 0x04,
BR_BLO = 0x05,
BR_BGE = 0x08,
BR_UNC = 0x18,
};
enum br_ev_pip {
BR_EV_PIP_UNCOND = 0,
BR_EV_PIP_COND = 1,
};
enum br_ctx_signal_state {
BR_CSS_NONE = 2,
};
#define OP_BBYTE_BASE 0x0c800000000ULL
#define OP_BB_A_SRC 0x000000000ffULL
#define OP_BB_BYTE 0x00000000300ULL
#define OP_BB_B_SRC 0x0000003fc00ULL
#define OP_BB_I8 0x00000040000ULL
#define OP_BB_EQ 0x00000080000ULL
#define OP_BB_DEFBR 0x00000300000ULL
#define OP_BB_ADDR_LO 0x007ffc00000ULL
#define OP_BB_ADDR_HI 0x10000000000ULL
#define OP_BALU_BASE 0x0e800000000ULL
#define OP_BA_A_SRC 0x000000003ffULL
#define OP_BA_B_SRC 0x000000ffc00ULL
#define OP_BA_DEFBR 0x00000300000ULL
#define OP_BA_ADDR_HI 0x0007fc00000ULL
#define OP_IMMED_A_SRC 0x000000003ffULL
#define OP_IMMED_B_SRC 0x000000ffc00ULL
#define OP_IMMED_IMM 0x0000ff00000ULL
#define OP_IMMED_WIDTH 0x00060000000ULL
#define OP_IMMED_INV 0x00080000000ULL
#define OP_IMMED_SHIFT 0x00600000000ULL
#define OP_IMMED_BASE 0x0f000000000ULL
#define OP_IMMED_WR_AB 0x20000000000ULL
enum immed_width {
IMMED_WIDTH_ALL = 0,
IMMED_WIDTH_BYTE = 1,
IMMED_WIDTH_WORD = 2,
};
enum immed_shift {
IMMED_SHIFT_0B = 0,
IMMED_SHIFT_1B = 1,
IMMED_SHIFT_2B = 2,
};
#define OP_SHF_BASE 0x08000000000ULL
#define OP_SHF_A_SRC 0x000000000ffULL
#define OP_SHF_SC 0x00000000300ULL
#define OP_SHF_B_SRC 0x0000003fc00ULL
#define OP_SHF_I8 0x00000040000ULL
#define OP_SHF_SW 0x00000080000ULL
#define OP_SHF_DST 0x0000ff00000ULL
#define OP_SHF_SHIFT 0x001f0000000ULL
#define OP_SHF_OP 0x00e00000000ULL
#define OP_SHF_DST_AB 0x01000000000ULL
#define OP_SHF_WR_AB 0x20000000000ULL
enum shf_op {
SHF_OP_NONE = 0,
SHF_OP_AND = 2,
SHF_OP_OR = 5,
};
enum shf_sc {
SHF_SC_R_ROT = 0,
SHF_SC_R_SHF = 1,
SHF_SC_L_SHF = 2,
SHF_SC_R_DSHF = 3,
};
#define OP_ALU_A_SRC 0x000000003ffULL
#define OP_ALU_B_SRC 0x000000ffc00ULL
#define OP_ALU_DST 0x0003ff00000ULL
#define OP_ALU_SW 0x00040000000ULL
#define OP_ALU_OP 0x00f80000000ULL
#define OP_ALU_DST_AB 0x01000000000ULL
#define OP_ALU_BASE 0x0a000000000ULL
#define OP_ALU_WR_AB 0x20000000000ULL
enum alu_op {
ALU_OP_NONE = 0x00,
ALU_OP_ADD = 0x01,
ALU_OP_NEG = 0x04,
ALU_OP_AND = 0x08,
ALU_OP_SUB_C = 0x0d,
ALU_OP_ADD_C = 0x11,
ALU_OP_OR = 0x14,
ALU_OP_SUB = 0x15,
ALU_OP_XOR = 0x18,
};
enum alu_dst_ab {
ALU_DST_A = 0,
ALU_DST_B = 1,
};
#define OP_LDF_BASE 0x0c000000000ULL
#define OP_LDF_A_SRC 0x000000000ffULL
#define OP_LDF_SC 0x00000000300ULL
#define OP_LDF_B_SRC 0x0000003fc00ULL
#define OP_LDF_I8 0x00000040000ULL
#define OP_LDF_SW 0x00000080000ULL
#define OP_LDF_ZF 0x00000100000ULL
#define OP_LDF_BMASK 0x0000f000000ULL
#define OP_LDF_SHF 0x001f0000000ULL
#define OP_LDF_WR_AB 0x20000000000ULL
#define OP_CMD_A_SRC 0x000000000ffULL
#define OP_CMD_CTX 0x00000000300ULL
#define OP_CMD_B_SRC 0x0000003fc00ULL
#define OP_CMD_TOKEN 0x000000c0000ULL
#define OP_CMD_XFER 0x00001f00000ULL
#define OP_CMD_CNT 0x0000e000000ULL
#define OP_CMD_SIG 0x000f0000000ULL
#define OP_CMD_TGT_CMD 0x07f00000000ULL
#define OP_CMD_MODE 0x1c0000000000ULL
struct cmd_tgt_act {
u8 token;
u8 tgt_cmd;
};
enum cmd_tgt_map {
CMD_TGT_READ8,
CMD_TGT_WRITE8,
CMD_TGT_READ_LE,
CMD_TGT_READ_SWAP_LE,
__CMD_TGT_MAP_SIZE,
};
enum cmd_mode {
CMD_MODE_40b_AB = 0,
CMD_MODE_40b_BA = 1,
CMD_MODE_32b = 4,
};
enum cmd_ctx_swap {
CMD_CTX_SWAP = 0,
CMD_CTX_NO_SWAP = 3,
};
#define OP_LCSR_BASE 0x0fc00000000ULL
#define OP_LCSR_A_SRC 0x000000003ffULL
#define OP_LCSR_B_SRC 0x000000ffc00ULL
#define OP_LCSR_WRITE 0x00000200000ULL
#define OP_LCSR_ADDR 0x001ffc00000ULL
enum lcsr_wr_src {
LCSR_WR_AREG,
LCSR_WR_BREG,
LCSR_WR_IMM,
};
#define OP_CARB_BASE 0x0e000000000ULL
#define OP_CARB_OR 0x00000010000ULL
#endif
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NFP_BPF_H__
#define __NFP_BPF_H__ 1
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/types.h>
#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
/* For branch fixup logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
*/
#define OP_BR_SPECIAL 0xff00000000000000ULL
enum br_special {
OP_BR_NORMAL = 0,
OP_BR_GO_OUT,
OP_BR_GO_ABORT,
};
enum static_regs {
STATIC_REG_PKT = 1,
#define REG_PKT_BANK ALU_DST_A
STATIC_REG_IMM = 2, /* Bank AB */
};
enum nfp_bpf_action_type {
NN_ACT_TC_DROP,
NN_ACT_TC_REDIR,
NN_ACT_DIRECT,
};
/* Software register representation, hardware encoding in asm.h */
#define NN_REG_TYPE GENMASK(31, 24)
#define NN_REG_VAL GENMASK(7, 0)
enum nfp_bpf_reg_type {
NN_REG_GPR_A = BIT(0),
NN_REG_GPR_B = BIT(1),
NN_REG_NNR = BIT(2),
NN_REG_XFER = BIT(3),
NN_REG_IMM = BIT(4),
NN_REG_NONE = BIT(5),
};
#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B)
#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT)
#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM)
#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM)
#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM)
#define NFP_BPF_ABI_FLAGS reg_nnr(0)
#define NFP_BPF_ABI_FLAG_MARK 1
#define NFP_BPF_ABI_MARK reg_nnr(1)
#define NFP_BPF_ABI_PKT reg_nnr(2)
#define NFP_BPF_ABI_LEN reg_nnr(3)
struct nfp_prog;
struct nfp_insn_meta;
typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
#define nfp_prog_first_meta(nfp_prog) \
list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
#define nfp_prog_last_meta(nfp_prog) \
list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
#define nfp_meta_next(meta) list_next_entry(meta, l)
#define nfp_meta_prev(meta) list_prev_entry(meta, l)
/**
* struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @skip: skip this instruction (optimized out)
* @double_cb: callback for second part of the instruction
* @l: link on nfp_prog->insns list
*/
struct nfp_insn_meta {
struct bpf_insn insn;
unsigned int off;
unsigned short n;
bool skip;
instr_cb_t double_cb;
struct list_head l;
};
#define BPF_SIZE_MASK 0x18
static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
{
return BPF_CLASS(meta->insn.code);
}
static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
{
return BPF_SRC(meta->insn.code);
}
static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
{
return BPF_OP(meta->insn.code);
}
static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
{
return BPF_MODE(meta->insn.code);
}
/**
* struct nfp_prog - nfp BPF program
* @prog: machine code
* @prog_len: number of valid instructions in @prog array
* @__prog_alloc_len: alloc size of @prog array
* @act: BPF program/action type (TC DA, TC with action, XDP etc.)
* @num_regs: number of registers used by this program
* @regs_per_thread: number of basic registers allocated per thread
* @start_off: address of the first instruction in the memory
* @tgt_out: jump target for normal exit
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
* @tgt_done: jump target to get the next packet
* @n_translated: number of successfully translated instructions (for errors)
* @error: error code if something went wrong
* @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
*/
struct nfp_prog {
u64 *prog;
unsigned int prog_len;
unsigned int __prog_alloc_len;
enum nfp_bpf_action_type act;
unsigned int num_regs;
unsigned int regs_per_thread;
unsigned int start_off;
unsigned int tgt_out;
unsigned int tgt_abort;
unsigned int tgt_done;
unsigned int n_translated;
int error;
struct list_head insns;
};
struct nfp_bpf_result {
unsigned int n_instr;
bool dense_mode;
};
#ifdef CONFIG_BPF_SYSCALL
int
nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
unsigned int prog_start, unsigned int prog_done,
unsigned int prog_sz, struct nfp_bpf_result *res);
#else
int
nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
unsigned int prog_start, unsigned int prog_done,
unsigned int prog_sz, struct nfp_bpf_result *res)
{
return -ENOTSUPP;
}
#endif
int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
#endif
This diff is collapsed.
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define pr_fmt(fmt) "NFP net bpf: " fmt
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include "nfp_bpf.h"
/* Analyzer/verifier definitions */
struct nfp_bpf_analyzer_priv {
struct nfp_prog *prog;
struct nfp_insn_meta *meta;
};
static struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns)
{
unsigned int forward, backward, i;
backward = meta->n - insn_idx;
forward = insn_idx - meta->n;
if (min(forward, backward) > n_insns - insn_idx - 1) {
backward = n_insns - insn_idx - 1;
meta = nfp_prog_last_meta(nfp_prog);
}
if (min(forward, backward) > insn_idx && backward > insn_idx) {
forward = insn_idx;
meta = nfp_prog_first_meta(nfp_prog);
}
if (forward < backward)
for (i = 0; i < forward; i++)
meta = nfp_meta_next(meta);
else
for (i = 0; i < backward; i++)
meta = nfp_meta_prev(meta);
return meta;
}
static int
nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env)
{
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
if (reg0->type != CONST_IMM) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
if (nfp_prog->act != NN_ACT_DIRECT &&
reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
reg0->imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
return 0;
}
static int
nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env, u8 reg)
{
if (env->cur_state.regs[reg].type != PTR_TO_CTX)
return -EINVAL;
return 0;
}
static int
nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
{
struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
struct nfp_insn_meta *meta = priv->meta;
meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
priv->meta = meta;
if (meta->insn.src_reg == BPF_REG_10 ||
meta->insn.dst_reg == BPF_REG_10) {
pr_err("stack not yet supported\n");
return -EINVAL;
}
if (meta->insn.src_reg >= MAX_BPF_REG ||
meta->insn.dst_reg >= MAX_BPF_REG) {
pr_err("program uses extended registers - jit hardening?\n");
return -EINVAL;
}
if (meta->insn.code == (BPF_JMP | BPF_EXIT))
return nfp_bpf_check_exit(priv->prog, env);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
return nfp_bpf_check_ctx_ptr(priv->prog, env,
meta->insn.src_reg);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
return nfp_bpf_check_ctx_ptr(priv->prog, env,
meta->insn.dst_reg);
return 0;
}
static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
.insn_hook = nfp_verify_insn,
};
int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
{
struct nfp_bpf_analyzer_priv *priv;
int ret;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->prog = nfp_prog;
priv->meta = nfp_prog_first_meta(nfp_prog);
ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
kfree(priv);
return ret;
}
......@@ -62,6 +62,9 @@
/* Max time to wait for NFP to respond on updates (in seconds) */
#define NFP_NET_POLL_TIMEOUT 5
/* Interval for reading offloaded filter stats */
#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100)
/* Bar allocation */
#define NFP_NET_CTRL_BAR 0
#define NFP_NET_Q0_BAR 2
......@@ -220,7 +223,7 @@ struct nfp_net_tx_ring {
#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11))
#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10))
#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9))
#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8))
#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8))
#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7))
#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6))
#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5))
......@@ -266,6 +269,8 @@ struct nfp_net_rx_desc {
};
};
#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
struct nfp_net_rx_hash {
__be32 hash_type;
__be32 hash;
......@@ -405,6 +410,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
fw_ver->minor == minor;
}
struct nfp_stat_pair {
u64 pkts;
u64 bytes;
};
/**
* struct nfp_net - NFP network device structure
* @pdev: Backpointer to PCI device
......@@ -413,6 +423,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @is_vf: Is the driver attached to a VF?
* @is_nfp3200: Is the driver for a NFP-3200 card?
* @fw_loaded: Is the firmware loaded?
* @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf
* @ctrl: Local copy of the control register/word.
* @fl_bufsz: Currently configured size of the freelist buffers
* @rx_offset: Offset in the RX buffers where packet data starts
......@@ -427,6 +438,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @rss_cfg: RSS configuration
* @rss_key: RSS secret key
* @rss_itbl: RSS indirection table
* @rx_filter: Filter offload statistics - dropped packets/bytes
* @rx_filter_prev: Filter offload statistics - values from previous update
* @rx_filter_change: Jiffies when statistics last changed
* @rx_filter_stats_timer: Timer for polling filter offload statistics
* @rx_filter_lock: Lock protecting timer state changes (teardown)
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
* @num_tx_rings: Currently configured number of TX rings
......@@ -473,6 +489,7 @@ struct nfp_net {
unsigned is_vf:1;
unsigned is_nfp3200:1;
unsigned fw_loaded:1;
unsigned bpf_offload_skip_sw:1;
u32 ctrl;
u32 fl_bufsz;
......@@ -502,6 +519,11 @@ struct nfp_net {
u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
struct nfp_stat_pair rx_filter, rx_filter_prev;
unsigned long rx_filter_change;
struct timer_list rx_filter_stats_timer;
spinlock_t rx_filter_lock;
int max_tx_rings;
int max_rx_rings;
......@@ -561,12 +583,28 @@ struct nfp_net {
/* Functions to read/write from/to a BAR
* Performs any endian conversion necessary.
*/
static inline u16 nn_readb(struct nfp_net *nn, int off)
{
return readb(nn->ctrl_bar + off);
}
static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
{
writeb(val, nn->ctrl_bar + off);
}
/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
/* NFP-3200 can't handle 16-bit accesses too well */
static inline u16 nn_readw(struct nfp_net *nn, int off)
{
WARN_ON_ONCE(nn->is_nfp3200);
return readw(nn->ctrl_bar + off);
}
static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
{
WARN_ON_ONCE(nn->is_nfp3200);
writew(val, nn->ctrl_bar + off);
}
static inline u32 nn_readl(struct nfp_net *nn, int off)
{
......@@ -757,4 +795,9 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
}
#endif /* CONFIG_NFP_NET_DEBUG */
void nfp_net_filter_stats_timer(unsigned long data);
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf);
#endif /* _NFP_NET_H_ */
......@@ -60,6 +60,7 @@
#include <linux/ktime.h>
#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include "nfp_net_ctrl.h"
......@@ -1292,38 +1293,72 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
}
}
/**
* nfp_net_set_hash() - Set SKB hash data
* @netdev: adapter's net_device structure
* @skb: SKB to set the hash data on
* @rxd: RX descriptor
*
* The RSS hash and hash-type are pre-pended to the packet data.
* Extract and decode it and set the skb fields.
*/
static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
struct nfp_net_rx_desc *rxd)
unsigned int type, __be32 *hash)
{
struct nfp_net_rx_hash *rx_hash;
if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
!(netdev->features & NETIF_F_RXHASH))
if (!(netdev->features & NETIF_F_RXHASH))
return;
rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
switch (be32_to_cpu(rx_hash->hash_type)) {
switch (type) {
case NFP_NET_RSS_IPV4:
case NFP_NET_RSS_IPV6:
case NFP_NET_RSS_IPV6_EX:
skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
break;
default:
skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
break;
}
}
static void
nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
struct nfp_net_rx_desc *rxd)
{
struct nfp_net_rx_hash *rx_hash;
if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
return;
rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
&rx_hash->hash);
}
static void *
nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
int meta_len)
{
u8 *data = skb->data - meta_len;
u32 meta_info;
meta_info = get_unaligned_be32(data);
data += 4;
while (meta_info) {
switch (meta_info & NFP_NET_META_FIELD_MASK) {
case NFP_NET_META_HASH:
meta_info >>= NFP_NET_META_FIELD_SIZE;
nfp_net_set_hash(netdev, skb,
meta_info & NFP_NET_META_FIELD_MASK,
(__be32 *)data);
data += 4;
break;
case NFP_NET_META_MARK:
skb->mark = get_unaligned_be32(data);
data += 4;
break;
default:
return NULL;
}
meta_info >>= NFP_NET_META_FIELD_SIZE;
}
return data;
}
/**
* nfp_net_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
......@@ -1438,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
skb_reserve(skb, nn->rx_offset);
skb_put(skb, data_len - meta_len);
nfp_net_set_hash(nn->netdev, skb, rxd);
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += skb->len;
u64_stats_update_end(&r_vec->rx_sync);
if (nn->fw_ver.major <= 3) {
nfp_net_set_hash_desc(nn->netdev, skb, rxd);
} else if (meta_len) {
void *end;
end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
if (unlikely(end != skb->data)) {
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
u64_stats_update_end(&r_vec->rx_sync);
dev_kfree_skb_any(skb);
nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
continue;
}
}
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, nn->netdev);
......@@ -2382,6 +2432,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
return stats;
}
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
return true;
return false;
}
static int
nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
struct tc_to_netdev *tc)
{
struct nfp_net *nn = netdev_priv(netdev);
if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
return -ENOTSUPP;
if (proto != htons(ETH_P_ALL))
return -ENOTSUPP;
if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
return -EINVAL;
}
static int nfp_net_set_features(struct net_device *netdev,
netdev_features_t features)
{
......@@ -2436,6 +2511,11 @@ static int nfp_net_set_features(struct net_device *netdev,
new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
}
if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
nn_err(nn, "Cannot disable HW TC offload while in use\n");
return -EBUSY;
}
nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
netdev->features, features, changed);
......@@ -2585,6 +2665,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
.ndo_stop = nfp_net_netdev_close,
.ndo_start_xmit = nfp_net_tx,
.ndo_get_stats64 = nfp_net_stat64,
.ndo_setup_tc = nfp_net_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
......@@ -2610,7 +2691,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
......@@ -2627,7 +2708,8 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "");
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
nfp_net_ebpf_capable(nn) ? "BPF " : "");
}
/**
......@@ -2670,10 +2752,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->rx_filter_lock);
spin_lock_init(&nn->link_status_lock);
setup_timer(&nn->reconfig_timer,
nfp_net_reconfig_timer, (unsigned long)nn);
setup_timer(&nn->rx_filter_stats_timer,
nfp_net_filter_stats_timer, (unsigned long)nn);
return nn;
}
......@@ -2795,6 +2880,9 @@ int nfp_net_netdev_init(struct net_device *netdev)
netdev->features = netdev->hw_features;
if (nfp_net_ebpf_capable(nn))
netdev->hw_features |= NETIF_F_HW_TC;
/* Advertise but disable TSO by default. */
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
......
......@@ -65,6 +65,13 @@
*/
#define NFP_NET_LSO_MAX_HDR_SZ 255
/**
* Prepend field types
*/
#define NFP_NET_META_FIELD_SIZE 4
#define NFP_NET_META_HASH 1 /* next field carries hash type */
#define NFP_NET_META_MARK 2
/**
* Hash type pre-pended when a RSS hash was computed
*/
......@@ -123,6 +130,7 @@
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */
#define NFP_NET_CFG_UPDATE 0x0004
#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
......@@ -134,6 +142,7 @@
#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
#define NFP_NET_CFG_TXRS_ENABLE 0x0008
#define NFP_NET_CFG_RXRS_ENABLE 0x0010
......@@ -196,10 +205,37 @@
#define NFP_NET_CFG_VXLAN_SZ 0x0008
/**
* 64B reserved for future use (0x0080 - 0x00c0)
* NFP6000 - BPF section
* @NFP_NET_CFG_BPF_ABI: BPF ABI version
* @NFP_NET_CFG_BPF_CAP: BPF capabilities
* @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
* @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded
* @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit
* @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks
* @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks
* @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions
* @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/
#define NFP_NET_CFG_RESERVED 0x0080
#define NFP_NET_CFG_RESERVED_SZ 0x0040
#define NFP_NET_CFG_BPF_ABI 0x0080
#define NFP_NET_BPF_ABI 1
#define NFP_NET_CFG_BPF_CAP 0x0081
#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */
#define NFP_NET_CFG_BPF_MAX_LEN 0x0082
#define NFP_NET_CFG_BPF_START 0x0084
#define NFP_NET_CFG_BPF_DONE 0x0086
#define NFP_NET_CFG_BPF_STACK_SZ 0x0088
#define NFP_NET_CFG_BPF_INL_MTU 0x0089
#define NFP_NET_CFG_BPF_SIZE 0x008e
#define NFP_NET_CFG_BPF_ADDR 0x0090
#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */
#define NFP_NET_CFG_BPF_CFG_MASK 7ULL
#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK)
/**
* 40B reserved for future use (0x0098 - 0x00c0)
*/
#define NFP_NET_CFG_RESERVED 0x0098
#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
......@@ -303,6 +339,15 @@
#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0)
#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8)
#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0)
#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8)
#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0)
#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8)
/**
* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry
......
......@@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
/* see comments in outro functions in nfp_bpf_jit.c to find out
* how different BPF modes use app-specific counters
*/
{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
};
#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
......
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* nfp_net_offload.c
* Netronome network device driver: TC offload functions for PF and VF
*/
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/list.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
#include "nfp_bpf.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
void nfp_net_filter_stats_timer(unsigned long data)
{
struct nfp_net *nn = (void *)data;
struct nfp_stat_pair latest;
spin_lock_bh(&nn->rx_filter_lock);
if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
mod_timer(&nn->rx_filter_stats_timer,
jiffies + NFP_NET_STAT_POLL_IVL);
spin_unlock_bh(&nn->rx_filter_lock);
latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
if (latest.pkts != nn->rx_filter.pkts)
nn->rx_filter_change = jiffies;
nn->rx_filter = latest;
}
static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
{
nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
nn->rx_filter_prev = nn->rx_filter;
nn->rx_filter_change = jiffies;
}
static int
nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
{
struct tc_action *a;
LIST_HEAD(actions);
u64 bytes, pkts;
pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
bytes -= pkts * ETH_HLEN;
nn->rx_filter_prev = nn->rx_filter;
preempt_disable();
tcf_exts_to_list(cls_bpf->exts, &actions);
list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
preempt_enable();
return 0;
}
static int
nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
{
const struct tc_action *a;
LIST_HEAD(actions);
/* TC direct action */
if (cls_bpf->exts_integrated) {
if (tc_no_actions(cls_bpf->exts))
return NN_ACT_DIRECT;
return -ENOTSUPP;
}
/* TC legacy mode */
if (!tc_single_action(cls_bpf->exts))
return -ENOTSUPP;
tcf_exts_to_list(cls_bpf->exts, &actions);
list_for_each_entry(a, &actions, list) {
if (is_tcf_gact_shot(a))
return NN_ACT_TC_DROP;
if (is_tcf_mirred_redirect(a) &&
tcf_mirred_ifindex(a) == nn->netdev->ifindex)
return NN_ACT_TC_REDIR;
}
return -ENOTSUPP;
}
static int
nfp_net_bpf_offload_prepare(struct nfp_net *nn,
struct tc_cls_bpf_offload *cls_bpf,
struct nfp_bpf_result *res,
void **code, dma_addr_t *dma_addr, u16 max_instr)
{
unsigned int code_sz = max_instr * sizeof(u64);
enum nfp_bpf_action_type act;
u16 start_off, done_off;
unsigned int max_mtu;
int ret;
ret = nfp_net_bpf_get_act(nn, cls_bpf);
if (ret < 0)
return ret;
act = ret;
max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
if (max_mtu < nn->netdev->mtu) {
nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
return -ENOTSUPP;
}
start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
*code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
GFP_KERNEL);
if (!*code)
return -ENOMEM;
ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
max_instr, res);
if (ret)
goto out;
return 0;
out:
dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
return ret;
}
static void
nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
void *code, dma_addr_t dma_addr,
unsigned int code_sz, unsigned int n_instr,
bool dense_mode)
{
u64 bpf_addr = dma_addr;
int err;
nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
if (dense_mode)
bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
/* Load up the JITed code */
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
if (err)
nn_err(nn, "FW command error while loading BPF: %d\n", err);
/* Enable passing packets through BPF function */
nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
if (err)
nn_err(nn, "FW command error while enabling BPF: %d\n", err);
dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
nfp_net_bpf_stats_reset(nn);
mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
}
static int nfp_net_bpf_stop(struct nfp_net *nn)
{
if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
return 0;
spin_lock_bh(&nn->rx_filter_lock);
nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
spin_unlock_bh(&nn->rx_filter_lock);
nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
del_timer_sync(&nn->rx_filter_stats_timer);
nn->bpf_offload_skip_sw = 0;
return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
}
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf)
{
struct nfp_bpf_result res;
dma_addr_t dma_addr;
u16 max_instr;
void *code;
int err;
max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
switch (cls_bpf->command) {
case TC_CLSBPF_REPLACE:
/* There is nothing stopping us from implementing seamless
* replace but the simple method of loading I adopted in
* the firmware does not handle atomic replace (i.e. we have to
* stop the BPF offload and re-enable it). Leaking-in a few
* frames which didn't have BPF applied in the hardware should
* be fine if software fallback is available, though.
*/
if (nn->bpf_offload_skip_sw)
return -EBUSY;
err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
&dma_addr, max_instr);
if (err)
return err;
nfp_net_bpf_stop(nn);
nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
dma_addr, max_instr * sizeof(u64),
res.n_instr, res.dense_mode);
return 0;
case TC_CLSBPF_ADD:
if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
return -EBUSY;
err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
&dma_addr, max_instr);
if (err)
return err;
nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
dma_addr, max_instr * sizeof(u64),
res.n_instr, res.dense_mode);
return 0;
case TC_CLSBPF_DESTROY:
return nfp_net_bpf_stop(nn);
case TC_CLSBPF_STATS:
return nfp_net_bpf_stats_update(nn, cls_bpf);
default:
return -ENOTSUPP;
}
}
......@@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
} else {
switch (fw_ver.major) {
case 1 ... 3:
case 1 ... 4:
if (is_nfp3200) {
stride = 2;
tx_bar_no = NFP_NET_Q0_BAR;
......
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#ifndef _LINUX_BPF_VERIFIER_H
#define _LINUX_BPF_VERIFIER_H 1
#include <linux/bpf.h> /* for enum bpf_reg_type */
#include <linux/filter.h> /* for MAX_BPF_STACK */
struct bpf_reg_state {
enum bpf_reg_type type;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
s64 imm;
/* valid when type == PTR_TO_PACKET* */
struct {
u32 id;
u16 off;
u16 range;
};
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
};
enum bpf_stack_slot_type {
STACK_INVALID, /* nothing was stored in this stack slot */
STACK_SPILL, /* register spilled into stack */
STACK_MISC /* BPF program wrote some data into this slot */
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
/* state of the program:
* type of all registers and stack info
*/
struct bpf_verifier_state {
struct bpf_reg_state regs[MAX_BPF_REG];
u8 stack_slot_type[MAX_BPF_STACK];
struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
};
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
};
struct bpf_insn_aux_data {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
struct bpf_verifier_env;
struct bpf_ext_analyzer_ops {
int (*insn_hook)(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);
};
/* single container for all structs
* one verifier_env per bpf_check() call
*/
struct bpf_verifier_env {
struct bpf_prog *prog; /* eBPF program being verified */
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
struct bpf_verifier_state cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
void *analyzer_priv; /* pointer to external analyzer's private data */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
};
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
void *priv);
#endif /* _LINUX_BPF_VERIFIER_H */
......@@ -789,6 +789,7 @@ enum {
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
TC_SETUP_MATCHALL,
TC_SETUP_CLSBPF,
};
struct tc_cls_u32_offload;
......@@ -800,6 +801,7 @@ struct tc_to_netdev {
struct tc_cls_u32_offload *cls_u32;
struct tc_cls_flower_offload *cls_flower;
struct tc_cls_matchall_offload *cls_mall;
struct tc_cls_bpf_offload *cls_bpf;
};
};
......
......@@ -486,4 +486,20 @@ struct tc_cls_matchall_offload {
unsigned long cookie;
};
enum tc_clsbpf_command {
TC_CLSBPF_ADD,
TC_CLSBPF_REPLACE,
TC_CLSBPF_DESTROY,
TC_CLSBPF_STATS,
};
struct tc_cls_bpf_offload {
enum tc_clsbpf_command command;
struct tcf_exts *exts;
struct bpf_prog *prog;
const char *name;
bool exts_integrated;
u32 gen_flags;
};
#endif
......@@ -396,6 +396,7 @@ enum {
TCA_BPF_FD,
TCA_BPF_NAME,
TCA_BPF_FLAGS,
TCA_BPF_FLAGS_GEN,
__TCA_BPF_MAX,
};
......
This diff is collapsed.
......@@ -204,6 +204,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
return retval;
}
static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
u64 lastuse)
{
tcf_lastuse_update(&a->tcfa_tm);
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
int ref)
{
......@@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = {
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
.stats_update = tcf_stats_update,
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
......
......@@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
#define CLS_BPF_NAME_LEN 256
#define CLS_BPF_SUPPORTED_GEN_FLAGS \
(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
struct cls_bpf_head {
struct list_head plist;
......@@ -39,6 +41,8 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
bool offloaded;
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
union {
......@@ -54,6 +58,7 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
[TCA_BPF_FLAGS] = { .type = NLA_U32 },
[TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
[TCA_BPF_NAME] = { .type = NLA_NUL_STRING,
.len = CLS_BPF_NAME_LEN },
......@@ -91,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
if (at_ingress) {
if (tc_skip_sw(prog->gen_flags)) {
filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
} else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_end(skb);
......@@ -138,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
return !prog->bpf_ops;
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_bpf_offload bpf_offload = {};
struct tc_to_netdev offload;
offload.type = TC_SETUP_CLSBPF;
offload.cls_bpf = &bpf_offload;
bpf_offload.command = cmd;
bpf_offload.exts = &prog->exts;
bpf_offload.prog = prog->filter;
bpf_offload.name = prog->bpf_name;
bpf_offload.exts_integrated = prog->exts_integrated;
bpf_offload.gen_flags = prog->gen_flags;
return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
tp->protocol, &offload);
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct cls_bpf_prog *obj = prog;
enum tc_clsbpf_command cmd;
bool skip_sw;
int ret;
skip_sw = tc_skip_sw(prog->gen_flags) ||
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
if (tc_should_offload(dev, tp, prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
cmd = TC_CLSBPF_DESTROY;
} else {
return -EINVAL;
}
} else {
if (!tc_should_offload(dev, tp, prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
ret = cls_bpf_offload_cmd(tp, obj, cmd);
if (ret)
return skip_sw ? ret : 0;
obj->offloaded = true;
if (oldprog)
oldprog->offloaded = false;
return 0;
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
int err;
if (!prog->offloaded)
return;
err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
if (err) {
pr_err("Stopping hardware offload failed: %d\n", err);
return;
}
prog->offloaded = false;
}
static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
if (!prog->offloaded)
return;
cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
}
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
......@@ -177,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
......@@ -193,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
return false;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
......@@ -302,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
{
bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
u32 gen_flags = 0;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
......@@ -326,8 +421,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
if (tb[TCA_BPF_FLAGS_GEN]) {
gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
!tc_flags_valid(gen_flags)) {
ret = -EINVAL;
goto errout;
}
}
prog->exts_integrated = have_exts;
prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
......@@ -415,6 +519,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
cls_bpf_delete_prog(tp, prog);
return ret;
}
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
......@@ -476,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
tm->tcm_handle = prog->handle;
cls_bpf_offload_update_stats(tp, prog);
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
......@@ -498,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
goto nla_put_failure;
if (prog->gen_flags &&
nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
goto nla_put_failure;
nla_nest_end(skb, nest);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment