Commit 4afe60a9 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-11-26

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Extend BTF to support function call types and improve the BPF
   symbol handling with this info for kallsyms and bpftool program
   dump to make debugging easier, from Martin and Yonghong.

2) Optimize LPM lookups by making longest_prefix_match() handle
   multiple bytes at a time, from Eric.

3) Adds support for loading and attaching flow dissector BPF progs
   from bpftool, from Stanislav.

4) Extend the sk_lookup() helper to be supported from XDP, from Nitin.

5) Enable verifier to support narrow context loads with offset > 0
   to adapt to LLVM code generation (currently only offset of 0 was
   supported). Add test cases as well, from Andrey.

6) Simplify passing device functions for offloaded BPF progs by
   adding callbacks to bpf_prog_offload_ops instead of ndo_bpf.
   Also convert nfp and netdevsim to make use of them, from Quentin.

7) Add support for sock_ops based BPF programs to send events to
   the perf ring-buffer through perf_event_output helper, from
   Sowmini and Daniel.

8) Add read / write support for skb->tstamp from tc BPF and cg BPF
   programs to allow for supporting rate-limiting in EDT qdiscs
   like fq from BPF side, from Vlad.

9) Extend libbpf API to support map in map types and add test cases
   for it as well to BPF kselftests, from Nikita.

10) Account the maximum packet offset accessed by a BPF program in
    the verifier and use it for optimizing nfp JIT, from Jiong.

11) Fix error handling regarding kprobe_events in BPF sample loader,
    from Daniel T.

12) Add support for queue and stack map type in bpftool, from David.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4bffc669 ffac28f9
......@@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app)
app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
}
bpf->bpf_dev = bpf_offload_dev_create();
bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops);
err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
if (err)
goto err_free_neutral_maps;
......
......@@ -509,7 +509,11 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
int nfp_bpf_jit(struct nfp_prog *prog);
bool nfp_bpf_supported_opcode(u8 code);
extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
int prev_insn_idx);
int nfp_bpf_finalize(struct bpf_verifier_env *env);
extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops;
struct netdev_bpf;
struct nfp_app;
......
......@@ -33,9 +33,6 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
struct nfp_bpf_neutral_map *record;
int err;
/* Map record paths are entered via ndo, update side is protected. */
ASSERT_RTNL();
/* Reuse path - other offloaded program is already tracking this map. */
record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
nfp_bpf_maps_neutral_params);
......@@ -84,8 +81,6 @@ nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
bool freed = false;
int i;
ASSERT_RTNL();
for (i = 0; i < nfp_prog->map_records_cnt; i++) {
if (--nfp_prog->map_records[i]->count) {
nfp_prog->map_records[i] = NULL;
......@@ -187,11 +182,10 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
kfree(nfp_prog);
}
static int
nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf)
static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
{
struct bpf_prog *prog = bpf->verifier.prog;
struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
struct nfp_app *app = nn->app;
struct nfp_prog *nfp_prog;
int ret;
......@@ -209,7 +203,6 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
goto err_free;
nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog);
bpf->verifier.ops = &nfp_bpf_analyzer_ops;
return 0;
......@@ -219,8 +212,9 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
return ret;
}
static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
static int nfp_bpf_translate(struct bpf_prog *prog)
{
struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int max_instr;
int err;
......@@ -242,15 +236,13 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
}
static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
static void nfp_bpf_destroy(struct bpf_prog *prog)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
kvfree(nfp_prog->prog);
nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
nfp_prog_free(nfp_prog);
return 0;
}
/* Atomic engine requires values to be in big endian, we need to byte swap
......@@ -422,12 +414,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
{
switch (bpf->command) {
case BPF_OFFLOAD_VERIFIER_PREP:
return nfp_bpf_verifier_prep(app, nn, bpf);
case BPF_OFFLOAD_TRANSLATE:
return nfp_bpf_translate(nn, bpf->offload.prog);
case BPF_OFFLOAD_DESTROY:
return nfp_bpf_destroy(nn, bpf->offload.prog);
case BPF_OFFLOAD_MAP_ALLOC:
return nfp_bpf_map_alloc(app->priv, bpf->offmap);
case BPF_OFFLOAD_MAP_FREE:
......@@ -489,14 +475,15 @@ nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int max_mtu, max_stack, max_prog_len;
unsigned int fw_mtu, pkt_off, max_stack, max_prog_len;
dma_addr_t dma_addr;
void *img;
int err;
max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
if (max_mtu < nn->dp.netdev->mtu) {
NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary");
fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu);
if (fw_mtu < pkt_off) {
NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary");
return -EOPNOTSUPP;
}
......@@ -600,3 +587,11 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
return 0;
}
const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
.insn_hook = nfp_verify_insn,
.finalize = nfp_bpf_finalize,
.prepare = nfp_bpf_verifier_prep,
.translate = nfp_bpf_translate,
.destroy = nfp_bpf_destroy,
};
......@@ -623,8 +623,8 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
static int
nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
int prev_insn_idx)
{
struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
......@@ -745,7 +745,7 @@ nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog, unsigned int cnt)
goto continue_subprog;
}
static int nfp_bpf_finalize(struct bpf_verifier_env *env)
int nfp_bpf_finalize(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *info;
struct nfp_prog *nfp_prog;
......@@ -788,8 +788,3 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
return 0;
}
const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
.insn_hook = nfp_verify_insn,
.finalize = nfp_bpf_finalize,
};
......@@ -91,11 +91,6 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env)
return 0;
}
static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
.insn_hook = nsim_bpf_verify_insn,
.finalize = nsim_bpf_finalize,
};
static bool nsim_xdp_offload_active(struct netdevsim *ns)
{
return ns->xdp_hw.prog;
......@@ -263,6 +258,24 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
return 0;
}
static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
{
struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev);
if (!ns->bpf_bind_accept)
return -EOPNOTSUPP;
return nsim_bpf_create_prog(ns, prog);
}
static int nsim_bpf_translate(struct bpf_prog *prog)
{
struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv;
state->state = "xlated";
return 0;
}
static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
{
struct nsim_bpf_bound_prog *state;
......@@ -275,6 +288,14 @@ static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
kfree(state);
}
static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = {
.insn_hook = nsim_bpf_verify_insn,
.finalize = nsim_bpf_finalize,
.prepare = nsim_bpf_verifier_prep,
.translate = nsim_bpf_translate,
.destroy = nsim_bpf_destroy_prog,
};
static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
{
if (bpf->prog && bpf->prog->aux->offload) {
......@@ -533,30 +554,11 @@ static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
struct netdevsim *ns = netdev_priv(dev);
struct nsim_bpf_bound_prog *state;
int err;
ASSERT_RTNL();
switch (bpf->command) {
case BPF_OFFLOAD_VERIFIER_PREP:
if (!ns->bpf_bind_accept)
return -EOPNOTSUPP;
err = nsim_bpf_create_prog(ns, bpf->verifier.prog);
if (err)
return err;
bpf->verifier.ops = &nsim_bpf_analyzer_ops;
return 0;
case BPF_OFFLOAD_TRANSLATE:
state = bpf->offload.prog->aux->offload->dev_priv;
state->state = "xlated";
return 0;
case BPF_OFFLOAD_DESTROY:
nsim_bpf_destroy_prog(bpf->offload.prog);
return 0;
case XDP_QUERY_PROG:
return xdp_attachment_query(&ns->xdp, bpf);
case XDP_QUERY_PROG_HW:
......@@ -599,7 +601,7 @@ int nsim_bpf_init(struct netdevsim *ns)
if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
return -ENOMEM;
ns->sdev->bpf_dev = bpf_offload_dev_create();
ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops);
err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
if (err)
return err;
......
......@@ -268,15 +268,18 @@ struct bpf_prog_offload_ops {
int (*insn_hook)(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);
int (*finalize)(struct bpf_verifier_env *env);
int (*prepare)(struct bpf_prog *prog);
int (*translate)(struct bpf_prog *prog);
void (*destroy)(struct bpf_prog *prog);
};
struct bpf_prog_offload {
struct bpf_prog *prog;
struct net_device *netdev;
struct bpf_offload_dev *offdev;
void *dev_priv;
struct list_head offloads;
bool dev_state;
const struct bpf_prog_offload_ops *dev_ops;
void *jited_image;
u32 jited_len;
};
......@@ -293,6 +296,7 @@ struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
u32 stack_depth;
u32 id;
u32 func_cnt;
......@@ -312,6 +316,8 @@ struct bpf_prog_aux {
void *security;
#endif
struct bpf_prog_offload *offload;
struct btf *btf;
u32 type_id; /* type id for this prog/func */
union {
struct work_struct work;
struct rcu_head rcu;
......@@ -523,7 +529,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
}
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
union bpf_attr __user *uattr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
......@@ -691,7 +698,8 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
struct bpf_offload_dev *bpf_offload_dev_create(void);
struct bpf_offload_dev *
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops);
void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
struct net_device *netdev);
......
......@@ -204,6 +204,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
u32 type_id; /* btf type_id for this subprog */
};
/* single container for all structs
......@@ -245,7 +246,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
return cur_func(env)->regs;
}
int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);
int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
......
......@@ -47,4 +47,20 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
int btf_get_fd_by_id(u32 id);
u32 btf_id(const struct btf *btf);
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
{
return NULL;
}
static inline const char *btf_name_by_offset(const struct btf *btf,
u32 offset)
{
return NULL;
}
#endif
#endif
......@@ -668,24 +668,10 @@ static inline u32 bpf_ctx_off_adjust_machine(u32 size)
return size;
}
static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access,
u32 size_default)
{
size_default = bpf_ctx_off_adjust_machine(size_default);
size_access = bpf_ctx_off_adjust_machine(size_access);
#ifdef __LITTLE_ENDIAN
return (off & (size_default - 1)) == 0;
#else
return (off & (size_default - 1)) + size_access == size_default;
#endif
}
static inline bool
bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
{
return bpf_ctx_narrow_align_ok(off, size, size_default) &&
size <= size_default && (size & (size - 1)) == 0;
return size <= size_default && (size & (size - 1)) == 0;
}
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
......
......@@ -865,9 +865,6 @@ enum bpf_netdev_command {
XDP_QUERY_PROG,
XDP_QUERY_PROG_HW,
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_VERIFIER_PREP,
BPF_OFFLOAD_TRANSLATE,
BPF_OFFLOAD_DESTROY,
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
XDP_QUERY_XSK_UMEM,
......@@ -893,15 +890,6 @@ struct netdev_bpf {
/* flags with which program was installed */
u32 prog_flags;
};
/* BPF_OFFLOAD_VERIFIER_PREP */
struct {
struct bpf_prog *prog;
const struct bpf_prog_offload_ops *ops; /* callee set */
} verifier;
/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
struct {
struct bpf_prog *prog;
} offload;
/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
struct {
struct bpf_offloaded_map *offmap;
......
......@@ -257,9 +257,6 @@ enum bpf_attach_type {
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
#define BPF_OBJ_NAME_LEN 16U
/* Flags for accessing BPF object */
......@@ -269,6 +266,12 @@ enum bpf_attach_type {
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
......@@ -335,6 +338,10 @@ union bpf_attr {
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
__u32 prog_btf_fd; /* fd pointing to BTF type data */
__u32 func_info_rec_size; /* userspace bpf_func_info size */
__aligned_u64 func_info; /* func info */
__u32 func_info_cnt; /* number of bpf_func_info records */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
......@@ -2201,6 +2208,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* Description
......@@ -2233,6 +2242,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* int bpf_sk_release(struct bpf_sock *sk)
* Description
......@@ -2457,6 +2468,7 @@ struct __sk_buff {
__u32 data_meta;
struct bpf_flow_keys *flow_keys;
__u64 tstamp;
};
struct bpf_tunnel_key {
......@@ -2631,6 +2643,10 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
__u32 btf_id;
__u32 func_info_rec_size;
__aligned_u64 func_info;
__u32 func_info_cnt;
} __attribute__((aligned(8)));
struct bpf_map_info {
......@@ -2942,4 +2958,9 @@ struct bpf_flow_keys {
};
};
struct bpf_func_info {
__u32 insn_offset;
__u32 type_id;
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
union {
......@@ -64,8 +65,10 @@ struct btf_type {
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_MAX 11
#define NR_BTF_KINDS 12
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_MAX 13
#define NR_BTF_KINDS 14
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
......@@ -110,4 +113,13 @@ struct btf_member {
__u32 offset; /* offset in bits */
};
/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
* The exact number of btf_param is stored in the vlen (of the
* info in "struct btf_type").
*/
struct btf_param {
__u32 name_off;
__u32 type;
};
#endif /* _UAPI__LINUX_BTF_H__ */
This diff is collapsed.
......@@ -21,12 +21,14 @@
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/frame.h>
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
......@@ -390,6 +392,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;
const char *func_name;
BUILD_BUG_ON(sizeof("bpf_prog_") +
sizeof(prog->tag) * 2 +
......@@ -404,6 +408,15 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
/* prog->aux->name will be ignored if full btf name is available */
if (prog->aux->btf) {
type = btf_type_by_id(prog->aux->btf, prog->aux->type_id);
func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
snprintf(sym, (size_t)(end - sym), "_%s", func_name);
return;
}
if (prog->aux->name[0])
snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
else
......
......@@ -23,7 +23,7 @@
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
BPF_F_RDONLY | BPF_F_WRONLY)
BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
struct bucket {
struct hlist_nulls_head head;
......@@ -244,6 +244,7 @@ static int htab_map_alloc_check(union bpf_attr *attr)
*/
bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
int numa_node = bpf_map_attr_numa_node(attr);
BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
......@@ -257,6 +258,10 @@ static int htab_map_alloc_check(union bpf_attr *attr)
*/
return -EPERM;
if (zero_seed && !capable(CAP_SYS_ADMIN))
/* Guard against local DoS, and discourage production use. */
return -EPERM;
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
return -EINVAL;
......@@ -373,7 +378,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (!htab->buckets)
goto free_htab;
if (htab->map.map_flags & BPF_F_ZERO_SEED)
htab->hashrnd = 0;
else
htab->hashrnd = get_random_int();
for (i = 0; i < htab->n_buckets; i++) {
INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
......
......@@ -7,8 +7,7 @@
#include <linux/rbtree.h>
#include <linux/slab.h>
DEFINE_PER_CPU(struct bpf_cgroup_storage*,
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
#ifdef CONFIG_CGROUP_BPF
......
......@@ -168,20 +168,59 @@ static size_t longest_prefix_match(const struct lpm_trie *trie,
const struct lpm_trie_node *node,
const struct bpf_lpm_trie_key *key)
{
size_t prefixlen = 0;
size_t i;
u32 limit = min(node->prefixlen, key->prefixlen);
u32 prefixlen = 0, i = 0;
for (i = 0; i < trie->data_size; i++) {
size_t b;
BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
b = 8 - fls(node->data[i] ^ key->data[i]);
prefixlen += b;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
return min(node->prefixlen, key->prefixlen);
/* data_size >= 16 has very small probability.
* We do not use a loop for optimal code generation.
*/
if (trie->data_size >= 8) {
u64 diff = be64_to_cpu(*(__be64 *)node->data ^
*(__be64 *)key->data);
prefixlen = 64 - fls64(diff);
if (prefixlen >= limit)
return limit;
if (diff)
return prefixlen;
i = 8;
}
#endif
if (b < 8)
break;
while (trie->data_size >= i + 4) {
u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^
*(__be32 *)&key->data[i]);
prefixlen += 32 - fls(diff);
if (prefixlen >= limit)
return limit;
if (diff)
return prefixlen;
i += 4;
}
if (trie->data_size >= i + 2) {
u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^
*(__be16 *)&key->data[i]);
prefixlen += 16 - fls(diff);
if (prefixlen >= limit)
return limit;
if (diff)
return prefixlen;
i += 2;
}
if (trie->data_size >= i + 1) {
prefixlen += 8 - fls(node->data[i] ^ key->data[i]);
if (prefixlen >= limit)
return limit;
}
return prefixlen;
......
......@@ -33,6 +33,7 @@
static DECLARE_RWSEM(bpf_devs_lock);
struct bpf_offload_dev {
const struct bpf_prog_offload_ops *ops;
struct list_head netdevs;
};
......@@ -106,6 +107,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
err = -EINVAL;
goto err_unlock;
}
offload->offdev = ondev->offdev;
prog->aux->offload = offload;
list_add_tail(&offload->offloads, &ondev->progs);
dev_put(offload->netdev);
......@@ -121,40 +123,20 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
return err;
}
static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
struct netdev_bpf *data)
int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
{
struct bpf_prog_offload *offload = prog->aux->offload;
struct net_device *netdev;
ASSERT_RTNL();
if (!offload)
return -ENODEV;
netdev = offload->netdev;
data->command = cmd;
return netdev->netdev_ops->ndo_bpf(netdev, data);
}
int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
{
struct netdev_bpf data = {};
int err;
data.verifier.prog = env->prog;
struct bpf_prog_offload *offload;
int ret = -ENODEV;
rtnl_lock();
err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data);
if (err)
goto exit_unlock;
down_read(&bpf_devs_lock);
offload = prog->aux->offload;
if (offload) {
ret = offload->offdev->ops->prepare(prog);
offload->dev_state = !ret;
}
up_read(&bpf_devs_lock);
env->prog->aux->offload->dev_ops = data.verifier.ops;
env->prog->aux->offload->dev_state = true;
exit_unlock:
rtnl_unlock();
return err;
return ret;
}
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
......@@ -166,7 +148,8 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
down_read(&bpf_devs_lock);
offload = env->prog->aux->offload;
if (offload)
ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
ret = offload->offdev->ops->insn_hook(env, insn_idx,
prev_insn_idx);
up_read(&bpf_devs_lock);
return ret;
......@@ -180,8 +163,8 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
down_read(&bpf_devs_lock);
offload = env->prog->aux->offload;
if (offload) {
if (offload->dev_ops->finalize)
ret = offload->dev_ops->finalize(env);
if (offload->offdev->ops->finalize)
ret = offload->offdev->ops->finalize(env);
else
ret = 0;
}
......@@ -193,12 +176,9 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
{
struct bpf_prog_offload *offload = prog->aux->offload;
struct netdev_bpf data = {};
data.offload.prog = prog;
if (offload->dev_state)
WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
offload->offdev->ops->destroy(prog);
/* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
bpf_prog_free_id(prog, true);
......@@ -210,24 +190,22 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
void bpf_prog_offload_destroy(struct bpf_prog *prog)
{
rtnl_lock();
down_write(&bpf_devs_lock);
if (prog->aux->offload)
__bpf_prog_offload_destroy(prog);
up_write(&bpf_devs_lock);
rtnl_unlock();
}
static int bpf_prog_offload_translate(struct bpf_prog *prog)
{
struct netdev_bpf data = {};
int ret;
data.offload.prog = prog;
struct bpf_prog_offload *offload;
int ret = -ENODEV;
rtnl_lock();
ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
rtnl_unlock();
down_read(&bpf_devs_lock);
offload = prog->aux->offload;
if (offload)
ret = offload->offdev->ops->translate(prog);
up_read(&bpf_devs_lock);
return ret;
}
......@@ -655,7 +633,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
struct bpf_offload_dev *bpf_offload_dev_create(void)
struct bpf_offload_dev *
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
{
struct bpf_offload_dev *offdev;
int err;
......@@ -673,6 +652,7 @@ struct bpf_offload_dev *bpf_offload_dev_create(void)
if (!offdev)
return ERR_PTR(-ENOMEM);
offdev->ops = ops;
INIT_LIST_HEAD(&offdev->netdevs);
return offdev;
......
......@@ -1213,6 +1213,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
bpf_prog_kallsyms_del_all(prog);
btf_put(prog->aux->btf);
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
......@@ -1437,9 +1438,9 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
#define BPF_PROG_LOAD_LAST_FIELD func_info_cnt
static int bpf_prog_load(union bpf_attr *attr)
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog;
......@@ -1525,7 +1526,7 @@ static int bpf_prog_load(union bpf_attr *attr)
goto free_prog;
/* run eBPF verifier */
err = bpf_check(&prog, attr);
err = bpf_check(&prog, attr, uattr);
if (err < 0)
goto free_used_maps;
......@@ -2079,6 +2080,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
info.nr_jited_func_lens = 0;
info.func_info_cnt = 0;
goto done;
}
......@@ -2216,6 +2218,55 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
}
}
if (prog->aux->btf) {
u32 ucnt, urec_size;
info.btf_id = btf_id(prog->aux->btf);
ucnt = info.func_info_cnt;
info.func_info_cnt = prog->aux->func_cnt ? : 1;
urec_size = info.func_info_rec_size;
info.func_info_rec_size = sizeof(struct bpf_func_info);
if (ucnt) {
/* expect passed-in urec_size is what the kernel expects */
if (urec_size != info.func_info_rec_size)
return -EINVAL;
if (bpf_dump_raw_ok()) {
struct bpf_func_info kern_finfo;
char __user *user_finfo;
u32 i, insn_offset;
user_finfo = u64_to_user_ptr(info.func_info);
if (prog->aux->func_cnt) {
ucnt = min_t(u32, info.func_info_cnt, ucnt);
insn_offset = 0;
for (i = 0; i < ucnt; i++) {
kern_finfo.insn_offset = insn_offset;
kern_finfo.type_id = prog->aux->func[i]->aux->type_id;
if (copy_to_user(user_finfo, &kern_finfo,
sizeof(kern_finfo)))
return -EFAULT;
/* func[i]->len holds the prog len */
insn_offset += prog->aux->func[i]->len;
user_finfo += urec_size;
}
} else {
kern_finfo.insn_offset = 0;
kern_finfo.type_id = prog->aux->type_id;
if (copy_to_user(user_finfo, &kern_finfo,
sizeof(kern_finfo)))
return -EFAULT;
}
} else {
info.func_info_cnt = 0;
}
}
} else {
info.func_info_cnt = 0;
}
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
......@@ -2501,7 +2552,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = map_get_next_key(&attr);
break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr);
err = bpf_prog_load(&attr, uattr);
break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
......
......@@ -11,10 +11,12 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
......@@ -1455,6 +1457,17 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
verbose(env, "R%d offset is outside of the packet\n", regno);
return err;
}
/* __check_packet_access has made sure "off + size - 1" is within u16.
* reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
* otherwise find_good_pkt_pointers would have refused to set range info
* that __check_packet_access would have rejected this pkt access.
* Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
*/
env->prog->aux->max_pkt_offset =
max_t(u32, env->prog->aux->max_pkt_offset,
off + reg->umax_value + size - 1);
return err;
}
......@@ -4628,6 +4641,114 @@ static int check_cfg(struct bpf_verifier_env *env)
return ret;
}
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
union bpf_attr *attr, union bpf_attr __user *uattr)
{
u32 i, nfuncs, urec_size, min_size, prev_offset;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info krecord = {};
const struct btf_type *type;
void __user *urecord;
struct btf *btf;
int ret = 0;
nfuncs = attr->func_info_cnt;
if (!nfuncs)
return 0;
if (nfuncs != env->subprog_cnt) {
verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
return -EINVAL;
}
urec_size = attr->func_info_rec_size;
if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
urec_size > MAX_FUNCINFO_REC_SIZE ||
urec_size % sizeof(u32)) {
verbose(env, "invalid func info rec size %u\n", urec_size);
return -EINVAL;
}
btf = btf_get_by_fd(attr->prog_btf_fd);
if (IS_ERR(btf)) {
verbose(env, "unable to get btf from fd\n");
return PTR_ERR(btf);
}
urecord = u64_to_user_ptr(attr->func_info);
min_size = min_t(u32, krec_size, urec_size);
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
if (ret) {
if (ret == -E2BIG) {
verbose(env, "nonzero tailing record in func info");
/* set the size kernel expects so loader can zero
* out the rest of the record.
*/
if (put_user(min_size, &uattr->func_info_rec_size))
ret = -EFAULT;
}
goto free_btf;
}
if (copy_from_user(&krecord, urecord, min_size)) {
ret = -EFAULT;
goto free_btf;
}
/* check insn_offset */
if (i == 0) {
if (krecord.insn_offset) {
verbose(env,
"nonzero insn_offset %u for the first func info record",
krecord.insn_offset);
ret = -EINVAL;
goto free_btf;
}
} else if (krecord.insn_offset <= prev_offset) {
verbose(env,
"same or smaller insn offset (%u) than previous func info record (%u)",
krecord.insn_offset, prev_offset);
ret = -EINVAL;
goto free_btf;
}
if (env->subprog_info[i].start != krecord.insn_offset) {
verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
ret = -EINVAL;
goto free_btf;
}
/* check type_id */
type = btf_type_by_id(btf, krecord.type_id);
if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
verbose(env, "invalid type id %d in func info",
krecord.type_id);
ret = -EINVAL;
goto free_btf;
}
if (i == 0)
prog->aux->type_id = krecord.type_id;
env->subprog_info[i].type_id = krecord.type_id;
prev_offset = krecord.insn_offset;
urecord += urec_size;
}
prog->aux->btf = btf;
return 0;
free_btf:
btf_put(btf);
return ret;
}
/* check %cur's range satisfies %old's */
static bool range_within(struct bpf_reg_state *old,
struct bpf_reg_state *cur)
......@@ -5707,10 +5828,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
int i, cnt, size, ctx_field_size, delta = 0;
const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16], *insn;
u32 target_size, size_default, off;
struct bpf_prog *new_prog;
enum bpf_access_type type;
bool is_narrower_load;
u32 target_size;
if (ops->gen_prologue || env->seen_direct_write) {
if (!ops->gen_prologue) {
......@@ -5803,9 +5924,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
* we will apply proper mask to the result.
*/
is_narrower_load = size < ctx_field_size;
size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
off = insn->off;
if (is_narrower_load) {
u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
u32 off = insn->off;
u8 size_code;
if (type == BPF_WRITE) {
......@@ -5833,13 +5954,24 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
if (is_narrower_load && size < target_size) {
if (ctx_field_size <= 4)
u8 shift = (off & (size_default - 1)) * 8;
if (ctx_field_size <= 4) {
if (shift)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
insn->dst_reg,
shift);
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1 << size * 8) - 1);
else
} else {
if (shift)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
(1 << size * 8) - 1);
}
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
......@@ -5917,6 +6049,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
/* the btf will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->type_id = env->subprog_info[i].type_id;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
......@@ -6138,6 +6273,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
prog->cb_access = 1;
env->prog->aux->stack_depth = MAX_BPF_STACK;
env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
......@@ -6302,7 +6438,8 @@ static void free_states(struct bpf_verifier_env *env)
kfree(env->explored_states);
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
union bpf_attr __user *uattr)
{
struct bpf_verifier_env *env;
struct bpf_verifier_log *log;
......@@ -6356,7 +6493,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
goto skip_full_check;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env);
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
goto skip_full_check;
}
......@@ -6374,6 +6511,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (ret < 0)
goto skip_full_check;
ret = check_btf_func(env->prog, env, attr, uattr);
if (ret < 0)
goto skip_full_check;
ret = do_check(env);
if (env->cur_state) {
free_verifier_state(env->cur_state, true);
......
This diff is collapsed.
......@@ -208,12 +208,20 @@ endif
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
$(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
readelf -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
ifneq ($(BTF_LLVM_PROBE),)
EXTRA_CFLAGS += -g
else
ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
endif
# Trick to allow make to be run from this directory
all:
......
......@@ -54,6 +54,23 @@ static int populate_prog_array(const char *event, int prog_fd)
return 0;
}
static int write_kprobe_events(const char *val)
{
int fd, ret, flags;
if ((val != NULL) && (val[0] == '\0'))
flags = O_WRONLY | O_TRUNC;
else
flags = O_WRONLY | O_APPEND;
fd = open("/sys/kernel/debug/tracing/kprobe_events", flags);
ret = write(fd, val, strlen(val));
close(fd);
return ret;
}
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
bool is_socket = strncmp(event, "socket", 6) == 0;
......@@ -165,10 +182,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
#ifdef __x86_64__
if (strncmp(event, "sys_", 4) == 0) {
snprintf(buf, sizeof(buf),
"echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events",
snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s",
is_kprobe ? 'p' : 'r', event, event);
err = system(buf);
err = write_kprobe_events(buf);
if (err >= 0) {
need_normal_check = false;
event_prefix = "__x64_";
......@@ -176,10 +192,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
}
#endif
if (need_normal_check) {
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
snprintf(buf, sizeof(buf), "%c:%s %s",
is_kprobe ? 'p' : 'r', event, event);
err = system(buf);
err = write_kprobe_events(buf);
if (err < 0) {
printf("failed to create kprobe '%s' error '%s'\n",
event, strerror(errno));
......@@ -284,8 +299,8 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
numa_node);
}
if (map_fd[i] < 0) {
printf("failed to create a map: %d %s\n",
errno, strerror(errno));
printf("failed to create map %d (%s): %d %s\n",
i, maps[i].name, errno, strerror(errno));
return 1;
}
maps[i].fd = map_fd[i];
......@@ -519,7 +534,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
return 1;
/* clear all kprobes */
i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
i = write_kprobe_events("");
/* scan over all elf sections to get license and map info */
for (i = 1; i < ehdr.e_shnum; i++) {
......
......@@ -42,7 +42,8 @@ MAP COMMANDS
| | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** }
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** }
DESCRIPTION
===========
......
......@@ -15,7 +15,8 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
| **loadall** | **help** }
MAP COMMANDS
=============
......@@ -24,9 +25,9 @@ MAP COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
......@@ -39,7 +40,9 @@ MAP COMMANDS
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
| }
| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
| *ATTACH_TYPE* := {
| **msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector**
| }
DESCRIPTION
......@@ -79,8 +82,11 @@ DESCRIPTION
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
**bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
Load bpf program from binary *OBJ* and pin as *FILE*.
**bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
Load bpf program(s) from binary *OBJ* and pin as *PATH*.
**bpftool prog load** pins only the first program from the
*OBJ* as *PATH*. **bpftool prog loadall** pins all programs
from the *OBJ* under *PATH* directory.
**type** is optional, if not specified program type will be
inferred from section names.
By default bpftool will create new maps as declared in the ELF
......@@ -92,18 +98,24 @@ DESCRIPTION
use, referring to it by **id** or through a **pinned** file.
If **dev** *NAME* is specified program will be loaded onto
given networking device (offload).
Optional **pinmaps** argument can be provided to pin all
maps under *MAP_DIR* directory.
Note: *FILE* must be located in *bpffs* mount. It must not
Note: *PATH* must be located in *bpffs* mount. It must not
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
**bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
to the map *MAP*.
**bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
from the map *MAP*.
**bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
Attach bpf program *PROG* (with type specified by
*ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
parameter, with the exception of *flow_dissector* which is
attached to current networking name space.
**bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
Detach bpf program *PROG* (with type specified by
*ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
parameter, with the exception of *flow_dissector* which is
detached from the current networking name space.
**bpftool prog help**
Print short help message.
......
......@@ -53,7 +53,7 @@ ifneq ($(EXTRA_LDFLAGS),)
LDFLAGS += $(EXTRA_LDFLAGS)
endif
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
LIBS = -lelf $(LIBBPF)
INSTALL ?= install
RM ?= rm -f
......@@ -90,7 +90,16 @@ include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
SRCS = $(wildcard *.c)
BFD_SRCS = jit_disasm.c
SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c))
ifeq ($(feature-libbfd),1)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
LIBS += -lbfd -lopcodes
endif
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
......
......@@ -243,7 +243,7 @@ _bpftool()
# Completion depends on object and command in use
case $object in
prog)
if [[ $command != "load" ]]; then
if [[ $command != "load" && $command != "loadall" ]]; then
case $prev in
id)
_bpftool_get_prog_ids
......@@ -299,7 +299,8 @@ _bpftool()
fi
if [[ ${#words[@]} == 6 ]]; then
COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
COMPREPLY=( $( compgen -W "msg_verdict skb_verdict \
skb_parse flow_dissector" -- "$cur" ) )
return 0
fi
......@@ -309,7 +310,7 @@ _bpftool()
fi
return 0
;;
load)
load|loadall)
local obj
if [[ ${#words[@]} -lt 6 ]]; then
......@@ -338,7 +339,16 @@ _bpftool()
case $prev in
type)
COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \
COMPREPLY=( $( compgen -W "socket kprobe \
kretprobe classifier flow_dissector \
action tracepoint raw_tracepoint \
xdp perf_event cgroup/skb cgroup/sock \
cgroup/dev lwt_in lwt_out lwt_xmit \
lwt_seg6local sockops sk_skb sk_msg \
lirc_mode2 cgroup/bind4 cgroup/bind6 \
cgroup/connect4 cgroup/connect6 \
cgroup/sendmsg4 cgroup/sendmsg6 \
cgroup/post_bind4 cgroup/post_bind6" -- \
"$cur" ) )
return 0
;;
......@@ -346,7 +356,7 @@ _bpftool()
_bpftool_get_map_ids
return 0
;;
pinned)
pinned|pinmaps)
_filedir
return 0
;;
......@@ -358,6 +368,7 @@ _bpftool()
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
_bpftool_once_attr 'type'
_bpftool_once_attr 'dev'
_bpftool_once_attr 'pinmaps'
return 0
;;
esac
......
......@@ -249,3 +249,139 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
{
return btf_dumper_do_type(d, type_id, 0, data);
}
#define BTF_PRINT_ARG(...) \
do { \
pos += snprintf(func_sig + pos, size - pos, \
__VA_ARGS__); \
if (pos >= size) \
return -1; \
} while (0)
#define BTF_PRINT_TYPE(type) \
do { \
pos = __btf_dumper_type_only(btf, type, func_sig, \
pos, size); \
if (pos == -1) \
return -1; \
} while (0)
static int btf_dump_func(const struct btf *btf, char *func_sig,
const struct btf_type *func_proto,
const struct btf_type *func, int pos, int size);
static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
char *func_sig, int pos, int size)
{
const struct btf_type *proto_type;
const struct btf_array *array;
const struct btf_type *t;
if (!type_id) {
BTF_PRINT_ARG("void ");
return pos;
}
t = btf__type_by_id(btf, type_id);
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_STRUCT:
BTF_PRINT_ARG("struct %s ",
btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_UNION:
BTF_PRINT_ARG("union %s ",
btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_ENUM:
BTF_PRINT_ARG("enum %s ",
btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_ARRAY:
array = (struct btf_array *)(t + 1);
BTF_PRINT_TYPE(array->type);
BTF_PRINT_ARG("[%d]", array->nelems);
break;
case BTF_KIND_PTR:
BTF_PRINT_TYPE(t->type);
BTF_PRINT_ARG("* ");
break;
case BTF_KIND_UNKN:
case BTF_KIND_FWD:
case BTF_KIND_TYPEDEF:
return -1;
case BTF_KIND_VOLATILE:
BTF_PRINT_ARG("volatile ");
BTF_PRINT_TYPE(t->type);
break;
case BTF_KIND_CONST:
BTF_PRINT_ARG("const ");
BTF_PRINT_TYPE(t->type);
break;
case BTF_KIND_RESTRICT:
BTF_PRINT_ARG("restrict ");
BTF_PRINT_TYPE(t->type);
break;
case BTF_KIND_FUNC_PROTO:
pos = btf_dump_func(btf, func_sig, t, NULL, pos, size);
if (pos == -1)
return -1;
break;
case BTF_KIND_FUNC:
proto_type = btf__type_by_id(btf, t->type);
pos = btf_dump_func(btf, func_sig, proto_type, t, pos, size);
if (pos == -1)
return -1;
break;
default:
return -1;
}
return pos;
}
static int btf_dump_func(const struct btf *btf, char *func_sig,
const struct btf_type *func_proto,
const struct btf_type *func, int pos, int size)
{
int i, vlen;
BTF_PRINT_TYPE(func_proto->type);
if (func)
BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, func->name_off));
else
BTF_PRINT_ARG("(");
vlen = BTF_INFO_VLEN(func_proto->info);
for (i = 0; i < vlen; i++) {
struct btf_param *arg = &((struct btf_param *)(func_proto + 1))[i];
if (i)
BTF_PRINT_ARG(", ");
if (arg->type) {
BTF_PRINT_TYPE(arg->type);
BTF_PRINT_ARG("%s",
btf__name_by_offset(btf, arg->name_off));
} else {
BTF_PRINT_ARG("...");
}
}
BTF_PRINT_ARG(")");
return pos;
}
void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig,
int size)
{
int err;
func_sig[0] = '\0';
if (!btf)
return;
err = __btf_dumper_type_only(btf, type_id, func_sig, 0, size);
if (err < 0)
func_sig[0] = '\0';
}
......@@ -46,6 +46,7 @@
#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
......@@ -99,6 +100,13 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
void set_max_rlimit(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
setrlimit(RLIMIT_MEMLOCK, &rinf);
}
static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
{
bool bind_done = false;
......@@ -169,34 +177,23 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
return fd;
}
int do_pin_fd(int fd, const char *name)
int mount_bpffs_for_pin(const char *name)
{
char err_str[ERR_MAX_LEN];
char *file;
char *dir;
int err = 0;
err = bpf_obj_pin(fd, name);
if (!err)
goto out;
file = malloc(strlen(name) + 1);
strcpy(file, name);
dir = dirname(file);
if (errno != EPERM || is_bpffs(dir)) {
p_err("can't pin the object (%s): %s", name, strerror(errno));
if (is_bpffs(dir))
/* nothing to do if already mounted */
goto out_free;
}
/* Attempt to mount bpffs, then retry pinning. */
err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
if (!err) {
err = bpf_obj_pin(fd, name);
if (err)
p_err("can't pin the object (%s): %s", name,
strerror(errno));
} else {
if (err) {
err_str[ERR_MAX_LEN - 1] = '\0';
p_err("can't mount BPF file system to pin the object (%s): %s",
name, err_str);
......@@ -204,10 +201,20 @@ int do_pin_fd(int fd, const char *name)
out_free:
free(file);
out:
return err;
}
int do_pin_fd(int fd, const char *name)
{
int err;
err = mount_bpffs_for_pin(name);
if (err)
return err;
return bpf_obj_pin(fd, name);
}
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
{
unsigned int id;
......
......@@ -109,7 +109,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
if (inf) {
bfdf->arch_info = inf;
} else {
p_err("No libfd support for %s", arch);
p_err("No libbfd support for %s", arch);
return;
}
}
......@@ -183,3 +183,9 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
bfd_close(bfdf);
}
int disasm_init(void)
{
bfd_init();
return 0;
}
......@@ -31,7 +31,6 @@
* SOFTWARE.
*/
#include <bfd.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
......@@ -399,8 +398,6 @@ int main(int argc, char **argv)
if (argc < 0)
usage();
bfd_init();
ret = cmd_select(cmds, argc, argv, do_help);
if (json_output)
......
......@@ -100,6 +100,8 @@ bool is_prefix(const char *pfx, const char *str);
void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
void usage(void) __noreturn;
void set_max_rlimit(void);
struct pinned_obj_table {
DECLARE_HASHTABLE(table, 16);
};
......@@ -129,6 +131,7 @@ const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
int do_pin_fd(int fd, const char *name);
......@@ -144,8 +147,22 @@ int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
#ifdef HAVE_LIBBFD_SUPPORT
void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
const char *arch, const char *disassembler_options);
int disasm_init(void);
#else
static inline
void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
const char *arch, const char *disassembler_options)
{
}
static inline int disasm_init(void)
{
p_err("No libbfd support");
return -1;
}
#endif
void print_data_json(uint8_t *data, size_t len);
void print_hex_data_json(uint8_t *data, size_t len);
......@@ -170,6 +187,8 @@ struct btf_dumper {
*/
int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
const void *data);
void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id,
char *func_only, int size);
struct nlattr;
struct ifinfomsg;
......
......@@ -74,6 +74,8 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
[BPF_MAP_TYPE_QUEUE] = "queue",
[BPF_MAP_TYPE_STACK] = "stack",
};
static bool map_is_per_cpu(__u32 type)
......@@ -215,70 +217,6 @@ static int do_dump_btf(const struct btf_dumper *d,
return ret;
}
static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
{
struct bpf_btf_info btf_info = { 0 };
__u32 len = sizeof(btf_info);
__u32 last_size;
int btf_fd;
void *ptr;
int err;
err = 0;
*btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
if (btf_fd < 0)
return 0;
/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
* let's start with a sane default - 4KiB here - and resize it only if
* bpf_obj_get_info_by_fd() needs a bigger buffer.
*/
btf_info.btf_size = 4096;
last_size = btf_info.btf_size;
ptr = malloc(last_size);
if (!ptr) {
err = -ENOMEM;
goto exit_free;
}
bzero(ptr, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
if (!err && btf_info.btf_size > last_size) {
void *temp_ptr;
last_size = btf_info.btf_size;
temp_ptr = realloc(ptr, last_size);
if (!temp_ptr) {
err = -ENOMEM;
goto exit_free;
}
ptr = temp_ptr;
bzero(ptr, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
}
if (err || btf_info.btf_size > last_size) {
err = errno;
goto exit_free;
}
*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
if (IS_ERR(*btf)) {
err = PTR_ERR(*btf);
*btf = NULL;
}
exit_free:
close(btf_fd);
free(ptr);
return err;
}
static json_writer_t *get_btf_writer(void)
{
json_writer_t *jw = jsonw_new(stdout);
......@@ -383,7 +321,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
printf(single_line ? " " : "\n");
printf("value:%c", break_names ? '\n' : ' ');
if (value)
fprint_hex(stdout, value, info->value_size, " ");
else
printf("<no entry>");
printf("\n");
} else {
......@@ -398,8 +339,11 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
for (i = 0; i < n; i++) {
printf("value (CPU %02d):%c",
i, info->value_size > 16 ? '\n' : ' ');
if (value)
fprint_hex(stdout, value + i * step,
info->value_size, " ");
else
printf("<no entry>");
printf("\n");
}
}
......@@ -731,7 +675,11 @@ static int dump_map_elem(int fd, void *key, void *value,
jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
jsonw_end_object(json_wtr);
} else {
print_entry_error(map_info, key, strerror(lookup_errno));
if (errno == ENOENT)
print_entry_plain(map_info, key, NULL);
else
print_entry_error(map_info, key,
strerror(lookup_errno));
}
return 0;
......@@ -765,7 +713,7 @@ static int do_dump(int argc, char **argv)
prev_key = NULL;
err = get_btf(&info, &btf);
err = btf_get_from_id(info.btf_id, &btf);
if (err) {
p_err("failed to get btf");
goto exit_free;
......@@ -909,7 +857,7 @@ static int do_lookup(int argc, char **argv)
}
/* here means bpf_map_lookup_elem() succeeded */
err = get_btf(&info, &btf);
err = btf_get_from_id(info.btf_id, &btf);
if (err) {
p_err("failed to get btf");
goto exit_free;
......@@ -1140,6 +1088,8 @@ static int do_create(int argc, char **argv)
return -1;
}
set_max_rlimit();
fd = bpf_create_map_xattr(&attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
......
This diff is collapsed.
......@@ -242,11 +242,15 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
.cb_imm = print_imm,
.private_data = dd,
};
struct bpf_func_info *record;
struct bpf_insn *insn = buf;
struct btf *btf = dd->btf;
bool double_insn = false;
char func_sig[1024];
unsigned int i;
jsonw_start_array(json_wtr);
record = dd->func_info;
for (i = 0; i < len / sizeof(*insn); i++) {
if (double_insn) {
double_insn = false;
......@@ -255,6 +259,20 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
jsonw_start_object(json_wtr);
if (btf && record) {
if (record->insn_offset == i) {
btf_dumper_type_only(btf, record->type_id,
func_sig,
sizeof(func_sig));
if (func_sig[0] != '\0') {
jsonw_name(json_wtr, "proto");
jsonw_string(json_wtr, func_sig);
}
record = (void *)record + dd->finfo_rec_size;
}
}
jsonw_name(json_wtr, "disasm");
print_bpf_insn(&cbs, insn + i, true);
......@@ -297,16 +315,31 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
.cb_imm = print_imm,
.private_data = dd,
};
struct bpf_func_info *record;
struct bpf_insn *insn = buf;
struct btf *btf = dd->btf;
bool double_insn = false;
char func_sig[1024];
unsigned int i;
record = dd->func_info;
for (i = 0; i < len / sizeof(*insn); i++) {
if (double_insn) {
double_insn = false;
continue;
}
if (btf && record) {
if (record->insn_offset == i) {
btf_dumper_type_only(btf, record->type_id,
func_sig,
sizeof(func_sig));
if (func_sig[0] != '\0')
printf("%s:\n", func_sig);
record = (void *)record + dd->finfo_rec_size;
}
}
double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
printf("% 4d: ", i);
......
......@@ -51,6 +51,9 @@ struct dump_data {
__u32 sym_count;
__u64 *jited_ksyms;
__u32 nr_jited_ksyms;
struct btf *btf;
void *func_info;
__u32 finfo_rec_size;
char scratch_buff[SYM_MAX_NAME + 8];
};
......
......@@ -257,9 +257,6 @@ enum bpf_attach_type {
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
#define BPF_OBJ_NAME_LEN 16U
/* Flags for accessing BPF object */
......@@ -269,6 +266,12 @@ enum bpf_attach_type {
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
......@@ -335,6 +338,10 @@ union bpf_attr {
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
__u32 prog_btf_fd; /* fd pointing to BTF type data */
__u32 func_info_rec_size; /* userspace bpf_func_info size */
__aligned_u64 func_info; /* func info */
__u32 func_info_cnt; /* number of bpf_func_info records */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
......@@ -2201,6 +2208,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* Description
......@@ -2233,6 +2242,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
*
* int bpf_sk_release(struct bpf_sock *sk)
* Description
......@@ -2457,6 +2468,7 @@ struct __sk_buff {
__u32 data_meta;
struct bpf_flow_keys *flow_keys;
__u64 tstamp;
};
struct bpf_tunnel_key {
......@@ -2631,6 +2643,10 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
__u32 btf_id;
__u32 func_info_rec_size;
__aligned_u64 func_info;
__u32 func_info_cnt;
} __attribute__((aligned(8)));
struct bpf_map_info {
......@@ -2942,4 +2958,9 @@ struct bpf_flow_keys {
};
};
struct bpf_func_info {
__u32 insn_offset;
__u32 type_id;
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
union {
......@@ -64,8 +65,10 @@ struct btf_type {
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_MAX 11
#define NR_BTF_KINDS 12
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_MAX 13
#define NR_BTF_KINDS 14
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
......@@ -110,4 +113,13 @@ struct btf_member {
__u32 offset; /* offset in bits */
};
/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
* The exact number of btf_param is stored in the vlen (of the
* info in "struct btf_type").
*/
struct btf_param {
__u32 name_off;
__u32 type;
};
#endif /* _UAPI__LINUX_BTF_H__ */
......@@ -66,7 +66,7 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx
FEATURE_DISPLAY = libelf bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
......@@ -148,6 +148,12 @@ LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
CMD_TARGETS = $(LIB_FILE)
CXX_TEST_TARGET = $(OUTPUT)test_libbpf
ifeq ($(feature-cxx), 1)
CMD_TARGETS += $(CXX_TEST_TARGET)
endif
TARGETS = $(CMD_TARGETS)
all: fixdep all_cmd
......@@ -175,6 +181,9 @@ $(OUTPUT)libbpf.so: $(BPF_IN)
$(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
$(QUIET_LINK)$(CXX) $^ -lelf -o $@
define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
......@@ -201,8 +210,8 @@ config-clean:
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
clean:
$(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
$(RM) LIBBPF-CFLAGS
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
*.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
......
......@@ -177,6 +177,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz)
{
union bpf_attr attr;
void *finfo = NULL;
__u32 name_len;
int fd;
......@@ -196,6 +197,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
attr.log_level = 0;
attr.kern_version = load_attr->kern_version;
attr.prog_ifindex = load_attr->prog_ifindex;
attr.prog_btf_fd = load_attr->prog_btf_fd;
attr.func_info_rec_size = load_attr->func_info_rec_size;
attr.func_info_cnt = load_attr->func_info_cnt;
attr.func_info = ptr_to_u64(load_attr->func_info);
memcpy(attr.prog_name, load_attr->name,
min(name_len, BPF_OBJ_NAME_LEN - 1));
......@@ -203,12 +208,55 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
if (fd >= 0 || !log_buf || !log_buf_sz)
return fd;
/* After bpf_prog_load, the kernel may modify certain attributes
* to give user space a hint how to deal with loading failure.
* Check to see whether we can make some changes and load again.
*/
if (errno == E2BIG && attr.func_info_cnt &&
attr.func_info_rec_size < load_attr->func_info_rec_size) {
__u32 actual_rec_size = load_attr->func_info_rec_size;
__u32 expected_rec_size = attr.func_info_rec_size;
__u32 finfo_cnt = load_attr->func_info_cnt;
__u64 finfo_len = actual_rec_size * finfo_cnt;
const void *orecord;
void *nrecord;
int i;
finfo = malloc(finfo_len);
if (!finfo)
/* further try with log buffer won't help */
return fd;
/* zero out bytes kernel does not understand */
orecord = load_attr->func_info;
nrecord = finfo;
for (i = 0; i < load_attr->func_info_cnt; i++) {
memcpy(nrecord, orecord, expected_rec_size);
memset(nrecord + expected_rec_size, 0,
actual_rec_size - expected_rec_size);
orecord += actual_rec_size;
nrecord += actual_rec_size;
}
/* try with corrected func info records */
attr.func_info = ptr_to_u64(finfo);
attr.func_info_rec_size = load_attr->func_info_rec_size;
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd >= 0 || !log_buf || !log_buf_sz)
goto done;
}
/* Try again with log */
attr.log_buf = ptr_to_u64(log_buf);
attr.log_size = log_buf_sz;
attr.log_level = 1;
log_buf[0] = 0;
return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
done:
free(finfo);
return fd;
}
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
......
......@@ -27,6 +27,10 @@
#include <stdbool.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
......@@ -74,6 +78,10 @@ struct bpf_load_program_attr {
const char *license;
__u32 kern_version;
__u32 prog_ifindex;
__u32 prog_btf_fd;
__u32 func_info_rec_size;
const void *func_info;
__u32 func_info_cnt;
};
/* Flags to direct loading requirements */
......@@ -128,4 +136,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __LIBBPF_BPF_H */
......@@ -37,6 +37,23 @@ struct btf {
int fd;
};
struct btf_ext {
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_len;
};
/* The minimum bpf_func_info checked by the loader */
struct bpf_func_info_min {
__u32 insn_offset;
__u32 type_id;
};
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
static int btf_add_type(struct btf *btf, struct btf_type *t)
{
if (btf->types_size - btf->nr_types < 2) {
......@@ -165,6 +182,10 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
case BTF_KIND_ENUM:
next_type += vlen * sizeof(struct btf_enum);
break;
case BTF_KIND_FUNC_PROTO:
next_type += vlen * sizeof(struct btf_param);
break;
case BTF_KIND_FUNC:
case BTF_KIND_TYPEDEF:
case BTF_KIND_PTR:
case BTF_KIND_FWD:
......@@ -393,3 +414,329 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
else
return NULL;
}
int btf_get_from_id(__u32 id, struct btf **btf)
{
struct bpf_btf_info btf_info = { 0 };
__u32 len = sizeof(btf_info);
__u32 last_size;
int btf_fd;
void *ptr;
int err;
err = 0;
*btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
return 0;
/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
* let's start with a sane default - 4KiB here - and resize it only if
* bpf_obj_get_info_by_fd() needs a bigger buffer.
*/
btf_info.btf_size = 4096;
last_size = btf_info.btf_size;
ptr = malloc(last_size);
if (!ptr) {
err = -ENOMEM;
goto exit_free;
}
bzero(ptr, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
if (!err && btf_info.btf_size > last_size) {
void *temp_ptr;
last_size = btf_info.btf_size;
temp_ptr = realloc(ptr, last_size);
if (!temp_ptr) {
err = -ENOMEM;
goto exit_free;
}
ptr = temp_ptr;
bzero(ptr, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
}
if (err || btf_info.btf_size > last_size) {
err = errno;
goto exit_free;
}
*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
if (IS_ERR(*btf)) {
err = PTR_ERR(*btf);
*btf = NULL;
}
exit_free:
close(btf_fd);
free(ptr);
return err;
}
static int btf_ext_validate_func_info(const void *finfo, __u32 size,
btf_print_fn_t err_log)
{
int sec_hdrlen = sizeof(struct btf_sec_func_info);
__u32 size_left, num_records, record_size;
const struct btf_sec_func_info *sinfo;
__u64 total_record_size;
/* At least a func_info record size */
if (size < sizeof(__u32)) {
elog("BTF.ext func_info record size not found");
return -EINVAL;
}
/* The record size needs to meet below minimum standard */
record_size = *(__u32 *)finfo;
if (record_size < sizeof(struct bpf_func_info_min) ||
record_size % sizeof(__u32)) {
elog("BTF.ext func_info invalid record size");
return -EINVAL;
}
sinfo = finfo + sizeof(__u32);
size_left = size - sizeof(__u32);
/* If no func_info records, return failure now so .BTF.ext
* won't be used.
*/
if (!size_left) {
elog("BTF.ext no func info records");
return -EINVAL;
}
while (size_left) {
if (size_left < sec_hdrlen) {
elog("BTF.ext func_info header not found");
return -EINVAL;
}
num_records = sinfo->num_func_info;
if (num_records == 0) {
elog("incorrect BTF.ext num_func_info");
return -EINVAL;
}
total_record_size = sec_hdrlen +
(__u64)num_records * record_size;
if (size_left < total_record_size) {
elog("incorrect BTF.ext num_func_info");
return -EINVAL;
}
size_left -= total_record_size;
sinfo = (void *)sinfo + total_record_size;
}
return 0;
}
static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
btf_print_fn_t err_log)
{
const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
__u32 meta_left, last_func_info_pos;
void *finfo;
if (data_size < offsetof(struct btf_ext_header, func_info_off) ||
data_size < hdr->hdr_len) {
elog("BTF.ext header not found");
return -EINVAL;
}
if (hdr->magic != BTF_MAGIC) {
elog("Invalid BTF.ext magic:%x\n", hdr->magic);
return -EINVAL;
}
if (hdr->version != BTF_VERSION) {
elog("Unsupported BTF.ext version:%u\n", hdr->version);
return -ENOTSUP;
}
if (hdr->flags) {
elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
return -ENOTSUP;
}
meta_left = data_size - hdr->hdr_len;
if (!meta_left) {
elog("BTF.ext has no data\n");
return -EINVAL;
}
if (meta_left < hdr->func_info_off) {
elog("Invalid BTF.ext func_info section offset:%u\n",
hdr->func_info_off);
return -EINVAL;
}
if (hdr->func_info_off & 0x03) {
elog("BTF.ext func_info section is not aligned to 4 bytes\n");
return -EINVAL;
}
last_func_info_pos = hdr->hdr_len + hdr->func_info_off +
hdr->func_info_len;
if (last_func_info_pos > data_size) {
elog("Invalid BTF.ext func_info section size:%u\n",
hdr->func_info_len);
return -EINVAL;
}
finfo = data + hdr->hdr_len + hdr->func_info_off;
return btf_ext_validate_func_info(finfo, hdr->func_info_len,
err_log);
}
void btf_ext__free(struct btf_ext *btf_ext)
{
if (!btf_ext)
return;
free(btf_ext->func_info);
free(btf_ext);
}
struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
{
const struct btf_ext_header *hdr;
struct btf_ext *btf_ext;
void *org_fdata, *fdata;
__u32 hdrlen, size_u32;
int err;
err = btf_ext_parse_hdr(data, size, err_log);
if (err)
return ERR_PTR(err);
btf_ext = calloc(1, sizeof(struct btf_ext));
if (!btf_ext)
return ERR_PTR(-ENOMEM);
hdr = (const struct btf_ext_header *)data;
hdrlen = hdr->hdr_len;
size_u32 = sizeof(__u32);
fdata = malloc(hdr->func_info_len - size_u32);
if (!fdata) {
free(btf_ext);
return ERR_PTR(-ENOMEM);
}
/* remember record size and copy rest of func_info data */
org_fdata = data + hdrlen + hdr->func_info_off;
btf_ext->func_info_rec_size = *(__u32 *)org_fdata;
memcpy(fdata, org_fdata + size_u32, hdr->func_info_len - size_u32);
btf_ext->func_info = fdata;
btf_ext->func_info_len = hdr->func_info_len - size_u32;
return btf_ext;
}
int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext,
const char *sec_name, void **func_info,
__u32 *func_info_rec_size, __u32 *func_info_len)
{
__u32 sec_hdrlen = sizeof(struct btf_sec_func_info);
__u32 i, record_size, records_len;
struct btf_sec_func_info *sinfo;
const char *info_sec_name;
__s64 remain_len;
void *data;
record_size = btf_ext->func_info_rec_size;
sinfo = btf_ext->func_info;
remain_len = btf_ext->func_info_len;
while (remain_len > 0) {
records_len = sinfo->num_func_info * record_size;
info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
if (strcmp(info_sec_name, sec_name)) {
remain_len -= sec_hdrlen + records_len;
sinfo = (void *)sinfo + sec_hdrlen + records_len;
continue;
}
data = malloc(records_len);
if (!data)
return -ENOMEM;
memcpy(data, sinfo->data, records_len);
/* adjust the insn_offset, the data in .BTF.ext is
* the actual byte offset, and the kernel expects
* the offset in term of bpf_insn.
*
* adjust the insn offset only, the rest data will
* be passed to kernel.
*/
for (i = 0; i < sinfo->num_func_info; i++) {
struct bpf_func_info_min *record;
record = data + i * record_size;
record->insn_offset /= sizeof(struct bpf_insn);
}
*func_info = data;
*func_info_len = records_len;
*func_info_rec_size = record_size;
return 0;
}
return -EINVAL;
}
int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **func_info, __u32 *func_info_len)
{
__u32 sec_hdrlen = sizeof(struct btf_sec_func_info);
__u32 i, record_size, existing_flen, records_len;
struct btf_sec_func_info *sinfo;
const char *info_sec_name;
__u64 remain_len;
void *data;
record_size = btf_ext->func_info_rec_size;
sinfo = btf_ext->func_info;
remain_len = btf_ext->func_info_len;
while (remain_len > 0) {
records_len = sinfo->num_func_info * record_size;
info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
if (strcmp(info_sec_name, sec_name)) {
remain_len -= sec_hdrlen + records_len;
sinfo = (void *)sinfo + sec_hdrlen + records_len;
continue;
}
existing_flen = *func_info_len;
data = realloc(*func_info, existing_flen + records_len);
if (!data)
return -ENOMEM;
memcpy(data + existing_flen, sinfo->data, records_len);
/* adjust insn_offset only, the rest data will be passed
* to the kernel.
*/
for (i = 0; i < sinfo->num_func_info; i++) {
struct bpf_func_info_min *record;
record = data + existing_flen + i * record_size;
record->insn_offset =
record->insn_offset / sizeof(struct bpf_insn) +
insns_cnt;
}
*func_info = data;
*func_info_len = existing_flen + records_len;
return 0;
}
return -EINVAL;
}
......@@ -6,15 +6,60 @@
#include <linux/types.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
#define BTF_ELF_SEC ".BTF"
#define BTF_EXT_ELF_SEC ".BTF.ext"
struct btf;
struct btf_ext;
struct btf_type;
/*
* The .BTF.ext ELF section layout defined as
* struct btf_ext_header
* func_info subsection
*
* The func_info subsection layout:
* record size for struct bpf_func_info in the func_info subsection
* struct btf_sec_func_info for section #1
* a list of bpf_func_info records for section #1
* where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
* but may not be identical
* struct btf_sec_func_info for section #2
* a list of bpf_func_info records for section #2
* ......
*
* Note that the bpf_func_info record size in .BTF.ext may not
* be the same as the one defined in include/uapi/linux/bpf.h.
* The loader should ensure that record_size meets minimum
* requirement and pass the record as is to the kernel. The
* kernel will handle the func_info properly based on its contents.
*/
struct btf_ext_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 func_info_off;
__u32 func_info_len;
};
struct btf_sec_func_info {
__u32 sec_name_off;
__u32 num_func_info;
/* Followed by num_func_info number of bpf func_info records */
__u8 data[0];
};
typedef int (*btf_print_fn_t)(const char *, ...)
__attribute__((format(printf, 1, 2)));
......@@ -28,5 +73,19 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf);
struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
void btf_ext__free(struct btf_ext *btf_ext);
int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext,
const char *sec_name, void **func_info,
__u32 *func_info_rec_size, __u32 *func_info_len);
int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt, void **func_info,
__u32 *func_info_len);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __LIBBPF_BTF_H */
This diff is collapsed.
......@@ -16,6 +16,10 @@
#include <sys/types.h> // for size_t
#include <linux/bpf.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
......@@ -71,6 +75,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
size_t obj_buf_sz,
const char *name);
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
const char *path);
LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
LIBBPF_API void bpf_object__close(struct bpf_object *object);
......@@ -112,6 +123,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
(pos) != NULL; \
(pos) = bpf_program__next((pos), (obj)))
LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
struct bpf_object *obj);
typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
void *);
......@@ -131,7 +145,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
const char *path,
int instance);
LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
const char *path,
int instance);
LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_insn;
......@@ -260,6 +278,9 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
(pos) != NULL; \
(pos) = bpf_map__next((pos), (obj)))
LIBBPF_API struct bpf_map *
bpf_map__prev(struct bpf_map *map, struct bpf_object *obj);
LIBBPF_API int bpf_map__fd(struct bpf_map *map);
LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
......@@ -274,6 +295,9 @@ LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
LIBBPF_API long libbpf_get_error(const void *ptr);
......@@ -317,4 +341,9 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __LIBBPF_LIBBPF_H */
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
/* do nothing, just make sure we can link successfully */
int main(int argc, char *argv[])
{
/* libbpf.h */
libbpf_set_print(NULL, NULL, NULL);
/* bpf.h */
bpf_prog_get_fd_by_id(0);
/* btf.h */
btf__new(NULL, 0, NULL);
}
......@@ -24,12 +24,13 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt
test_netcnt test_tcpnotify_user
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
test_tcpnotify_kern.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
......@@ -38,7 +39,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \
xdp_dummy.o
xdp_dummy.o test_map_in_map.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
......@@ -75,6 +76,7 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
......@@ -125,7 +127,14 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
$(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
readelf -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
ifneq ($(BTF_LLVM_PROBE),)
CLANG_FLAGS += -g
else
ifneq ($(BTF_LLC_PROBE),)
ifneq ($(BTF_PAHOLE_PROBE),)
ifneq ($(BTF_OBJCOPY_PROBE),)
......@@ -135,6 +144,17 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
endif
endif
endif
endif
# Have one program compiled without "-target bpf" to test whether libbpf loads
# it successfully
$(OUTPUT)/test_xdp.o: test_xdp.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
$(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
......
......@@ -116,7 +116,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
return BPF_DROP;
}
SEC("dissect")
SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
if (!skb->vlan_present)
......
......@@ -21,14 +21,40 @@ int _version SEC("version") = 1;
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock_tuple tuple = {};
struct sockaddr_in sa;
struct bpf_sock *sk;
/* Verify that new destination is available. */
memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0);
else
sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0);
if (!sk)
return 0;
if (sk->src_ip4 != tuple.ipv4.daddr ||
sk->src_port != DST_REWRITE_PORT4) {
bpf_sk_release(sk);
return 0;
}
bpf_sk_release(sk);
/* Rewrite destination. */
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
///* Rewrite source. */
/* Rewrite source. */
memset(&sa, 0, sizeof(sa));
sa.sin_family = AF_INET;
......@@ -37,7 +63,6 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
return 0;
}
return 1;
}
......
......@@ -29,7 +29,41 @@ int _version SEC("version") = 1;
SEC("cgroup/connect6")
int connect_v6_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock_tuple tuple = {};
struct sockaddr_in6 sa;
struct bpf_sock *sk;
/* Verify that new destination is available. */
memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr));
memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1);
tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2);
tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3);
tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6);
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0);
else
sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0);
if (!sk)
return 0;
if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
sk->src_port != DST_REWRITE_PORT6) {
bpf_sk_release(sk);
return 0;
}
bpf_sk_release(sk);
/* Rewrite destination. */
ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
......@@ -39,7 +73,6 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
/* Rewrite source. */
memset(&sa, 0, sizeof(sa));
......@@ -53,7 +86,6 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
return 0;
}
return 1;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment