Commit 73376328 authored by Alexei Starovoitov

Merge branch 'bpf-token-support-in-libbpf-s-bpf-object'

Andrii Nakryiko says:

====================
BPF token support in libbpf's BPF object

Add fuller support for BPF token in high-level BPF object APIs. This is the
most frequently used way to work with BPF using libbpf, so supporting BPF
token there is critical.

Patch #1 improves kernel-side BPF_TOKEN_CREATE behavior by refusing to
create an "empty" BPF token with no delegation. This seems like saner behavior
which also makes libbpf's caching better overall. If we ever want to create
BPF token with no delegate_xxx options set on BPF FS, we can use a new flag to
enable that.

Patches #2-#5 refactor libbpf internals, mostly feature detection code, to
prepare it for BPF token FD.

Patch #6 adds options to pass BPF token into BPF object open options. It also
adds implicit BPF token creation logic to BPF object load step, even without
any explicit involvement of the user. If the environment is set up properly,
BPF token will be created transparently and used implicitly. This allows
all existing applications to gain BPF token support by just linking with
latest version of libbpf library. No source code modifications are required.
All that under the assumption that a privileged container management agent has
properly set up the default BPF FS instance at /sys/fs/bpf to allow BPF token
creation.

Patches #7-#8 add more selftests, validating BPF object APIs work as expected
under unprivileged user namespaced conditions in the presence of BPF token.

Patch #9 extends libbpf with LIBBPF_BPF_TOKEN_PATH envvar knowledge, which can
be used to override the default BPF FS location used for implicit BPF token
creation logic without needing to adjust application code. This allows admins
or container managers to mount BPF token-enabled BPF FS at non-standard
location without the need to coordinate with applications.
LIBBPF_BPF_TOKEN_PATH can also be used to disable BPF token implicit creation
by setting it to an empty value. Patch #10 tests this new envvar functionality.

v2->v3:
  - move some stray feature cache refactorings into patch #4 (Alexei);
  - add LIBBPF_BPF_TOKEN_PATH envvar support (Alexei);
v1->v2:
  - remove minor code redundancies (Eduard, John);
  - add acks and rebase.
====================

Link: https://lore.kernel.org/r/20231213190842.3844987-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f04f2ce6 322122bf
......@@ -152,6 +152,15 @@ int bpf_token_create(union bpf_attr *attr)
goto out_path;
}
mnt_opts = path.dentry->d_sb->s_fs_info;
if (mnt_opts->delegate_cmds == 0 &&
mnt_opts->delegate_maps == 0 &&
mnt_opts->delegate_progs == 0 &&
mnt_opts->delegate_attachs == 0) {
err = -ENOENT; /* no BPF token delegation is set up */
goto out_path;
}
mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
if (IS_ERR(inode)) {
......@@ -181,7 +190,6 @@ int bpf_token_create(union bpf_attr *attr)
/* remember bpffs owning userns for future ns_capable() checks */
token->userns = get_user_ns(userns);
mnt_opts = path.dentry->d_sb->s_fs_info;
token->allowed_cmds = mnt_opts->delegate_cmds;
token->allowed_maps = mnt_opts->delegate_maps;
token->allowed_progs = mnt_opts->delegate_progs;
......
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o zip.o elf.o
usdt.o zip.o elf.o features.o
......@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
* [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
* [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
*/
int probe_memcg_account(void)
int probe_memcg_account(int token_fd)
{
const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = {
......@@ -120,6 +120,7 @@ int probe_memcg_account(void)
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL");
attr.prog_token_fd = token_fd;
prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) {
......@@ -146,7 +147,7 @@ int bump_rlimit_memlock(void)
struct rlimit rlim;
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
return 0;
memlock_bumped = true;
......@@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL);
attr.map_type = map_type;
if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
......@@ -265,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.kern_version = OPTS_GET(opts, kern_version, 0);
attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license);
......
......@@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd)
{
LIBBPF_OPTS(bpf_btf_load_opts, opts);
__u32 buf_sz = 0, raw_size;
......@@ -1367,6 +1369,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo
opts.log_level = log_level;
}
opts.token_fd = token_fd;
btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
/* time to turn on verbose mode and try again */
......@@ -1394,7 +1397,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo
int btf__load_into_kernel(struct btf *btf)
{
return btf_load_into_kernel(btf, NULL, 0, 0);
return btf_load_into_kernel(btf, NULL, 0, 0, 0);
}
int btf__fd(const struct btf *btf)
......
......@@ -11,8 +11,6 @@
#include "libbpf_internal.h"
#include "str_error.h"
#define STRERR_BUFSIZE 128
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
* explicit version number. This is a GNU extension.
......
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include <linux/kernel.h>
#include <linux/filter.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_common.h"
#include "libbpf_internal.h"
#include "str_error.h"
/* Widen a pointer into the 64-bit integer form stored in bpf_attr pointer fields. */
static inline __u64 ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
/*
 * Turn the FD-or-error result of a probe into a boolean answer.
 * A valid FD (>= 0) is closed and reported as 1; a negative value yields 0.
 */
static int probe_fd(int fd)
{
	if (fd < 0)
		return 0;

	close(fd);
	return 1;
}
/*
 * Probe whether BPF_PROG_LOAD accepts a program name by loading a trivial
 * named socket filter program.
 *
 * @token_fd: BPF token FD to load with; 0 is passed through as-is
 *            (presumably meaning "no token").
 *
 * Returns 1 if the program loaded, 0 otherwise.
 */
static int probe_kern_prog_name(int token_fd)
{
	/* The size handed to the kernel must reach prog_token_fd: only attr_sz
	 * bytes of attr are passed to sys_bpf_prog_load(), and prog_token_fd
	 * lies after prog_name, so stopping at prog_name would silently drop
	 * the token assigned below.
	 */
	const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, attr_sz);
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.license = ptr_to_u64("GPL");
	attr.insns = ptr_to_u64(insns);
	attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
	attr.prog_token_fd = token_fd;
	libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));

	/* make sure loading with name works */
	ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
	return probe_fd(ret);
}
/*
 * Probe support for global data: create a one-element array map and load a
 * socket filter program that stores into the map value through a
 * BPF_LD_MAP_VALUE reference.
 *
 * Returns 1 if the program loaded, 0 if it did not, or -errno when the
 * helper map itself could not be created.
 */
static int probe_kern_global_data(int token_fd)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd);
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd);
	struct bpf_insn insns[] = {
		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int map_fd, prog_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
	if (map_fd < 0) {
		char errmsg[STRERR_BUFSIZE];
		int err = -errno;
		char *cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));

		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
			__func__, cp, -err);
		return err;
	}

	/* patch the freshly created map FD into the first instruction */
	insns[0].imm = map_fd;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &prog_opts);
	close(map_fd);
	return probe_fd(prog_fd);
}
/*
 * Probe minimal BTF support by loading raw BTF that holds a single
 * 32-bit signed "int" type. Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf(int token_fd)
{
	static const char str_sec[] = "\0int";
	__u32 raw_types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe BTF_KIND_FUNC / BTF_KIND_FUNC_PROTO support by loading raw BTF
 * describing "void x(int a) {}". Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_func(int token_fd)
{
	static const char str_sec[] = "\0int\0x\0a";
	__u32 raw_types[] = {
		/* [1] int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] FUNC_PROTO taking one parameter "a" of type [1] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
		BTF_PARAM_ENC(7, 1),
		/* [3] FUNC x with prototype [2] */
		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe support for BTF functions with global linkage by loading raw BTF
 * describing "void x(int a) {}" whose FUNC entry carries BTF_FUNC_GLOBAL.
 * Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_func_global(int token_fd)
{
	static const char str_sec[] = "\0int\0x\0a";
	__u32 raw_types[] = {
		/* [1] int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] FUNC_PROTO taking one parameter "a" of type [1] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
		BTF_PARAM_ENC(7, 1),
		/* [3] FUNC x, BTF_FUNC_GLOBAL linkage, prototype [2] */
		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe BTF_KIND_VAR / BTF_KIND_DATASEC support by loading raw BTF that
 * describes a static int variable "x" placed in a ".data" section.
 * Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_datasec(int token_fd)
{
	static const char str_sec[] = "\0x\0.data";
	__u32 raw_types[] = {
		/* [1] anonymous int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] VAR x of type [1], static */
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
		BTF_VAR_STATIC,
		/* [3] DATASEC .data: one 4-byte entry for [2] at offset 0 */
		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
		BTF_VAR_SECINFO_ENC(2, 0, 4),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe BTF_KIND_FLOAT support by loading raw BTF holding a single 4-byte
 * "float" type. Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_float(int token_fd)
{
	static const char str_sec[] = "\0float";
	__u32 raw_types[] = {
		/* float */
		BTF_TYPE_FLOAT_ENC(1, 4),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe BTF_KIND_DECL_TAG support by loading raw BTF with an int, a static
 * VAR of that int, and a DECL_TAG attached to the VAR.
 * Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_decl_tag(int token_fd)
{
	static const char str_sec[] = "\0tag";
	__u32 raw_types[] = {
		/* [1] anonymous int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] static VAR of type [1] */
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
		BTF_VAR_STATIC,
		/* DECL_TAG "tag" on [2], component index -1 */
		BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/*
 * Probe BTF_KIND_TYPE_TAG support by loading raw BTF with an int, a TYPE_TAG
 * wrapping it, and a pointer to the tagged type.
 * Returns 1 if the load succeeded, 0 otherwise.
 */
static int probe_kern_btf_type_tag(int token_fd)
{
	static const char str_sec[] = "\0tag";
	__u32 raw_types[] = {
		/* [1] anonymous int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] TYPE_TAG "tag" applied to [1] */
		BTF_TYPE_TYPE_TAG_ENC(1, 1),
		/* [3] pointer to [2] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
static int probe_kern_array_mmap(int token_fd)
{
LIBBPF_OPTS(bpf_map_create_opts, opts,
.map_flags = BPF_F_MMAPABLE,
.token_fd = token_fd,
);
int fd;
fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
return probe_fd(fd);
}
static int probe_kern_exp_attach_type(int token_fd)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
.token_fd = token_fd,
);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int fd, insn_cnt = ARRAY_SIZE(insns);
/* use any valid combination of program type and (optional)
* non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
* to see if kernel supports expected_attach_type field for
* BPF_PROG_LOAD command
*/
fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
return probe_fd(fd);
}
static int probe_kern_probe_read_kernel(int token_fd)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
struct bpf_insn insns[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
};
int fd, insn_cnt = ARRAY_SIZE(insns);
fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
return probe_fd(fd);
}
/*
 * Probe BPF_PROG_BIND_MAP support: create an array map, load a trivial socket
 * filter program and try to bind the map to it.
 *
 * Returns 1 if binding succeeded, 0 if the program could not be loaded or
 * binding failed, or -errno when the helper map could not be created.
 */
static int probe_prog_bind_map(int token_fd)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd);
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd);
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int map_fd, prog_fd, supported = 0;

	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
	if (map_fd < 0) {
		char errmsg[STRERR_BUFSIZE];
		int err = -errno;
		char *cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));

		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
			__func__, cp, -err);
		return err;
	}

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &prog_opts);
	/* a failed program load is reported as "not supported", not as an error */
	if (prog_fd >= 0)
		supported = bpf_prog_bind_map(prog_fd, map_fd, NULL) >= 0;

	close(map_fd);
	if (prog_fd >= 0)
		close(prog_fd);

	return supported;
}
/*
 * Probe module BTF support: load a minimal BTF object and query its info
 * with a name buffer supplied.
 *
 * Returns 1 if the info query succeeded, 0 if it failed or BTF could not be
 * loaded at all.
 */
static int probe_module_btf(int token_fd)
{
	static const char str_sec[] = "\0int";
	__u32 raw_types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
	};
	struct bpf_btf_info info;
	__u32 info_len = sizeof(info);
	char name_buf[16];
	int btf_fd, err;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	if (btf_fd < 0)
		return 0; /* BTF not supported at all */

	memset(&info, 0, sizeof(info));
	info.name = ptr_to_u64(name_buf);
	info.name_len = sizeof(name_buf);

	/* Module BTF support in the kernel coincides with support for the
	 * name/name_len fields of struct bpf_btf_info, so it is enough to
	 * check that BPF_OBJ_GET_INFO_BY_FD accepts a name pointer.
	 */
	err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
	close(btf_fd);

	return err == 0;
}
static int probe_perf_link(int token_fd)
{
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
int prog_fd, link_fd, err;
prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
insns, ARRAY_SIZE(insns), &opts);
if (prog_fd < 0)
return -errno;
/* use invalid perf_event FD to get EBADF, if link is supported;
* otherwise EINVAL should be returned
*/
link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
err = -errno; /* close() can clobber errno */
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
return link_fd < 0 && err == -EBADF;
}
/*
 * Probe multi-uprobe link support: load a trivial kprobe program with
 * BPF_TRACE_UPROBE_MULTI as expected attach type and try to create a
 * uprobe-multi link on the "/" path.
 *
 * Returns 1 if link creation failed with EBADF, 0 otherwise, or -errno when
 * the test program could not be loaded.
 */
static int probe_uprobe_multi_link(int token_fd)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
		.token_fd = token_fd,
		.expected_attach_type = BPF_TRACE_UPROBE_MULTI,
	);
	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
	unsigned long offset = 0;
	int prog_fd, link_fd, link_err;

	/* Creating uprobe in '/' binary should fail with -EBADF. */
	link_opts.uprobe_multi.path = "/";
	link_opts.uprobe_multi.offsets = &offset;
	link_opts.uprobe_multi.cnt = 1;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &load_opts);
	if (prog_fd < 0)
		return -errno;

	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
	link_err = -errno; /* saved now, close() can clobber errno */

	if (link_fd >= 0)
		close(link_fd);
	close(prog_fd);

	if (link_fd >= 0)
		return 0;
	return link_err == -EBADF;
}
static int probe_kern_bpf_cookie(int token_fd)
{
struct bpf_insn insns[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
BPF_EXIT_INSN(),
};
LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
int ret, insn_cnt = ARRAY_SIZE(insns);
ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
return probe_fd(ret);
}
/*
 * Probe BTF_KIND_ENUM64 support by loading raw BTF holding one 8-byte
 * enum64 type with no enumerators. Returns 1 if the load succeeded,
 * 0 otherwise.
 */
static int probe_kern_btf_enum64(int token_fd)
{
	static const char str_sec[] = "\0enum64";
	__u32 raw_types[] = {
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
	};
	int btf_fd;

	btf_fd = libbpf__load_raw_btf((char *)raw_types, sizeof(raw_types),
				      str_sec, sizeof(str_sec), token_fd);
	return probe_fd(btf_fd);
}
/* Signature shared by all feature probes; the argument is the token FD to probe with. */
typedef int (*feature_probe_fn)(int /* token_fd */);

/* Cache used by feat_supported() when the caller does not supply its own. */
static struct kern_feature_cache feature_cache;

/* Per-feature description (used in warnings) and probe callback, indexed by feature ID. */
static struct kern_feature_desc {
	const char *desc;
	feature_probe_fn probe;
} feature_probes[__FEAT_CNT] = {
	[FEAT_PROG_NAME] = {
		.desc = "BPF program name",
		.probe = probe_kern_prog_name,
	},
	[FEAT_GLOBAL_DATA] = {
		.desc = "global variables",
		.probe = probe_kern_global_data,
	},
	[FEAT_BTF] = {
		.desc = "minimal BTF",
		.probe = probe_kern_btf,
	},
	[FEAT_BTF_FUNC] = {
		.desc = "BTF functions",
		.probe = probe_kern_btf_func,
	},
	[FEAT_BTF_GLOBAL_FUNC] = {
		.desc = "BTF global function",
		.probe = probe_kern_btf_func_global,
	},
	[FEAT_BTF_DATASEC] = {
		.desc = "BTF data section and variable",
		.probe = probe_kern_btf_datasec,
	},
	[FEAT_ARRAY_MMAP] = {
		.desc = "ARRAY map mmap()",
		.probe = probe_kern_array_mmap,
	},
	[FEAT_EXP_ATTACH_TYPE] = {
		.desc = "BPF_PROG_LOAD expected_attach_type attribute",
		.probe = probe_kern_exp_attach_type,
	},
	[FEAT_PROBE_READ_KERN] = {
		.desc = "bpf_probe_read_kernel() helper",
		.probe = probe_kern_probe_read_kernel,
	},
	[FEAT_PROG_BIND_MAP] = {
		.desc = "BPF_PROG_BIND_MAP support",
		.probe = probe_prog_bind_map,
	},
	[FEAT_MODULE_BTF] = {
		.desc = "module BTF support",
		.probe = probe_module_btf,
	},
	[FEAT_BTF_FLOAT] = {
		.desc = "BTF_KIND_FLOAT support",
		.probe = probe_kern_btf_float,
	},
	[FEAT_PERF_LINK] = {
		.desc = "BPF perf link support",
		.probe = probe_perf_link,
	},
	[FEAT_BTF_DECL_TAG] = {
		.desc = "BTF_KIND_DECL_TAG support",
		.probe = probe_kern_btf_decl_tag,
	},
	[FEAT_BTF_TYPE_TAG] = {
		.desc = "BTF_KIND_TYPE_TAG support",
		.probe = probe_kern_btf_type_tag,
	},
	[FEAT_MEMCG_ACCOUNT] = {
		.desc = "memcg-based memory accounting",
		.probe = probe_memcg_account,
	},
	[FEAT_BPF_COOKIE] = {
		.desc = "BPF cookie support",
		.probe = probe_kern_bpf_cookie,
	},
	[FEAT_BTF_ENUM64] = {
		.desc = "BTF_KIND_ENUM64 support",
		.probe = probe_kern_btf_enum64,
	},
	[FEAT_SYSCALL_WRAPPER] = {
		.desc = "Kernel using syscall wrapper",
		.probe = probe_kern_syscall_wrapper,
	},
	[FEAT_UPROBE_MULTI_LINK] = {
		.desc = "BPF multi-uprobe link support",
		.probe = probe_uprobe_multi_link,
	},
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
{
struct kern_feature_desc *feat = &feature_probes[feat_id];
int ret;
/* assume global feature cache, unless custom one is provided */
if (!cache)
cache = &feature_cache;
if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
ret = feat->probe(cache->token_fd);
if (ret > 0) {
WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
} else if (ret == 0) {
WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
} else {
pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
}
}
return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
}
......@@ -59,6 +59,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
......@@ -693,6 +695,10 @@ struct bpf_object {
struct usdt_manager *usdt_man;
struct kern_feature_cache *feat_cache;
char *token_path;
int token_fd;
char path[];
};
......@@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
int err;
if (!path)
path = "/sys/fs/bpf";
path = BPF_FS_DEFAULT_PATH;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
......@@ -3279,7 +3285,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
} else {
/* currently BPF_BTF_LOAD only supports log_level 1 */
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
obj->log_level ? 1 : 0);
obj->log_level ? 1 : 0, obj->token_fd);
}
if (sanitize) {
if (!err) {
......@@ -4602,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
return 0;
}
/*
 * Make sure @obj has its BPF token state settled, creating a token
 * implicitly from a BPF FS mount when none is set yet.
 *
 * obj->token_fd on entry:
 *   > 0 - a token is already set up, nothing to do;
 *   < 0 - token use is explicitly prevented; reset to 0 and skip;
 *     0 - try to create a token from obj->token_path, falling back to
 *         BPF_FS_DEFAULT_PATH when no path is set.
 *
 * When obj->token_path is set, token creation is mandatory: failures are
 * logged at warn level and returned. Otherwise they are logged at debug level
 * and ignored. On success a per-object feature cache tied to the new token is
 * allocated and both obj->token_fd and the cache's token_fd are set.
 *
 * Returns 0 on success or when the optional step is skipped, a negative
 * error otherwise (-ENOMEM if the feature cache cannot be allocated).
 */
static int bpf_object_prepare_token(struct bpf_object *obj)
{
	const char *bpffs_path;
	int bpffs_fd = -1, token_fd, err;
	bool mandatory;
	enum libbpf_print_level level;

	/* token is already set up */
	if (obj->token_fd > 0)
		return 0;
	/* token is explicitly prevented */
	if (obj->token_fd < 0) {
		pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
		/* reset to zero to avoid extra checks during map_create and prog_load steps */
		obj->token_fd = 0;
		return 0;
	}

	mandatory = obj->token_path != NULL;
	level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;

	bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
	/* open()'s third argument is a creation mode that is only consulted
	 * with O_CREAT; the access mode belongs in flags, and a directory can
	 * only be opened read-only
	 */
	bpffs_fd = open(bpffs_path, O_DIRECTORY | O_RDONLY);
	if (bpffs_fd < 0) {
		err = -errno;
		__pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
		     obj->name, err, bpffs_path,
		     mandatory ? "" : ", skipping optional step...");
		return mandatory ? err : 0;
	}

	token_fd = bpf_token_create(bpffs_fd, 0);
	close(bpffs_fd);
	if (token_fd < 0) {
		/* -ENOENT on an optional token gets its own, quieter message */
		if (!mandatory && token_fd == -ENOENT) {
			pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
				 obj->name, bpffs_path);
			return 0;
		}
		__pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
		     obj->name, token_fd, bpffs_path,
		     mandatory ? "" : ", skipping optional step...");
		return mandatory ? token_fd : 0;
	}

	/* give the object its own feature cache tied to this token */
	obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
	if (!obj->feat_cache) {
		close(token_fd);
		return -ENOMEM;
	}

	obj->token_fd = token_fd;
	obj->feat_cache->token_fd = token_fd;

	return 0;
}
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
......@@ -4611,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj)
BPF_EXIT_INSN(),
};
int ret, insn_cnt = ARRAY_SIZE(insns);
LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd);
if (obj->gen_loader)
return 0;
......@@ -4620,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
/* make sure basic loading works */
ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0)
ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
......@@ -4637,462 +4701,18 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
static int probe_fd(int fd)
{
if (fd >= 0)
close(fd);
return fd >= 0;
}
static int probe_kern_prog_name(void)
{
const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
union bpf_attr attr;
int ret;
memset(&attr, 0, attr_sz);
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
attr.license = ptr_to_u64("GPL");
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
/* make sure loading with name works */
ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
return probe_fd(ret);
}
static int probe_kern_global_data(void)
{
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int ret, map, insn_cnt = ARRAY_SIZE(insns);
map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
__func__, cp, -ret);
return ret;
}
insns[0].imm = map;
ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
close(map);
return probe_fd(ret);
}
static int probe_kern_btf(void)
{
static const char strs[] = "\0int";
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_func(void)
{
static const char strs[] = "\0int\0x\0a";
/* void x(int a) {} */
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* FUNC_PROTO */ /* [2] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
BTF_PARAM_ENC(7, 1),
/* FUNC x */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_func_global(void)
{
static const char strs[] = "\0int\0x\0a";
/* static void x(int a) {} */
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* FUNC_PROTO */ /* [2] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
BTF_PARAM_ENC(7, 1),
/* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_datasec(void)
{
static const char strs[] = "\0x\0.data";
/* static int a; */
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* VAR x */ /* [2] */
BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
BTF_VAR_STATIC,
/* DATASEC val */ /* [3] */
BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
BTF_VAR_SECINFO_ENC(2, 0, 4),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_float(void)
{
static const char strs[] = "\0float";
__u32 types[] = {
/* float */
BTF_TYPE_FLOAT_ENC(1, 4),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_decl_tag(void)
{
static const char strs[] = "\0tag";
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* VAR x */ /* [2] */
BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
BTF_VAR_STATIC,
/* attr */
BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_btf_type_tag(void)
{
static const char strs[] = "\0tag";
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* attr */
BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
/* ptr */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_array_mmap(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
int fd;
fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
return probe_fd(fd);
}
static int probe_kern_exp_attach_type(void)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int fd, insn_cnt = ARRAY_SIZE(insns);
/* use any valid combination of program type and (optional)
* non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
* to see if kernel supports expected_attach_type field for
* BPF_PROG_LOAD command
*/
fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
return probe_fd(fd);
}
static int probe_kern_probe_read_kernel(void)
{
struct bpf_insn insns[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
};
int fd, insn_cnt = ARRAY_SIZE(insns);
fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
return probe_fd(fd);
}
static int probe_prog_bind_map(void)
{
char *cp, errmsg[STRERR_BUFSIZE];
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
__func__, cp, -ret);
return ret;
}
prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
if (prog < 0) {
close(map);
return 0;
}
ret = bpf_prog_bind_map(prog, map, NULL);
close(map);
close(prog);
return ret >= 0;
}
static int probe_module_btf(void)
{
static const char strs[] = "\0int";
__u32 types[] = {
/* int */
BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
};
struct bpf_btf_info info;
__u32 len = sizeof(info);
char name[16];
int fd, err;
fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
if (fd < 0)
return 0; /* BTF not supported at all */
memset(&info, 0, sizeof(info));
info.name = ptr_to_u64(name);
info.name_len = sizeof(name);
/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
* kernel's module BTF support coincides with support for
* name/name_len fields in struct bpf_btf_info.
*/
err = bpf_btf_get_info_by_fd(fd, &info, &len);
close(fd);
return !err;
}
static int probe_perf_link(void)
{
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int prog_fd, link_fd, err;
prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
insns, ARRAY_SIZE(insns), NULL);
if (prog_fd < 0)
return -errno;
/* use invalid perf_event FD to get EBADF, if link is supported;
* otherwise EINVAL should be returned
*/
link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
err = -errno; /* close() can clobber errno */
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
return link_fd < 0 && err == -EBADF;
}
static int probe_uprobe_multi_link(void)
{
LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
.expected_attach_type = BPF_TRACE_UPROBE_MULTI,
);
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int prog_fd, link_fd, err;
unsigned long offset = 0;
prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
insns, ARRAY_SIZE(insns), &load_opts);
if (prog_fd < 0)
return -errno;
/* Creating uprobe in '/' binary should fail with -EBADF. */
link_opts.uprobe_multi.path = "/";
link_opts.uprobe_multi.offsets = &offset;
link_opts.uprobe_multi.cnt = 1;
link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
err = -errno; /* close() can clobber errno */
if (link_fd >= 0)
close(link_fd);
close(prog_fd);
return link_fd < 0 && err == -EBADF;
}
static int probe_kern_bpf_cookie(void)
{
struct bpf_insn insns[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
BPF_EXIT_INSN(),
};
int ret, insn_cnt = ARRAY_SIZE(insns);
ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
return probe_fd(ret);
}
static int probe_kern_btf_enum64(void)
{
static const char strs[] = "\0enum64";
__u32 types[] = {
BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
};
return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs)));
}
static int probe_kern_syscall_wrapper(void);
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
FEAT_MISSING = 2,
};
typedef int (*feature_probe_fn)(void);
/* Table of kernel feature probes. kernel_supports() indexes it with an
 * enum kern_feature_id value, runs the probe the first time the feature is
 * queried and caches the outcome in the entry's 'res' field.
 */
static struct kern_feature_desc {
	/* human-readable feature name, printed when detection fails */
	const char *desc;
	/* detection callback */
	feature_probe_fn probe;
	/* cached result; left zero (FEAT_UNKNOWN) by the initializers below */
	enum kern_feature_result res;
} feature_probes[__FEAT_CNT] = {
	[FEAT_PROG_NAME] = {
		.desc = "BPF program name",
		.probe = probe_kern_prog_name,
	},
	[FEAT_GLOBAL_DATA] = {
		.desc = "global variables",
		.probe = probe_kern_global_data,
	},
	[FEAT_BTF] = {
		.desc = "minimal BTF",
		.probe = probe_kern_btf,
	},
	[FEAT_BTF_FUNC] = {
		.desc = "BTF functions",
		.probe = probe_kern_btf_func,
	},
	[FEAT_BTF_GLOBAL_FUNC] = {
		.desc = "BTF global function",
		.probe = probe_kern_btf_func_global,
	},
	[FEAT_BTF_DATASEC] = {
		.desc = "BTF data section and variable",
		.probe = probe_kern_btf_datasec,
	},
	[FEAT_ARRAY_MMAP] = {
		.desc = "ARRAY map mmap()",
		.probe = probe_kern_array_mmap,
	},
	[FEAT_EXP_ATTACH_TYPE] = {
		.desc = "BPF_PROG_LOAD expected_attach_type attribute",
		.probe = probe_kern_exp_attach_type,
	},
	[FEAT_PROBE_READ_KERN] = {
		.desc = "bpf_probe_read_kernel() helper",
		.probe = probe_kern_probe_read_kernel,
	},
	[FEAT_PROG_BIND_MAP] = {
		.desc = "BPF_PROG_BIND_MAP support",
		.probe = probe_prog_bind_map,
	},
	[FEAT_MODULE_BTF] = {
		.desc = "module BTF support",
		.probe = probe_module_btf,
	},
	[FEAT_BTF_FLOAT] = {
		.desc = "BTF_KIND_FLOAT support",
		.probe = probe_kern_btf_float,
	},
	[FEAT_PERF_LINK] = {
		.desc = "BPF perf link support",
		.probe = probe_perf_link,
	},
	[FEAT_BTF_DECL_TAG] = {
		.desc = "BTF_KIND_DECL_TAG support",
		.probe = probe_kern_btf_decl_tag,
	},
	[FEAT_BTF_TYPE_TAG] = {
		.desc = "BTF_KIND_TYPE_TAG support",
		.probe = probe_kern_btf_type_tag,
	},
	[FEAT_MEMCG_ACCOUNT] = {
		.desc = "memcg-based memory accounting",
		.probe = probe_memcg_account,
	},
	[FEAT_BPF_COOKIE] = {
		.desc = "BPF cookie support",
		.probe = probe_kern_bpf_cookie,
	},
	[FEAT_BTF_ENUM64] = {
		.desc = "BTF_KIND_ENUM64 support",
		.probe = probe_kern_btf_enum64,
	},
	[FEAT_SYSCALL_WRAPPER] = {
		.desc = "Kernel using syscall wrapper",
		.probe = probe_kern_syscall_wrapper,
	},
	[FEAT_UPROBE_MULTI_LINK] = {
		.desc = "BPF multi-uprobe link support",
		.probe = probe_uprobe_multi_link,
	},
};
/* Report whether the running kernel supports feature @feat_id.
 *
 * @obj may be NULL. If @obj has a gen_loader, support is assumed without
 * probing. Otherwise the feature's probe is run once and its outcome cached
 * in feature_probes[]; a probe that fails with a negative error is logged
 * and cached as FEAT_MISSING. If @obj carries a token FD, the answer comes
 * from feat_supported() using the object's own feature cache instead.
 *
 * Returns true if the feature is considered supported, false otherwise.
 */
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
	struct kern_feature_desc *feat = &feature_probes[feat_id];
	int ret;

	if (obj && obj->gen_loader)
		/* To generate loader program assume the latest kernel
		 * to avoid doing extra prog_load, map_create syscalls.
		 */
		return true;

	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
		ret = feat->probe();
		if (ret > 0) {
			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
		} else if (ret == 0) {
			WRITE_ONCE(feat->res, FEAT_MISSING);
		} else {
			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
			WRITE_ONCE(feat->res, FEAT_MISSING);
		}
	}

	/* obj is allowed to be NULL (see the gen_loader check above), so it
	 * must be checked again before looking at its token FD
	 */
	if (obj && obj->token_fd)
		return feat_supported(obj->feat_cache, feat_id);

	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
......@@ -5211,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.map_flags = def->map_flags;
create_attr.numa_node = map->numa_node;
create_attr.map_extra = map->map_extra;
create_attr.token_fd = obj->token_fd;
if (bpf_map__is_struct_ops(map))
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
......@@ -7046,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
load_attr.attach_btf_id = prog->attach_btf_id;
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
load_attr.token_fd = obj->token_fd;
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(obj);
......@@ -7507,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
const char *obj_name, *kconfig, *btf_tmp_path;
const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
int err, token_fd;
char *log_buf;
size_t log_size;
__u32 log_level;
......@@ -7544,6 +7166,28 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
if (log_size && !log_buf)
return ERR_PTR(-EINVAL);
token_path = OPTS_GET(opts, bpf_token_path, NULL);
token_fd = OPTS_GET(opts, bpf_token_fd, -1);
/* non-empty token path can't be combined with invalid token FD */
if (token_path && token_path[0] != '\0' && token_fd < 0)
return ERR_PTR(-EINVAL);
/* empty token path can't be combined with valid token FD */
if (token_path && token_path[0] == '\0' && token_fd > 0)
return ERR_PTR(-EINVAL);
/* if user didn't specify bpf_token_path/bpf_token_fd explicitly,
* check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as
* bpf_token_path option
*/
if (token_fd == 0 && !token_path)
token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
/* empty token_path is equivalent to invalid token_fd */
if (token_path && token_path[0] == '\0') {
token_path = NULL;
token_fd = -1;
}
if (token_path && strlen(token_path) >= PATH_MAX)
return ERR_PTR(-ENAMETOOLONG);
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
......@@ -7552,6 +7196,19 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
obj->log_size = log_size;
obj->log_level = log_level;
obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd);
if (token_fd > 0 && obj->token_fd < 0) {
err = -errno;
goto out;
}
if (token_path) {
obj->token_path = strdup(token_path);
if (!obj->token_path) {
err = -ENOMEM;
goto out;
}
}
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
if (btf_tmp_path) {
if (strlen(btf_tmp_path) >= PATH_MAX) {
......@@ -8062,7 +7719,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
if (obj->gen_loader)
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
err = bpf_object__probe_loading(obj);
err = bpf_object_prepare_token(obj);
err = err ? : bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
......@@ -8599,6 +8257,11 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
zfree(&obj->feat_cache);
zfree(&obj->token_path);
if (obj->token_fd > 0)
close(obj->token_fd);
free(obj);
}
......@@ -10612,7 +10275,7 @@ static const char *arch_specific_syscall_pfx(void)
#endif
}
static int probe_kern_syscall_wrapper(void)
int probe_kern_syscall_wrapper(int token_fd)
{
char syscall_name[64];
const char *ksys_pfx;
......
......@@ -177,10 +177,45 @@ struct bpf_object_open_opts {
* logs through its print callback.
*/
__u32 kernel_log_level;
/* FD of a BPF token instantiated by user through bpf_token_create()
* API. BPF object will keep dup()'ed FD internally, so passed token
* FD can be closed after BPF object/skeleton open step.
*
* Setting bpf_token_fd to negative value disables libbpf's automatic
* attempt to create BPF token from default BPF FS mount point
* (/sys/fs/bpf), in case this default behavior is undesirable.
*
* If bpf_token_path and bpf_token_fd are not specified, libbpf will
* consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will
* be taken as a value of bpf_token_path option and will force libbpf
* to either create BPF token from provided custom BPF FS path, or
* will disable implicit BPF token creation, if envvar value is an
* empty string.
*
* bpf_token_path and bpf_token_fd are mutually exclusive and only one
* of those options should be set. Either of them overrides
* LIBBPF_BPF_TOKEN_PATH envvar.
*/
int bpf_token_fd;
/* Path to BPF FS mount point to derive BPF token from.
*
* Created BPF token will be used for all bpf() syscall operations
* that accept BPF token (e.g., map creation, BTF and program loads,
* etc) automatically within instantiated BPF object.
*
* Setting bpf_token_path option to empty string disables libbpf's
* automatic attempt to create BPF token from default BPF FS mount
* point (/sys/fs/bpf), in case this default behavior is undesirable.
*
* bpf_token_path and bpf_token_fd are mutually exclusive and only one
* of those options should be set. Either of them overrides
* LIBBPF_BPF_TOKEN_PATH envvar.
*/
const char *bpf_token_path;
size_t :0;
};
#define bpf_object_open_opts__last_field kernel_log_level
#define bpf_object_open_opts__last_field bpf_token_path
/**
* @brief **bpf_object__open()** creates a bpf_object by opening
......
......@@ -360,15 +360,32 @@ enum kern_feature_id {
__FEAT_CNT,
};
int probe_memcg_account(void);
/* Outcome of a kernel feature probe; struct kern_feature_cache keeps one
 * such value per feature.
 */
enum kern_feature_result {
/* not probed yet; value 0, so zero-initialized storage starts in this state */
FEAT_UNKNOWN = 0,
/* feature was detected */
FEAT_SUPPORTED = 1,
/* feature was not detected */
FEAT_MISSING = 2,
};
/* Set of cached kernel feature probe results, passed to feat_supported(). */
struct kern_feature_cache {
/* one result slot per feature (array is sized by __FEAT_CNT) */
enum kern_feature_result res[__FEAT_CNT];
/* NOTE(review): presumably the BPF token FD the probes for this cache are
 * run with -- confirm against feat_supported()
 */
int token_fd;
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
int probe_kern_syscall_wrapper(int token_fd);
int probe_memcg_account(int token_fd);
int bump_rlimit_memlock(void);
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len);
int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);
const char *str_sec, size_t str_len,
int token_fd);
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
......@@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
/* Duplicate @fd into a new close-on-exec descriptor numbered 3 or higher,
 * i.e., one that never lands in the stdin/stdout/stderr range.
 * The descriptor passed in is left open and untouched.
 * A negative (invalid) @fd is handed back unchanged; otherwise the result
 * of fcntl() is returned as is.
 */
static inline int dup_good_fd(int fd)
{
	return fd < 0 ? fd : fcntl(fd, F_DUPFD_CLOEXEC, 3);
}
/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
* Takes ownership of the fd passed in, and closes it if calling
* fcntl(fd, F_DUPFD_CLOEXEC, 3).
......@@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd)
if (fd < 0)
return fd;
if (fd < 3) {
fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
fd = dup_good_fd(fd);
saved_errno = errno;
close(old_fd);
errno = saved_errno;
......
......@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
}
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len)
const char *str_sec, size_t str_len,
int token_fd)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
......@@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len,
.str_len = str_len,
};
LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd);
int btf_fd, btf_len;
__u8 *raw_btf;
......@@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);
btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf);
return btf_fd;
......@@ -271,7 +273,7 @@ static int load_local_storage_btf(void)
};
return libbpf__load_raw_btf((char *)types, sizeof(types),
strs, sizeof(strs));
strs, sizeof(strs), 0);
}
static int probe_map_create(enum bpf_map_type map_type)
......
......@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H
#define STRERR_BUFSIZE 128
char *libbpf_strerror_r(int err, char *dst, int len);
#endif /* __LIBBPF_STR_ERROR_H */
......@@ -12,8 +12,12 @@
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/un.h>
#include "priv_map.skel.h"
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
static inline int sys_mount(const char *dev_name, const char *dir_name,
const char *type, unsigned long flags,
......@@ -42,6 +46,13 @@ static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
}
/* Thin wrapper that issues the move_mount syscall directly through
 * syscall(), forwarding all arguments unchanged and returning its result.
 */
static inline int sys_move_mount(int from_dfd, const char *from_path,
				 int to_dfd, const char *to_path,
				 unsigned flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
	return rc;
}
static int drop_priv_caps(__u64 *old_caps)
{
return cap_disable_effective((1ULL << CAP_BPF) |
......@@ -643,6 +654,281 @@ static int userns_prog_load(int mnt_fd)
return err;
}
/* Check that the priv_map skeleton can only be loaded with a BPF token.
 *
 * @mnt_fd is an FD of a BPF FS instance to derive the token from.
 * Three scenarios are exercised in order:
 *   - no token at all: open_and_load is expected to fail;
 *   - bpf_token_path pointing at /proc/self/fd/<mnt_fd>: load must succeed;
 *   - bpf_token_fd holding an explicitly created token: load must succeed.
 *
 * Returns 0 if all expectations hold, -EINVAL otherwise.
 */
static int userns_obj_priv_map(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_map *skel;
	int err, token_fd;

	skel = priv_map__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_map__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	/* create token and pass it through bpf_token_fd */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "create_token"))
		return -EINVAL;

	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = token_fd;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) {
		/* open failed, so we still own the token FD; don't leak it */
		close(token_fd);
		return -EINVAL;
	}

	/* we can close our token FD, bpf_object owns dup()'ed FD now */
	close(token_fd);

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_fd_load"))
		return -EINVAL;

	return 0;
}
static int userns_obj_priv_prog(int mnt_fd)
{
LIBBPF_OPTS(bpf_object_open_opts, opts);
char buf[256];
struct priv_prog *skel;
int err;
skel = priv_prog__open_and_load();
if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
priv_prog__destroy(skel);
return -EINVAL;
}
/* use bpf_token_path to provide BPF FS path */
snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
opts.bpf_token_path = buf;
skel = priv_prog__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
return -EINVAL;
err = priv_prog__load(skel);
priv_prog__destroy(skel);
if (!ASSERT_OK(err, "obj_token_path_load"))
return -EINVAL;
return 0;
}
/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
* which should cause struct_ops application to fail, as BTF won't be uploaded
* into the kernel, even if STRUCT_OPS programs themselves are allowed
*/
static int validate_struct_ops_load(int mnt_fd, bool expect_success)
{
LIBBPF_OPTS(bpf_object_open_opts, opts);
char buf[256];
struct dummy_st_ops_success *skel;
int err;
snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
opts.bpf_token_path = buf;
skel = dummy_st_ops_success__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
return -EINVAL;
err = dummy_st_ops_success__load(skel);
dummy_st_ops_success__destroy(skel);
if (expect_success) {
if (!ASSERT_OK(err, "obj_token_path_load"))
return -EINVAL;
} else /* expect failure */ {
if (!ASSERT_ERR(err, "obj_token_path_load"))
return -EINVAL;
}
return 0;
}
/* Subtest callback: struct_ops skeleton load is expected to fail. */
static int userns_obj_priv_btf_fail(int mnt_fd)
{
	const bool expect_success = false;

	return validate_struct_ops_load(mnt_fd, expect_success);
}
/* Subtest callback: struct_ops skeleton load is expected to succeed. */
static int userns_obj_priv_btf_success(int mnt_fd)
{
	const bool expect_success = true;

	return validate_struct_ops_load(mnt_fd, expect_success);
}
#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
static int userns_obj_priv_implicit_token(int mnt_fd)
{
LIBBPF_OPTS(bpf_object_open_opts, opts);
struct dummy_st_ops_success *skel;
int err;
/* before we mount BPF FS with token delegation, struct_ops skeleton
* should fail to load
*/
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
dummy_st_ops_success__destroy(skel);
return -EINVAL;
}
/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
* token automatically and implicitly
*/
err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
if (!ASSERT_OK(err, "move_mount_bpffs"))
return -EINVAL;
/* disable implicit BPF token creation by setting
* LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
*/
err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
if (!ASSERT_OK(err, "setenv_token_path"))
return -EINVAL;
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
unsetenv(TOKEN_ENVVAR);
dummy_st_ops_success__destroy(skel);
return -EINVAL;
}
unsetenv(TOKEN_ENVVAR);
/* now the same struct_ops skeleton should succeed thanks to libppf
* creating BPF token from /sys/fs/bpf mount point
*/
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
return -EINVAL;
dummy_st_ops_success__destroy(skel);
/* now disable implicit token through empty bpf_token_path, should fail */
opts.bpf_token_path = "";
skel = dummy_st_ops_success__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
return -EINVAL;
err = dummy_st_ops_success__load(skel);
dummy_st_ops_success__destroy(skel);
if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
return -EINVAL;
/* now disable implicit token through negative bpf_token_fd, should fail */
opts.bpf_token_path = NULL;
opts.bpf_token_fd = -1;
skel = dummy_st_ops_success__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
return -EINVAL;
err = dummy_st_ops_success__load(skel);
dummy_st_ops_success__destroy(skel);
if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
return -EINVAL;
return 0;
}
static int userns_obj_priv_implicit_token_envvar(int mnt_fd)
{
LIBBPF_OPTS(bpf_object_open_opts, opts);
struct dummy_st_ops_success *skel;
int err;
/* before we mount BPF FS with token delegation, struct_ops skeleton
* should fail to load
*/
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
dummy_st_ops_success__destroy(skel);
return -EINVAL;
}
/* mount custom BPF FS over custom location, so libbpf can't create
* BPF token implicitly, unless pointed to it through
* LIBBPF_BPF_TOKEN_PATH envvar
*/
rmdir(TOKEN_BPFFS_CUSTOM);
if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
goto err_out;
err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
if (!ASSERT_OK(err, "move_mount_bpffs"))
goto err_out;
/* even though we have BPF FS with delegation, it's not at default
* /sys/fs/bpf location, so we still fail to load until envvar is set up
*/
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
dummy_st_ops_success__destroy(skel);
goto err_out;
}
err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
if (!ASSERT_OK(err, "setenv_token_path"))
goto err_out;
/* now the same struct_ops skeleton should succeed thanks to libppf
* creating BPF token from custom mount point
*/
skel = dummy_st_ops_success__open_and_load();
if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
goto err_out;
dummy_st_ops_success__destroy(skel);
/* now disable implicit token through empty bpf_token_path, envvar
* will be ignored, should fail
*/
opts.bpf_token_path = "";
skel = dummy_st_ops_success__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
goto err_out;
err = dummy_st_ops_success__load(skel);
dummy_st_ops_success__destroy(skel);
if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
goto err_out;
/* now disable implicit token through negative bpf_token_fd, envvar
* will be ignored, should fail
*/
opts.bpf_token_path = NULL;
opts.bpf_token_fd = -1;
skel = dummy_st_ops_success__open_opts(&opts);
if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
goto err_out;
err = dummy_st_ops_success__load(skel);
dummy_st_ops_success__destroy(skel);
if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
goto err_out;
rmdir(TOKEN_BPFFS_CUSTOM);
unsetenv(TOKEN_ENVVAR);
return 0;
err_out:
rmdir(TOKEN_BPFFS_CUSTOM);
unsetenv(TOKEN_ENVVAR);
return -EINVAL;
}
#define bit(n) (1ULL << (n))
void test_token(void)
{
if (test__start_subtest("map_token")) {
......@@ -669,4 +955,65 @@ void test_token(void)
subtest_userns(&opts, userns_prog_load);
}
if (test__start_subtest("obj_priv_map")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_MAP_CREATE),
.maps = bit(BPF_MAP_TYPE_QUEUE),
};
subtest_userns(&opts, userns_obj_priv_map);
}
if (test__start_subtest("obj_priv_prog")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_PROG_LOAD),
.progs = bit(BPF_PROG_TYPE_KPROBE),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_prog);
}
if (test__start_subtest("obj_priv_btf_fail")) {
struct bpffs_opts opts = {
/* disallow BTF loading */
.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_btf_fail);
}
if (test__start_subtest("obj_priv_btf_success")) {
struct bpffs_opts opts = {
/* allow BTF loading */
.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_btf_success);
}
if (test__start_subtest("obj_priv_implicit_token")) {
struct bpffs_opts opts = {
/* allow BTF loading */
.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_implicit_token);
}
if (test__start_subtest("obj_priv_implicit_token_envvar")) {
struct bpffs_opts opts = {
/* allow BTF loading */
.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
}
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
/* Single-entry BPF_MAP_TYPE_QUEUE map holding __u32 values; the only map
 * defined by this BPF object.
 */
struct {
__uint(type, BPF_MAP_TYPE_QUEUE);
__uint(max_entries, 1);
__type(value, __u32);
} priv_map SEC(".maps");
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
/* Minimal program placed in the "kprobe" section; it ignores its context
 * and always returns 1.
 */
SEC("kprobe")
int kprobe_prog(void *ctx)
{
return 1;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment