Commit a12a625c authored by Alexei Starovoitov

Merge branch 'libbpf-probing-improvements'

Andrii Nakryiko says:

====================
This patch set refactors libbpf feature probing to be done lazily, on an
as-needed basis, instead of proactively testing all possible features libbpf
knows about. This allows such detections and mitigations to scale better,
without issuing unnecessary syscalls on each bpf_object__load() call. Results
are also now memoized globally, instead of per-bpf_object.
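
As a rough illustration of the lazy, globally memoized detection pattern
described above (all names here are hypothetical sketches, not libbpf's
actual internals):

#include <stdbool.h>

enum probe_state { PROBE_UNKNOWN = 0, PROBE_MISSING, PROBE_PRESENT };

/* file-scope state, so the result is shared across all bpf_object instances */
static enum probe_state probe_read_kernel_state;

/* stand-in for the real check, which would load a tiny test program that
 * calls bpf_probe_read_kernel() and see whether the kernel accepts it */
static bool do_probe_read_kernel(void)
{
	return false;
}

static bool kernel_supports_probe_read_kernel(void)
{
	/* the syscall-issuing probe runs only on first use,
	 * not on every bpf_object__load() */
	if (probe_read_kernel_state == PROBE_UNKNOWN)
		probe_read_kernel_state = do_probe_read_kernel() ?
					  PROBE_PRESENT : PROBE_MISSING;
	return probe_read_kernel_state == PROBE_PRESENT;
}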

Building on that, libbpf will now detect availability of the
bpf_probe_read_kernel() helper (which implies the -user and -str variants as
well), and will sanitize BPF program code by replacing such calls with the
generic variants (bpf_probe_read[_str]()). This allows migrating all BPF
programs to the proper -kernel/-user probing helpers, without fear of breaking
them on old kernels.
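
The sanitization itself amounts to patching helper call instructions in
place. A minimal sketch of the idea (the function name is made up for
illustration; the BPF_FUNC_* constants are the real UAPI helper IDs):

#include <linux/bpf.h>
#include <stddef.h>

static void sanitize_probe_read_calls(struct bpf_insn *insns, size_t insn_cnt)
{
	for (size_t i = 0; i < insn_cnt; i++) {
		struct bpf_insn *insn = &insns[i];

		/* helper calls are BPF_JMP|BPF_CALL insns with the helper ID
		 * in imm; src_reg != 0 would mean a BPF-to-BPF call instead */
		if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != 0)
			continue;

		if (insn->imm == BPF_FUNC_probe_read_kernel ||
		    insn->imm == BPF_FUNC_probe_read_user)
			insn->imm = BPF_FUNC_probe_read;
		else if (insn->imm == BPF_FUNC_probe_read_kernel_str ||
			 insn->imm == BPF_FUNC_probe_read_user_str)
			insn->imm = BPF_FUNC_probe_read_str;
	}
}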

With that, update BPF_CORE_READ() and related macros to use
bpf_probe_read_kernel(), as it doesn't make much sense to do CO-RE relocations
against user-space types. The only class of cases in which a BPF program might
read a kernel type from user-space is UAPI data structures, which by
definition are fixed in their memory layout and don't need relocating. This is
exemplified by the test_vmlinux test, which is fixed as part of this patch set
as well. BPF_CORE_READ() is useful for chaining bpf_probe_read_{kernel,user}()
calls together even without relocation, so we might add user-space variants,
if there is a need.
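
For reference, a typical chained read via BPF_CORE_READ() looks like this
(the specific fields read here are just an illustration):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

SEC("kprobe/do_exit")
int BPF_KPROBE(trace_exit)
{
	struct task_struct *task = (void *)bpf_get_current_task();

	/* expands into two CO-RE-relocated bpf_probe_read_kernel() calls:
	 * first task->real_parent, then real_parent->tgid */
	pid_t ppid = BPF_CORE_READ(task, real_parent, tgid);

	bpf_printk("parent tgid: %d", ppid);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";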

While making libbpf more useful for older kernels, also improve handling of a
complete lack of BTF support in the kernel by not even attempting to load BTF
info into the kernel. This eliminates the annoying warning about lack of BTF
support in the kernel and the map creation retry without BTF. If the user is
relying on features that require kernel BTF support, those will still fail, of
course.
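
Conceptually, the new behavior boils down to gating the BTF upload on a
lazily probed feature check, roughly like this (hypothetical names, sketch
only):

#include <stdbool.h>

extern bool kernel_supports_btf(void);      /* lazily probed, memoized */
extern int load_btf_into_kernel(void *btf); /* stand-in for the real upload */

static int maybe_load_btf(void *btf)
{
	/* no BTF support at all: skip quietly instead of trying, warning,
	 * and retrying map creation without BTF */
	if (!kernel_supports_btf())
		return 0;
	return load_btf_into_kernel(btf); /* genuine errors still surface */
}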
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 65bb2e0f 68b08647
@@ -107,7 +107,7 @@ ifeq ($(feature-reallocarray), 0)
 endif
 
 # Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)
...
@@ -24,7 +24,8 @@ enum bpf_field_info_kind {
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
-	bpf_probe_read((void *)dst,					      \
+	bpf_probe_read_kernel(						      \
+			(void *)dst,					      \
 			__CORE_RELO(src, fld, BYTE_SIZE),		      \
 			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
 #else
@@ -33,7 +34,8 @@ enum bpf_field_info_kind {
  * field byte size
  */
 #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
-	bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+	bpf_probe_read_kernel(						      \
+			(void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
 			__CORE_RELO(src, fld, BYTE_SIZE),		      \
 			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
 #endif
@@ -42,9 +44,9 @@ enum bpf_field_info_kind {
  * Extract bitfield, identified by s->field, and return its value as u64.
  * All this is done in relocatable manner, so bitfield changes such as
  * signedness, bit size, offset changes, this will be handled automatically.
- * This version of macro is using bpf_probe_read() to read underlying integer
- * storage. Macro functions as an expression and its return type is
- * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
  */
 #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({			      \
 	unsigned long long val = 0;					      \
@@ -99,8 +101,8 @@ enum bpf_field_info_kind {
 	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
 
 /*
- * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
  * built-in, provided by Clang.
  *
  * __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,7 +117,7 @@ enum bpf_field_info_kind {
  * (local) BTF, used to record relocation.
  */
 #define bpf_core_read(dst, sz, src)					      \
-	bpf_probe_read(dst, sz,						      \
+	bpf_probe_read_kernel(dst, sz,					      \
		       (const void *)__builtin_preserve_access_index(src))
 
 /*
@@ -124,7 +126,7 @@ enum bpf_field_info_kind {
  * argument.
  */
 #define bpf_core_read_str(dst, sz, src)					      \
-	bpf_probe_read_str(dst, sz,					      \
+	bpf_probe_read_kernel_str(dst, sz,				      \
		       (const void *)__builtin_preserve_access_index(src))
 
 #define ___concat(a, b) a ## b
@@ -239,15 +241,17 @@ enum bpf_field_info_kind {
  * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
  *
  * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
- * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
  * 1. const void *__t = s->a.b.c;
  * 2. __t = __t->d.e;
  * 3. __t = __t->f;
  * 4. return __t->g;
  *
  * Equivalence is logical, because there is a heavy type casting/preservation
- * involved, as well as all the reads are happening through bpf_probe_read()
- * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
  *
  * N.B. Only up to 9 "field accessors" are supported, which should be more
  * than enough for any practical purpose.
...
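
A short usage sketch for the bitfield macro touched above; sk_buff's
pkt_type bitfield is only an example target:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

static __always_inline unsigned long long read_pkt_type(struct sk_buff *skb)
{
	/* reads the bitfield's backing bytes with bpf_probe_read_kernel()
	 * and extracts the bits using CO-RE-relocated shift amounts */
	return BPF_CORE_READ_BITFIELD_PROBED(skb, pkt_type);
}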
@@ -289,9 +289,9 @@ struct pt_regs;
 
 #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
 #else
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)					      \
-	({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				      \
-	({ bpf_probe_read(&(ip), sizeof(ip),				      \
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			      \
			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
...
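
For context, the macro patched above would be used like this inside a
kretprobe (the traced function is arbitrary; sketch only):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kretprobe/do_unlinkat")
int BPF_KRETPROBE(handle_ret)
{
	unsigned long ip = 0;

	/* on most architectures this reads the probed function's saved
	 * return address, now via bpf_probe_read_kernel() */
	BPF_KRETPROBE_READ_RET_IP(ip, ctx);
	bpf_printk("returned to %lx", ip);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";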
... (diff collapsed) ...
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
 int handle__tp(struct trace_event_raw_sys_enter *args)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (args->id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)args->args[0];
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
 int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
 int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	tp_btf_called = true;
...
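
The ts pointer captured at syscall entry points into user memory, which is
why these checks switch from BPF_CORE_READ() to bpf_probe_read_user(). A
user-space trigger for this test could look roughly like the following
(illustrative only, not the test's actual harness; the MY_TV_NSEC value is a
placeholder):

#include <time.h>

#define MY_TV_NSEC 1337 /* placeholder; the test defines its own value */

int trigger_nanosleep(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = MY_TV_NSEC };

	/* the kernel only sees &ts at sys_enter; the struct itself lives in
	 * user memory, so BPF must read it with bpf_probe_read_user() */
	return nanosleep(&ts, NULL);
}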