Commit 358fdb45 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf_probe_read_user'

Daniel Borkmann says:

====================
This set adds probe_read_{user,kernel}(), probe_read_str_{user,kernel}()
helpers, fixes probe_write_user() helper and selftests. For details please
see individual patches.

Thanks!

v2 -> v3:
  - noticed two more things that are fixed in here:
   - bpf uapi helper description used 'int size' for *_str helpers, now u32
   - we need TASK_SIZE_MAX + guard page on x86-64 in patch 2 otherwise
     we'll trigger the 00c42373 warn as well, so full range covered now
v1 -> v2:
  - standardize unsafe_ptr terminology in uapi header comment (Andrii)
  - probe_read_{user,kernel}[_str] naming scheme (Andrii)
  - use global data in last test case, remove relaxed_maps (Andrii)
  - add strict non-pagefault kernel read funcs to avoid warning in
    kernel probe read helpers (Alexei)
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e1cb7d2d fa553d9b
......@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/uaccess.h>
#include <linux/kernel.h>
#ifdef CONFIG_X86_64
static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
{
return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}
static __always_inline bool invalid_probe_range(u64 vaddr)
{
/*
* Range covering the highest possible canonical userspace address
* as well as non-canonical address range. For the canonical range
* we also need to include the userspace guard page.
*/
return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
}
#else
static __always_inline bool invalid_probe_range(u64 vaddr)
{
return vaddr < TASK_SIZE_MAX;
}
#endif
long probe_kernel_read_strict(void *dst, const void *src, size_t size)
{
if (unlikely(invalid_probe_range((unsigned long)src)))
return -EFAULT;
return __probe_kernel_read(dst, src, size);
}
long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
{
if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
return -EFAULT;
return __strncpy_from_unsafe(dst, unsafe_addr, count);
}
......@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
* happens, handle that and return -EFAULT.
*/
extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/*
......@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
/*
* probe_user_write(): safely attempt to write to a location in user space
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
*
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
long count);
extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
......
......@@ -563,10 +563,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read(void *dst, u32 size, const void *src)
* int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* address *src* and store the data in *dst*.
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
* instead.
* Return
* 0 on success, or a negative error in case of failure.
*
......@@ -1428,45 +1431,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read()** helper here instead
* to read the string would require to estimate the length at
* compile time, and would often result in copying more memory
* than necessary.
* Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
* more details.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
* instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
......@@ -2777,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read_user()** helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
* value.
*
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
* Return
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2890,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \
FN(send_signal), \
FN(tcp_gen_syncookie), \
FN(skb_output),
FN(skb_output), \
FN(probe_read_user), \
FN(probe_read_kernel), \
FN(probe_read_user_str), \
FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
......@@ -1306,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{
memset(dst, 0, size);
return -EFAULT;
}
/**
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
......@@ -1566,9 +1567,9 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6
LDST(W, u32)
LDST(DW, u64)
#undef LDST
#define LDX_PROBE(SIZEOP, SIZE) \
LDX_PROBE_MEM_##SIZEOP: \
bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \
#define LDX_PROBE(SIZEOP, SIZE) \
LDX_PROBE_MEM_##SIZEOP: \
bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
CONT;
LDX_PROBE(B, 1)
LDX_PROBE(H, 2)
......
......@@ -138,24 +138,125 @@ static const struct bpf_func_proto bpf_override_return_proto = {
};
#endif
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
const void __user *, unsafe_ptr)
{
int ret;
int ret = probe_user_read(dst, unsafe_ptr, size);
ret = security_locked_down(LOCKDOWN_BPF_READ);
if (ret < 0)
goto out;
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_user_proto = {
.func = bpf_probe_read_user,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
const void __user *, unsafe_ptr)
{
int ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.func = bpf_probe_read_user_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
ret = probe_kernel_read(dst, unsafe_ptr, size);
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
const bool compat)
{
int ret = security_locked_down(LOCKDOWN_BPF_READ);
if (unlikely(ret < 0))
goto out;
ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
probe_kernel_read_strict(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
return ret;
}
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
}
static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
.func = bpf_probe_read_kernel,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
}
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
.func = bpf_probe_read_compat,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
const bool compat)
{
int ret = security_locked_down(LOCKDOWN_BPF_READ);
if (unlikely(ret < 0))
goto out;
/*
* The strncpy_from_unsafe_*() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read_*() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_proto = {
.func = bpf_probe_read,
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
}
static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
.func = bpf_probe_read_kernel_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
......@@ -163,7 +264,22 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
}
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
.func = bpf_probe_read_compat_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
u32, size)
{
/*
......@@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
if (!access_ok(unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size);
return probe_user_write(unsafe_ptr, src, size);
}
static const struct bpf_func_proto bpf_probe_write_user_proto = {
......@@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
int ret;
ret = security_locked_down(LOCKDOWN_BPF_READ);
if (ret < 0)
goto out;
/*
* The strncpy_from_unsafe() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_str_proto = {
.func = bpf_probe_read_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
......@@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
......@@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
return &bpf_probe_read_kernel_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_compat_proto;
case BPF_FUNC_probe_read_user_str:
return &bpf_probe_read_user_str_proto;
case BPF_FUNC_probe_read_kernel_str:
return &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
return &bpf_probe_read_compat_str_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
......
......@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
return ret ? -EFAULT : 0;
}
static __always_inline long
probe_write_common(void __user *dst, const void *src, size_t size)
{
long ret;
pagefault_disable();
ret = __copy_to_user_inatomic(dst, src, size);
pagefault_enable();
return ret ? -EFAULT : 0;
}
/**
* probe_kernel_read(): safely attempt to read from a kernel-space location
* @dst: pointer to the buffer that shall take the data
......@@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size)
* do_page_fault() doesn't attempt to take mmap_sem. This makes
* probe_kernel_read() suitable for use within regions where the caller
* already holds mmap_sem, or other locks which nest inside mmap_sem.
*
* probe_kernel_read_strict() is the same as probe_kernel_read() except for
* the case where architectures have non-overlapping user and kernel address
* ranges: probe_kernel_read_strict() will additionally return -EFAULT for
* probing memory on a user address range where probe_user_read() is supposed
* to be used instead.
*/
long __weak probe_kernel_read(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_read")));
long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_read")));
long __probe_kernel_read(void *dst, const void *src, size_t size)
{
long ret;
......@@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
long __weak probe_kernel_write(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_write")));
......@@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
pagefault_disable();
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
pagefault_enable();
ret = probe_write_common((__force void __user *)dst, src, size);
set_fs(old_fs);
return ret ? -EFAULT : 0;
return ret;
}
EXPORT_SYMBOL_GPL(probe_kernel_write);
/**
* probe_user_write(): safely attempt to write to a user-space location
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
*
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
long __weak probe_user_write(void __user *dst, const void *src, size_t size)
__attribute__((alias("__probe_user_write")));
long __probe_user_write(void __user *dst, const void *src, size_t size)
{
long ret = -EFAULT;
mm_segment_t old_fs = get_fs();
set_fs(USER_DS);
if (access_ok(dst, size))
ret = probe_write_common(dst, src, size);
set_fs(old_fs);
return ret;
}
EXPORT_SYMBOL_GPL(probe_user_write);
/**
* strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
......@@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write);
*
* If @count is smaller than the length of the string, copies @count-1 bytes,
* sets the last byte of @dst buffer to NUL and returns @count.
*
* strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
* for the case where architectures have non-overlapping user and kernel address
* ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
* probing memory on a user address range where strncpy_from_unsafe_user() is
* supposed to be used instead.
*/
long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
__attribute__((alias("__strncpy_from_unsafe")));
long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
long count)
__attribute__((alias("__strncpy_from_unsafe")));
long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
{
mm_segment_t old_fs = get_fs();
const void *src = unsafe_addr;
......
......@@ -181,8 +181,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
if (addrlen != sizeof(*in6))
return 0;
ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
&in6->sin6_addr);
ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
&in6->sin6_addr);
if (ret)
goto done;
......
......@@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
if (addrlen != sizeof(*in6))
return 0;
ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
if (ret) {
inline_ret = ret;
goto done;
......@@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
test_case = dst6[7];
ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
if (ret) {
inline_ret = ret;
goto done;
......
......@@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
if (sockaddr_len > sizeof(orig_addr))
return 0;
if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
return 0;
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
......
......@@ -563,10 +563,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read(void *dst, u32 size, const void *src)
* int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* address *src* and store the data in *dst*.
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
* instead.
* Return
* 0 on success, or a negative error in case of failure.
*
......@@ -1428,45 +1431,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read()** helper here instead
* to read the string would require to estimate the length at
* compile time, and would often result in copying more memory
* than necessary.
* Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
* more details.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
* instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
......@@ -2777,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read_user()** helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
* value.
*
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
* Return
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2890,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \
FN(send_signal), \
FN(tcp_gen_syncookie), \
FN(skb_output),
FN(skb_output), \
FN(probe_read_user), \
FN(probe_read_kernel), \
FN(probe_read_user_str), \
FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
void test_probe_user(void)
{
#define kprobe_name "__sys_connect"
const char *prog_name = "kprobe/" kprobe_name;
const char *obj_file = "./test_probe_user.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
struct sockaddr curr, orig, tmp;
struct sockaddr_in *in = (struct sockaddr_in *)&curr;
struct bpf_link *kprobe_link = NULL;
struct bpf_program *kprobe_prog;
struct bpf_object *obj;
static const int zero = 0;
obj = bpf_object__open_file(obj_file, &opts);
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!kprobe_prog, "find_probe",
"prog '%s' not found\n", prog_name))
goto cleanup;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss");
if (CHECK(results_map_fd < 0, "find_bss_map",
"err %d\n", results_map_fd))
goto cleanup;
kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
kprobe_name);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
"err %ld\n", PTR_ERR(kprobe_link))) {
kprobe_link = NULL;
goto cleanup;
}
memset(&curr, 0, sizeof(curr));
in->sin_family = AF_INET;
in->sin_port = htons(5555);
in->sin_addr.s_addr = inet_addr("255.255.255.255");
memcpy(&orig, &curr, sizeof(curr));
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
goto cleanup;
connect(sock_fd, &curr, sizeof(curr));
close(sock_fd);
err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
if (CHECK(err, "get_kprobe_res",
"failed to get kprobe res: %d\n", err))
goto cleanup;
in = (struct sockaddr_in *)&tmp;
if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
"wrong kprobe res from probe read: %s:%u\n",
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
goto cleanup;
memset(&tmp, 0xab, sizeof(tmp));
in = (struct sockaddr_in *)&curr;
if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
"wrong kprobe res from probe write: %s:%u\n",
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
goto cleanup;
cleanup:
bpf_link__destroy(kprobe_link);
bpf_object__close(obj);
}
......@@ -79,11 +79,11 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx)
func = ptr->func;
}));
bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
pkt_type &= 7;
/* read eth proto */
bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12);
bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
bpf_printk("skb->len %d users %d pkt_type %x\n",
......
......@@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
void* thread_state;
int key;
bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
bpf_probe_read(&thread_state, sizeof(thread_state),
tls_base + 0x310 + key * 0x10 + 0x08);
bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
bpf_probe_read_user(&thread_state, sizeof(thread_state),
tls_base + 0x310 + key * 0x10 + 0x08);
return thread_state;
}
......@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
FrameData *frame, Symbol *symbol)
{
// read data from PyFrameObject
bpf_probe_read(&frame->f_back,
sizeof(frame->f_back),
frame_ptr + pidData->offsets.PyFrameObject_back);
bpf_probe_read(&frame->f_code,
sizeof(frame->f_code),
frame_ptr + pidData->offsets.PyFrameObject_code);
bpf_probe_read_user(&frame->f_back,
sizeof(frame->f_back),
frame_ptr + pidData->offsets.PyFrameObject_back);
bpf_probe_read_user(&frame->f_code,
sizeof(frame->f_code),
frame_ptr + pidData->offsets.PyFrameObject_code);
// read data from PyCodeObject
if (!frame->f_code)
return false;
bpf_probe_read(&frame->co_filename,
sizeof(frame->co_filename),
frame->f_code + pidData->offsets.PyCodeObject_filename);
bpf_probe_read(&frame->co_name,
sizeof(frame->co_name),
frame->f_code + pidData->offsets.PyCodeObject_name);
bpf_probe_read_user(&frame->co_filename,
sizeof(frame->co_filename),
frame->f_code + pidData->offsets.PyCodeObject_filename);
bpf_probe_read_user(&frame->co_name,
sizeof(frame->co_name),
frame->f_code + pidData->offsets.PyCodeObject_name);
// read actual names into symbol
if (frame->co_filename)
bpf_probe_read_str(&symbol->file,
sizeof(symbol->file),
frame->co_filename + pidData->offsets.String_data);
bpf_probe_read_user_str(&symbol->file,
sizeof(symbol->file),
frame->co_filename +
pidData->offsets.String_data);
if (frame->co_name)
bpf_probe_read_str(&symbol->name,
sizeof(symbol->name),
frame->co_name + pidData->offsets.String_data);
bpf_probe_read_user_str(&symbol->name,
sizeof(symbol->name),
frame->co_name +
pidData->offsets.String_data);
return true;
}
......@@ -174,9 +176,9 @@ static __always_inline int __on_event(struct pt_regs *ctx)
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
void* thread_state_current = (void*)0;
bpf_probe_read(&thread_state_current,
sizeof(thread_state_current),
(void*)(long)pidData->current_state_addr);
bpf_probe_read_user(&thread_state_current,
sizeof(thread_state_current),
(void*)(long)pidData->current_state_addr);
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
void* tls_base = (void*)task;
......@@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
if (pidData->use_tls) {
uint64_t pthread_created;
uint64_t pthread_self;
bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
tls_base + 0x10);
bpf_probe_read(&pthread_created,
sizeof(pthread_created),
thread_state + pidData->offsets.PyThreadState_thread);
bpf_probe_read_user(&pthread_created,
sizeof(pthread_created),
thread_state +
pidData->offsets.PyThreadState_thread);
event->pthread_match = pthread_created == pthread_self;
} else {
event->pthread_match = 1;
......@@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
Symbol sym = {};
int cur_cpu = bpf_get_smp_processor_id();
bpf_probe_read(&frame_ptr,
sizeof(frame_ptr),
thread_state + pidData->offsets.PyThreadState_frame);
bpf_probe_read_user(&frame_ptr,
sizeof(frame_ptr),
thread_state +
pidData->offsets.PyThreadState_frame);
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
......
......@@ -98,7 +98,7 @@ struct strobe_map_raw {
/*
* having volatile doesn't change anything on BPF side, but clang
* emits warnings for passing `volatile const char *` into
* bpf_probe_read_str that expects just `const char *`
* bpf_probe_read_user_str that expects just `const char *`
*/
const char* tag;
/*
......@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
dtv_t *dtv;
void *tls_ptr;
bpf_probe_read(&tls_index, sizeof(struct tls_index),
(void *)loc->offset);
bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
(void *)loc->offset);
/* valid module index is always positive */
if (tls_index.module > 0) {
/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
bpf_probe_read(&dtv, sizeof(dtv),
&((struct tcbhead *)tls_base)->dtv);
bpf_probe_read_user(&dtv, sizeof(dtv),
&((struct tcbhead *)tls_base)->dtv);
dtv += tls_index.module;
} else {
dtv = NULL;
}
bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
return tls_ptr && tls_ptr != (void *)-1
? tls_ptr + tls_index.offset
......@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
if (!location)
return;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
data->int_vals[idx] = value->val;
if (value->header.len)
data->int_vals_set_mask |= (1 << idx);
......@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
if (!location)
return 0;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
/*
* if bpf_probe_read_str returns error (<0), due to casting to
* if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is
* sufficient to check for errors AND prove to BPF verifier, that
* bpf_probe_read_str won't return anything bigger than
* bpf_probe_read_user_str won't return anything bigger than
* STROBE_MAX_STR_LEN
*/
if (len > STROBE_MAX_STR_LEN)
......@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
if (!location)
return payload;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
return payload;
descr->id = map.id;
......@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
payload += len;
......@@ -418,15 +418,15 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
break;
descr->key_lens[i] = 0;
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].key);
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
payload += len;
}
descr->val_lens[i] = 0;
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].val);
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
payload += len;
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include "bpf_helpers.h"
#include "bpf_tracing.h"
static struct sockaddr_in old;
SEC("kprobe/__sys_connect")
int handle_sys_connect(struct pt_regs *ctx)
{
void *ptr = (void *)PT_REGS_PARM2(ctx);
struct sockaddr_in new;
bpf_probe_read_user(&old, sizeof(old), ptr);
__builtin_memset(&new, 0xab, sizeof(new));
bpf_probe_write_user(ptr, &new, sizeof(new));
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -38,7 +38,7 @@
#include <sys/socket.h>
#include "bpf_helpers.h"
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
#define TCP_ESTATS_MAGIC 0xBAADBEEF
/* This test case needs "sock" and "pt_regs" data structure.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment