Commit e6ea60ba authored by Jakub Kicinski's avatar Jakub Kicinski

Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
1) libbpf should not attempt to load unused subprogs, from Andrii.

2) Make strncpy_from_user() mask out bytes after NUL terminator, from Daniel.

3) Relax return code check for subprograms in the BPF verifier, from Dmitrii.

4) Fix several sockmap issues, from John.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  fail_function: Remove a redundant mutex unlock
  selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL
  lib/strncpy_from_user.c: Mask out bytes after NUL terminator.
  libbpf: Fix VERSIONED_SYM_COUNT number parsing
  bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list
  bpf, sockmap: Handle memory acct if skb_verdict prog redirects to self
  bpf, sockmap: Avoid returning unneeded EAGAIN when redirecting to self
  bpf, sockmap: Use truesize with sk_rmem_schedule()
  bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect
  bpf, sockmap: Fix partial copy_page_to_iter so progress can still be made
  selftests/bpf: Fix error return code in run_getsockopt_test()
  bpf: Relax return code check for subprograms
  tools, bpftool: Add missing close before bpftool net attach exit
  MAINTAINERS/bpf: Update Andrii's entry.
  selftests/bpf: Fix unused attribute usage in subprogs_unused test
  bpf: Fix unsigned 'datasec_id' compared with zero in check_pseudo_btf_id
  bpf: Fix passing zero to PTR_ERR() in bpf_btf_printf_prepare
  libbpf: Don't attempt to load unused subprog as an entry-point BPF program
====================

Link: https://lore.kernel.org/r/20201119200721.288-1-alexei.starovoitov@gmail.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 90b49784 2801a5da
......@@ -3243,10 +3243,10 @@ F: drivers/iio/accel/bma400*
BPF (Safe dynamic programs and tools)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
R: Martin KaFai Lau <kafai@fb.com>
R: Song Liu <songliubraving@fb.com>
R: Yonghong Song <yhs@fb.com>
R: Andrii Nakryiko <andrii@kernel.org>
R: John Fastabend <john.fastabend@gmail.com>
R: KP Singh <kpsingh@chromium.org>
L: netdev@vger.kernel.org
......
......@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
const bool is_subprog = env->cur_state->frame[0]->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
if (!is_subprog &&
(prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
......@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}
reg = cur_regs(env) + BPF_REG_0;
if (is_subprog) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
reg_type_str[reg->type]);
return -EINVAL;
}
return 0;
}
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
......@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}
reg = cur_regs(env) + BPF_REG_0;
if (reg->type != SCALAR_VALUE) {
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
reg_type_str[reg->type]);
......@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
struct bpf_insn_aux_data *aux)
{
u32 datasec_id, type, id = insn->imm;
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u32 type, id = insn->imm;
s32 datasec_id;
u64 addr;
int i;
......
......@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
if (copy_from_user(buf, buffer, count)) {
ret = -EFAULT;
goto out;
goto out_free;
}
buf[count] = '\0';
sym = strstrip(buf);
......@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
ret = count;
}
out:
kfree(buf);
mutex_unlock(&fei_lock);
out_free:
kfree(buf);
return ret;
}
......
......@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;
/*
* NB: We rely on strncpy_from_user() not copying junk past the NUL
* terminator into `dst`.
*
* strncpy_from_user() does long-sized strides in the fast path. If the
* strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
* then there could be junk after the NUL in `dst`. If user takes `dst`
* and keys a hash map with it, then semantically identical strings can
* occupy multiple entries in the map.
*/
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
......@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
*btf = bpf_get_btf_vmlinux();
if (IS_ERR_OR_NULL(*btf))
return PTR_ERR(*btf);
return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
if (ptr->type_id > 0)
*btf_id = ptr->type_id;
......
......@@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
goto byte_at_a_time;
while (max >= sizeof(unsigned long)) {
unsigned long c, data;
unsigned long c, data, mask;
/* Fall back to byte-at-a-time if we get a page fault */
unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
*(unsigned long *)(dst+res) = c;
/*
* Note that we mask out the bytes following the NUL. This is
* important to do because string oblivious code may read past
* the NUL. For those routines, we don't want to give them
* potentially random bytes after the NUL in `src`.
*
* One example of such code is BPF map keys. BPF treats map keys
* as an opaque set of bytes. Without the post-NUL mask, any BPF
* maps keyed by strings returned from strncpy_from_user() may
* have multiple entries for semantically identical strings.
*/
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
data = create_zero_mask(data);
mask = zero_bytemask(data);
*(unsigned long *)(dst+res) = c & mask;
return res + find_zero(data);
}
*(unsigned long *)(dst+res) = c;
res += sizeof(unsigned long);
max -= sizeof(unsigned long);
}
......
......@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
struct scatterlist *sge = sk_msg_elem(msg, i);
u32 len = sge->length;
if (charge)
sk_mem_uncharge(sk, len);
if (!msg->skb)
/* When the skb owns the memory we free it from consume_skb path. */
if (!msg->skb) {
if (charge)
sk_mem_uncharge(sk, len);
put_page(sg_page(sge));
}
memset(sge, 0, sizeof(*sge));
return len;
}
......@@ -397,28 +399,45 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
struct sock *sk = psock->sk;
int copied = 0, num_sge;
struct sk_msg *msg;
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;
if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
if (unlikely(!msg))
return -EAGAIN;
if (!sk_rmem_schedule(sk, skb, skb->len)) {
kfree(msg);
return -EAGAIN;
}
return NULL;
sk_msg_init(msg);
return msg;
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
struct sk_psock *psock,
struct sock *sk,
struct sk_msg *msg)
{
int num_sge, copied;
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
*/
if (skb_linearize(skb))
return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
if (unlikely(num_sge < 0)) {
kfree(msg);
return num_sge;
}
sk_mem_charge(sk, skb->len);
copied = skb->len;
msg->sg.start = 0;
msg->sg.size = copied;
......@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
return copied;
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
/* If we are receiving on the same sock skb->sk is already assigned,
* skip memory accounting and owner transition seeing it already set
* correctly.
*/
if (unlikely(skb->sk == sk))
return sk_psock_skb_ingress_self(psock, skb);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
/* This will transition ownership of the data from the socket where
* the BPF program was run initiating the redirect to the socket
* we will eventually receive this data on. The data will be released
* from skb_consume found in __tcp_bpf_recvmsg() after its been copied
* into user buffers.
*/
skb_set_owner_r(skb, sk);
return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
}
/* Puts an skb on the ingress queue of the socket already assigned to the
* skb. In this case we do not need to check memory limits or skb_set_owner_r
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
if (unlikely(!msg))
return -EAGAIN;
sk_msg_init(msg);
return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
}
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
......@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
* retrying later from workqueue.
*/
if (skb_queue_empty(&psock->ingress_skb)) {
err = sk_psock_skb_ingress(psock, skb);
err = sk_psock_skb_ingress_self(psock, skb);
}
if (err < 0) {
skb_queue_tail(&psock->ingress_skb, skb);
......
......@@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
{
struct iov_iter *iter = &msg->msg_iter;
int peek = flags & MSG_PEEK;
int i, ret, copied = 0;
struct sk_msg *msg_rx;
int i, copied = 0;
msg_rx = list_first_entry_or_null(&psock->ingress_msg,
struct sk_msg, list);
......@@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
ret = copy_page_to_iter(page, sge->offset, copy, iter);
if (ret != copy) {
msg_rx->sg.start = i;
return -EFAULT;
}
copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy)
return copied ? copied : -EFAULT;
copied += copy;
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
sk_mem_uncharge(sk, copy);
if (!msg_rx->skb)
sk_mem_uncharge(sk, copy);
msg_rx->sg.size -= copy;
if (!sge->length) {
......@@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
put_page(page);
}
} else {
/* Lets not optimize peek case if copy_page_to_iter
* didn't copy the entire length lets just break.
*/
if (copy != sge->length)
return copied;
sk_msg_iter_var_next(i);
}
......
......@@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv)
ifindex = net_parse_dev(&argc, &argv);
if (ifindex < 1) {
close(progfd);
return -EINVAL;
err = -EINVAL;
goto cleanup;
}
if (argc) {
......@@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv)
overwrite = true;
} else {
p_err("expected 'overwrite', got: '%s'?", *argv);
close(progfd);
return -EINVAL;
err = -EINVAL;
goto cleanup;
}
}
......@@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv)
if (is_prefix("xdp", attach_type_strings[attach_type]))
err = do_attach_detach_xdp(progfd, attach_type, ifindex,
overwrite);
if (err < 0) {
if (err) {
p_err("interface %s attach failed: %s",
attach_type_strings[attach_type], strerror(-err));
return err;
goto cleanup;
}
if (json_output)
jsonw_null(json_wtr);
return 0;
cleanup:
close(progfd);
return err;
}
static int do_detach(int argc, char **argv)
......
......@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
......@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
......
......@@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
size_t sec_off, void *insn_data, size_t insn_data_sz)
{
int i;
if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
sec_name, name, sec_off, insn_data_sz);
......@@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
goto errout;
memcpy(prog->insns, insn_data, insn_data_sz);
for (i = 0; i < prog->insns_cnt; i++) {
if (insn_is_subprog_call(&prog->insns[i])) {
obj->has_subcalls = true;
break;
}
}
return 0;
errout:
pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
......@@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
static bool prog_is_subprog(const struct bpf_object *obj,
const struct bpf_program *prog)
{
return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
/* For legacy reasons, libbpf supports an entry-point BPF programs
* without SEC() attribute, i.e., those in the .text section. But if
* there are 2 or more such programs in the .text section, they all
* must be subprograms called from entry-point BPF programs in
* designated SEC()'tions, otherwise there is no way to distinguish
* which of those programs should be loaded vs which are a subprogram.
* Similarly, if there is a function/program in .text and at least one
* other BPF program with custom SEC() attribute, then we just assume
* .text programs are subprograms (even if they are not called from
* other programs), because libbpf never explicitly supported mixing
* SEC()-designated BPF programs and .text entry-point BPF programs.
*/
return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
}
struct bpf_program *
......
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "test_probe_read_user_str.skel.h"
static const char str1[] = "mestring";
static const char str2[] = "mestringalittlebigger";
static const char str3[] = "mestringblubblubblubblubblub";
static int test_one_str(struct test_probe_read_user_str *skel, const char *str,
size_t len)
{
int err, duration = 0;
char buf[256];
/* Ensure bytes after string are ones */
memset(buf, 1, sizeof(buf));
memcpy(buf, str, len);
/* Give prog our userspace pointer */
skel->bss->user_ptr = buf;
/* Trigger tracepoint */
usleep(1);
/* Did helper fail? */
if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n",
skel->bss->ret))
return 1;
/* Check that string was copied correctly */
err = memcmp(skel->bss->buf, str, len);
if (CHECK(err, "memcmp", "prog copied wrong string"))
return 1;
/* Now check that no extra trailing bytes were copied */
memset(buf, 0, sizeof(buf));
err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len);
if (CHECK(err, "memcmp", "trailing bytes were not stripped"))
return 1;
return 0;
}
void test_probe_read_user_str(void)
{
struct test_probe_read_user_str *skel;
int err, duration = 0;
skel = test_probe_read_user_str__open_and_load();
if (CHECK(!skel, "test_probe_read_user_str__open_and_load",
"skeleton open and load failed\n"))
return;
/* Give pid to bpf prog so it doesn't read from anyone else */
skel->bss->pid = getpid();
err = test_probe_read_user_str__attach(skel);
if (CHECK(err, "test_probe_read_user_str__attach",
"skeleton attach failed: %d\n", err))
goto out;
if (test_one_str(skel, str1, sizeof(str1)))
goto out;
if (test_one_str(skel, str2, sizeof(str2)))
goto out;
if (test_one_str(skel, str3, sizeof(str3)))
goto out;
out:
test_probe_read_user_str__destroy(skel);
}
......@@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
*/
buf = 0x40;
if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
if (err < 0) {
log_err("Failed to call setsockopt(IP_TOS)");
goto detach;
}
......
......@@ -3,12 +3,14 @@
#include <test_progs.h>
#include <time.h>
#include "test_subprogs.skel.h"
#include "test_subprogs_unused.skel.h"
static int duration;
void test_subprogs(void)
{
struct test_subprogs *skel;
struct test_subprogs_unused *skel2;
int err;
skel = test_subprogs__open_and_load();
......@@ -26,6 +28,10 @@ void test_subprogs(void)
CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
skel2 = test_subprogs_unused__open_and_load();
ASSERT_OK_PTR(skel2, "unused_progs_skel");
test_subprogs_unused__destroy(skel2);
cleanup:
test_subprogs__destroy(skel);
}
......@@ -60,6 +60,7 @@ void test_test_global_funcs(void)
{ "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
{ "test_global_func6.o" , "modified ctx ptr R2" },
{ "test_global_func7.o" , "foo() doesn't return scalar" },
{ "test_global_func8.o" },
};
libbpf_print_fn_t old_print_fn = NULL;
int err, i, duration = 0;
......
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
__noinline int foo(struct __sk_buff *skb)
{
return bpf_get_prandom_u32();
}
SEC("cgroup_skb/ingress")
int test_cls(struct __sk_buff *skb)
{
if (!foo(skb))
return 0;
return 1;
}
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <sys/types.h>
pid_t pid = 0;
long ret = 0;
void *user_ptr = 0;
char buf[256] = {};
SEC("tracepoint/syscalls/sys_enter_nanosleep")
int on_write(void *ctx)
{
if (pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
return 0;
}
char _license[] SEC("license") = "GPL";
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
const char LICENSE[] SEC("license") = "GPL";
__attribute__((unused)) __noinline int unused1(int x)
{
return x + 1;
}
static __attribute__((unused)) __noinline int unused2(int x)
{
return x + 2;
}
SEC("raw_tp/sys_enter")
int main_prog(void *ctx)
{
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment