Commit 2865ba82 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2021-09-14

The following pull-request contains BPF updates for your *net* tree.

We've added 7 non-merge commits during the last 13 day(s) which contain
a total of 18 files changed, 334 insertions(+), 193 deletions(-).

The main changes are:

1) Fix mmap_lock lockdep splat in BPF stack map's build_id lookup, from Yonghong Song.

2) Fix BPF cgroup v2 program bypass upon net_cls/prio activation, from Daniel Borkmann.

3) Fix kvcalloc() BTF line info splat on oversized allocation attempts, from Bixuan Cui.

4) Fix BPF selftest build of task_pt_regs test for arm64/s390, from Jean-Philippe Brucker.

5) Fix BPF's disasm.{c,h} to dual-license so that it is aligned with bpftool given the former
   is a build dependency for the latter, from Daniel Borkmann with ACKs from contributors.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 550ac9c1 43d2b88c
...@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} ...@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
* sock_cgroup_data is embedded at sock->sk_cgrp_data and contains * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
* per-socket cgroup information except for memcg association. * per-socket cgroup information except for memcg association.
* *
* On legacy hierarchies, net_prio and net_cls controllers directly set * On legacy hierarchies, net_prio and net_cls controllers directly
* attributes on each sock which can then be tested by the network layer. * set attributes on each sock which can then be tested by the network
* On the default hierarchy, each sock is associated with the cgroup it was * layer. On the default hierarchy, each sock is associated with the
* created in and the networking layer can match the cgroup directly. * cgroup it was created in and the networking layer can match the
* * cgroup directly.
* To avoid carrying all three cgroup related fields separately in sock,
* sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
* On boot, sock_cgroup_data records the cgroup that the sock was created
* in so that cgroup2 matches can be made; however, once either net_prio or
* net_cls starts being used, the area is overridden to carry prioidx and/or
* classid. The two modes are distinguished by whether the lowest bit is
* set. Clear bit indicates cgroup pointer while set bit prioidx and
* classid.
*
* While userland may start using net_prio or net_cls at any time, once
* either is used, cgroup2 matching no longer works. There is no reason to
* mix the two and this is in line with how legacy and v2 compatibility is
* handled. On mode switch, cgroup references which are already being
* pointed to by socks may be leaked. While this can be remedied by adding
* synchronization around sock_cgroup_data, given that the number of leaked
* cgroups is bound and highly unlikely to be high, this seems to be the
* better trade-off.
*/ */
struct sock_cgroup_data { struct sock_cgroup_data {
union { struct cgroup *cgroup; /* v2 */
#ifdef __LITTLE_ENDIAN #ifdef CONFIG_CGROUP_NET_CLASSID
struct { u32 classid; /* v1 */
u8 is_data : 1; #endif
u8 no_refcnt : 1; #ifdef CONFIG_CGROUP_NET_PRIO
u8 unused : 6; u16 prioidx; /* v1 */
u8 padding;
u16 prioidx;
u32 classid;
} __packed;
#else
struct {
u32 classid;
u16 prioidx;
u8 padding;
u8 unused : 6;
u8 no_refcnt : 1;
u8 is_data : 1;
} __packed;
#endif #endif
u64 val;
};
}; };
/*
* There's a theoretical window where the following accessors race with
* updaters and return part of the previous pointer as the prioidx or
* classid. Such races are short-lived and the result isn't critical.
*/
static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{ {
/* fallback to 1 which is always the ID of the root cgroup */ #ifdef CONFIG_CGROUP_NET_PRIO
return (skcd->is_data & 1) ? skcd->prioidx : 1; return READ_ONCE(skcd->prioidx);
#else
return 1;
#endif
} }
static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{ {
/* fallback to 0 which is the unconfigured default classid */ #ifdef CONFIG_CGROUP_NET_CLASSID
return (skcd->is_data & 1) ? skcd->classid : 0; return READ_ONCE(skcd->classid);
#else
return 0;
#endif
} }
/*
* If invoked concurrently, the updaters may clobber each other. The
* caller is responsible for synchronization.
*/
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx) u16 prioidx)
{ {
struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; #ifdef CONFIG_CGROUP_NET_PRIO
WRITE_ONCE(skcd->prioidx, prioidx);
if (sock_cgroup_prioidx(&skcd_buf) == prioidx) #endif
return;
if (!(skcd_buf.is_data & 1)) {
skcd_buf.val = 0;
skcd_buf.is_data = 1;
}
skcd_buf.prioidx = prioidx;
WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
} }
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid) u32 classid)
{ {
struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; #ifdef CONFIG_CGROUP_NET_CLASSID
WRITE_ONCE(skcd->classid, classid);
if (sock_cgroup_classid(&skcd_buf) == classid) #endif
return;
if (!(skcd_buf.is_data & 1)) {
skcd_buf.val = 0;
skcd_buf.is_data = 1;
}
skcd_buf.classid = classid;
WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
} }
#else /* CONFIG_SOCK_CGROUP_DATA */ #else /* CONFIG_SOCK_CGROUP_DATA */
......
...@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, ...@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
*/ */
#ifdef CONFIG_SOCK_CGROUP_DATA #ifdef CONFIG_SOCK_CGROUP_DATA
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
extern spinlock_t cgroup_sk_update_lock;
#endif
void cgroup_sk_alloc_disable(void);
void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd);
void cgroup_sk_free(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd);
static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
{ {
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) return skcd->cgroup;
unsigned long v;
/*
* @skcd->val is 64bit but the following is safe on 32bit too as we
* just need the lower ulong to be written and read atomically.
*/
v = READ_ONCE(skcd->val);
if (v & 3)
return &cgrp_dfl_root.cgrp;
return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
#else
return (struct cgroup *)(unsigned long)skcd->val;
#endif
} }
#else /* CONFIG_CGROUP_DATA */ #else /* CONFIG_CGROUP_DATA */
......
...@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm) ...@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
__mmap_lock_trace_released(mm, false); __mmap_lock_trace_released(mm, false);
} }
static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
if (mmap_read_trylock(mm)) {
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
return true;
}
return false;
}
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{ {
up_read_non_owner(&mm->mmap_lock); up_read_non_owner(&mm->mmap_lock);
......
// SPDX-License-Identifier: GPL-2.0-only // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook * Copyright (c) 2016 Facebook
*/ */
......
/* SPDX-License-Identifier: GPL-2.0-only */ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016 Facebook * Copyright (c) 2016 Facebook
*/ */
......
...@@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, ...@@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
* with build_id. * with build_id.
*/ */
if (!user || !current || !current->mm || irq_work_busy || if (!user || !current || !current->mm || irq_work_busy ||
!mmap_read_trylock_non_owner(current->mm)) { !mmap_read_trylock(current->mm)) {
/* cannot access current->mm, fall back to ips */ /* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) { for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].status = BPF_STACK_BUILD_ID_IP;
...@@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, ...@@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
} }
if (!work) { if (!work) {
mmap_read_unlock_non_owner(current->mm); mmap_read_unlock(current->mm);
} else { } else {
work->mm = current->mm; work->mm = current->mm;
/* The lock will be released once we're out of interrupt
* context. Tell lockdep that we've released it now so
* it doesn't complain that we forgot to release it.
*/
rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_);
irq_work_queue(&work->irq_work); irq_work_queue(&work->irq_work);
} }
} }
......
...@@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env, ...@@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env,
nr_linfo = attr->line_info_cnt; nr_linfo = attr->line_info_cnt;
if (!nr_linfo) if (!nr_linfo)
return 0; return 0;
if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
return -EINVAL;
rec_size = attr->line_info_rec_size; rec_size = attr->line_info_rec_size;
if (rec_size < MIN_BPF_LINEINFO_SIZE || if (rec_size < MIN_BPF_LINEINFO_SIZE ||
......
...@@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) ...@@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
*/ */
#ifdef CONFIG_SOCK_CGROUP_DATA #ifdef CONFIG_SOCK_CGROUP_DATA
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
DEFINE_SPINLOCK(cgroup_sk_update_lock);
static bool cgroup_sk_alloc_disabled __read_mostly;
void cgroup_sk_alloc_disable(void)
{
if (cgroup_sk_alloc_disabled)
return;
pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
cgroup_sk_alloc_disabled = true;
}
#else
#define cgroup_sk_alloc_disabled false
#endif
void cgroup_sk_alloc(struct sock_cgroup_data *skcd) void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{ {
if (cgroup_sk_alloc_disabled) {
skcd->no_refcnt = 1;
return;
}
/* Don't associate the sock with unrelated interrupted task's cgroup. */ /* Don't associate the sock with unrelated interrupted task's cgroup. */
if (in_interrupt()) if (in_interrupt())
return; return;
rcu_read_lock(); rcu_read_lock();
while (true) { while (true) {
struct css_set *cset; struct css_set *cset;
cset = task_css_set(current); cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) { if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp; skcd->cgroup = cset->dfl_cgrp;
cgroup_bpf_get(cset->dfl_cgrp); cgroup_bpf_get(cset->dfl_cgrp);
break; break;
} }
cpu_relax(); cpu_relax();
} }
rcu_read_unlock(); rcu_read_unlock();
} }
void cgroup_sk_clone(struct sock_cgroup_data *skcd) void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{ {
if (skcd->val) { struct cgroup *cgrp = sock_cgroup_ptr(skcd);
if (skcd->no_refcnt)
return;
/* /*
* We might be cloning a socket which is left in an empty * We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd. * cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live(). * Don't use cgroup_get_live().
*/ */
cgroup_get(sock_cgroup_ptr(skcd)); cgroup_get(cgrp);
cgroup_bpf_get(sock_cgroup_ptr(skcd)); cgroup_bpf_get(cgrp);
}
} }
void cgroup_sk_free(struct sock_cgroup_data *skcd) void cgroup_sk_free(struct sock_cgroup_data *skcd)
{ {
struct cgroup *cgrp = sock_cgroup_ptr(skcd); struct cgroup *cgrp = sock_cgroup_ptr(skcd);
if (skcd->no_refcnt)
return;
cgroup_bpf_put(cgrp); cgroup_bpf_put(cgrp);
cgroup_put(cgrp); cgroup_put(cgrp);
} }
......
...@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) ...@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
struct update_classid_context *ctx = (void *)v; struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file); struct socket *sock = sock_from_file(file);
if (sock) { if (sock)
spin_lock(&cgroup_sk_update_lock);
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
if (--ctx->batch == 0) { if (--ctx->batch == 0) {
ctx->batch = UPDATE_CLASSID_BATCH; ctx->batch = UPDATE_CLASSID_BATCH;
return n + 1; return n + 1;
...@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
struct css_task_iter it; struct css_task_iter it;
struct task_struct *p; struct task_struct *p;
cgroup_sk_alloc_disable();
cs->classid = (u32)value; cs->classid = (u32)value;
css_task_iter_start(css, 0, &it); css_task_iter_start(css, 0, &it);
......
...@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of, ...@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev) if (!dev)
return -ENODEV; return -ENODEV;
cgroup_sk_alloc_disable();
rtnl_lock(); rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio); ret = netprio_set_prio(of_css(of), dev, prio);
...@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of, ...@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n) static int update_netprio(const void *v, struct file *file, unsigned n)
{ {
struct socket *sock = sock_from_file(file); struct socket *sock = sock_from_file(file);
if (sock) {
spin_lock(&cgroup_sk_update_lock); if (sock)
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
(unsigned long)v); (unsigned long)v);
spin_unlock(&cgroup_sk_update_lock);
}
return 0; return 0;
} }
...@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset) ...@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p; struct task_struct *p;
struct cgroup_subsys_state *css; struct cgroup_subsys_state *css;
cgroup_sk_alloc_disable();
cgroup_taskset_for_each(p, css, tset) { cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id; void *v = (void *)(unsigned long)css->id;
......
...@@ -12,27 +12,36 @@ ...@@ -12,27 +12,36 @@
#include <unistd.h> #include <unistd.h>
#include <ftw.h> #include <ftw.h>
#include "cgroup_helpers.h" #include "cgroup_helpers.h"
/* /*
* To avoid relying on the system setup, when setup_cgroup_env is called * To avoid relying on the system setup, when setup_cgroup_env is called
* we create a new mount namespace, and cgroup namespace. The cgroup2 * we create a new mount namespace, and cgroup namespace. The cgroupv2
* root is mounted at CGROUP_MOUNT_PATH * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
* * have cgroupv2 enabled at this point in time. It's easier to create our
* Unfortunately, most people don't have cgroupv2 enabled at this point in time. * own mount namespace and manage it ourselves. We assume /mnt exists.
* It's easier to create our own mount namespace and manage it ourselves.
* *
* We assume /mnt exists. * Related cgroupv1 helpers are named *classid*(), since we only use the
* net_cls controller for tagging net_cls.classid. We assume the default
* mount under /sys/fs/cgroup/net_cls, which should be the case for the
* vast majority of users.
*/ */
#define WALK_FD_LIMIT 16 #define WALK_FD_LIMIT 16
#define CGROUP_MOUNT_PATH "/mnt" #define CGROUP_MOUNT_PATH "/mnt"
#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR "/cgroup-test-work-dir" #define CGROUP_WORK_DIR "/cgroup-test-work-dir"
#define format_cgroup_path(buf, path) \ #define format_cgroup_path(buf, path) \
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path) CGROUP_WORK_DIR, path)
#define format_classid_path(buf) \
snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
CGROUP_WORK_DIR)
/** /**
* enable_all_controllers() - Enable all available cgroup v2 controllers * enable_all_controllers() - Enable all available cgroup v2 controllers
* *
...@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr, ...@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
return 0; return 0;
} }
static int join_cgroup_from_top(const char *cgroup_path)
static int join_cgroup_from_top(char *cgroup_path)
{ {
char cgroup_procs_path[PATH_MAX + 1]; char cgroup_procs_path[PATH_MAX + 1];
pid_t pid = getpid(); pid_t pid = getpid();
...@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) { ...@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) {
} }
return cg_fd; return cg_fd;
} }
/**
 * setup_classid_environment() - Prepare the cgroupv1 net_cls hierarchy
 *
 * Mounts a tmpfs at CGROUP_MOUNT_DFLT, creates and mounts the net_cls
 * controller at NETCLS_MOUNT_PATH, wipes any left-over work dir from an
 * earlier run and finally (re)creates the work dir. Mount points that are
 * already in place (EBUSY) and directories that already exist (EEXIST)
 * are accepted as-is.
 *
 * Pair every successful call with cleanup_classid_environment() once the
 * test is done.
 *
 * Return: 0 on success; 1 on failure, after printing the error to stderr.
 */
int setup_classid_environment(void)
{
	char workdir[PATH_MAX + 1];
	int rc;

	rc = mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL);
	if (rc && errno != EBUSY) {
		log_err("mount cgroup base");
		return 1;
	}

	rc = mkdir(NETCLS_MOUNT_PATH, 0777);
	if (rc && errno != EEXIST) {
		log_err("mkdir cgroup net_cls");
		return 1;
	}

	rc = mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls");
	if (rc && errno != EBUSY) {
		log_err("mount cgroup net_cls");
		return 1;
	}

	/* Start from a clean slate in case a previous run left state behind. */
	cleanup_classid_environment();

	format_classid_path(workdir);
	rc = mkdir(workdir, 0777);
	if (rc && errno != EEXIST) {
		log_err("mkdir cgroup work dir");
		return 1;
	}

	return 0;
}
/**
 * set_classid() - Write a cgroupv1 net_cls classid for the work dir
 * @id: numeric classid to store
 *
 * Opens the net_cls.classid file inside the net_cls cgroup work dir and
 * writes @id to it as a decimal number followed by a newline, so that
 * sockets can be tagged with it later on.
 *
 * Return: 0 on success; 1 on failure, after printing the error to stderr.
 */
int set_classid(unsigned int id)
{
	/* Smaller than the path buffer so the suffix appended below fits. */
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int fd, written;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file),
		 "%s/net_cls.classid", workdir);

	fd = open(classid_file, O_WRONLY);
	if (fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	written = dprintf(fd, "%u\n", id);
	if (written < 0)
		log_err("Setting cgroup classid");

	close(fd);
	return written < 0 ? 1 : 0;
}
/**
 * join_classid() - Move the caller into the cgroupv1 net_cls work dir
 *
 * The work dir must already exist (see setup_classid_environment()).
 * Joining it is what causes the process' sockets to be tagged with the
 * classid configured for that cgroup.
 *
 * Return: 0 on success, 1 on failure.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];
	int rc;

	format_classid_path(workdir);
	rc = join_cgroup_from_top(workdir);

	return rc;
}
/**
 * cleanup_classid_environment() - Tear down the cgroupv1 net_cls work dir
 *
 * First moves the calling process back to the root of the net_cls
 * hierarchy, then walks the work dir depth-first to run the deletion
 * callback on it.
 *
 * This is best effort: failures are reported on stderr by the helpers and
 * the function carries on regardless.
 */
void cleanup_classid_environment(void)
{
	char workdir[PATH_MAX + 1];

	/* Step out of the work dir cgroup before trying to remove it. */
	join_cgroup_from_top(NETCLS_MOUNT_PATH);

	format_classid_path(workdir);
	nftw(workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
}
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CGROUP_HELPERS_H #ifndef __CGROUP_HELPERS_H
#define __CGROUP_HELPERS_H #define __CGROUP_HELPERS_H
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
...@@ -8,12 +9,21 @@ ...@@ -8,12 +9,21 @@
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
/* cgroupv2 related */
int cgroup_setup_and_join(const char *path); int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path); int create_and_get_cgroup(const char *path);
unsigned long long get_cgroup_id(const char *path);
int join_cgroup(const char *path); int join_cgroup(const char *path);
int setup_cgroup_environment(void); int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void); void cleanup_cgroup_environment(void);
unsigned long long get_cgroup_id(const char *path);
#endif /* cgroupv1 related */
int set_classid(unsigned int id);
int join_classid(void);
int setup_classid_environment(void);
void cleanup_classid_environment(void);
#endif /* __CGROUP_HELPERS_H */
...@@ -208,12 +208,27 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, ...@@ -208,12 +208,27 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
static int connect_fd_to_addr(int fd, static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr, const struct sockaddr_storage *addr,
socklen_t addrlen) socklen_t addrlen, const bool must_fail)
{ {
if (connect(fd, (const struct sockaddr *)addr, addrlen)) { int ret;
errno = 0;
ret = connect(fd, (const struct sockaddr *)addr, addrlen);
if (must_fail) {
if (!ret) {
log_err("Unexpected success to connect to server");
return -1;
}
if (errno != EPERM) {
log_err("Unexpected error from connect to server");
return -1;
}
} else {
if (ret) {
log_err("Failed to connect to server"); log_err("Failed to connect to server");
return -1; return -1;
} }
}
return 0; return 0;
} }
...@@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) ...@@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
strlen(opts->cc) + 1)) strlen(opts->cc) + 1))
goto error_close; goto error_close;
if (connect_fd_to_addr(fd, &addr, addrlen)) if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
goto error_close; goto error_close;
return fd; return fd;
...@@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) ...@@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return -1; return -1;
} }
if (connect_fd_to_addr(client_fd, &addr, len)) if (connect_fd_to_addr(client_fd, &addr, len, false))
return -1; return -1;
return 0; return 0;
......
...@@ -20,6 +20,7 @@ typedef __u16 __sum16; ...@@ -20,6 +20,7 @@ typedef __u16 __sum16;
struct network_helper_opts { struct network_helper_opts {
const char *cc; const char *cc;
int timeout_ms; int timeout_ms;
bool must_fail;
}; };
/* ipv4 test vector */ /* ipv4 test vector */
......
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "connect4_dropper.skel.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
/*
 * Attach the connect4 dropper program to @cgroup_fd and verify that a
 * connection attempt to @server_fd is rejected.
 *
 * When @classid is true, the process additionally joins the cgroupv1
 * net_cls work dir before connecting.
 *
 * Returns 0 when the connect attempt failed as expected, -1 otherwise
 * (including any setup failure).
 */
static int run_test(int cgroup_fd, int server_fd, bool classid)
{
	struct network_helper_opts opts = {
		.must_fail = true,
	};
	struct connect4_dropper *skel;
	int ret = -1;
	int fd;

	skel = connect4_dropper__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return -1;

	skel->links.connect_v4_dropper =
		bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
					   cgroup_fd);
	if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach"))
		goto out;

	if (classid && !ASSERT_OK(join_classid(), "join_classid"))
		goto out;

	/* With must_fail set, a valid fd means connect() was refused. */
	fd = connect_to_fd_opts(server_fd, &opts);
	if (fd >= 0) {
		close(fd);
		ret = 0;
	}
out:
	connect4_dropper__destroy(skel);
	return ret;
}
/*
 * Verify that a cgroup v2 BPF connect4 policy keeps being enforced once the
 * cgroup v1 net_cls controller tags the process' sockets with a classid.
 *
 *  1) Plain connectivity to a local TCP server works without any BPF.
 *  2) With the dropper program attached to the v2 cgroup, connect fails.
 *  3) After setting up net_cls and a classid, connect must still fail.
 */
void test_cgroup_v1v2(void)
{
	struct network_helper_opts opts = {};
	int server_fd, client_fd, cgroup_fd;
	static const int port = 60123;

	/* Step 1: Check base connectivity works without any BPF. */
	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
	if (!ASSERT_GE(server_fd, 0, "server_fd"))
		return;
	client_fd = connect_to_fd_opts(server_fd, &opts);
	if (!ASSERT_GE(client_fd, 0, "client_fd")) {
		close(server_fd);
		return;
	}
	close(client_fd);
	close(server_fd);

	/* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
	cgroup_fd = test__join_cgroup("/connect_dropper");
	if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
		return;
	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
	if (!ASSERT_GE(server_fd, 0, "server_fd")) {
		close(cgroup_fd);
		return;
	}
	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");

	/*
	 * Step 3: Repeat with cgroup v1 net_cls tagging active. Report a
	 * broken net_cls setup right here instead of letting it surface as
	 * an unrelated-looking join_classid failure inside run_test().
	 */
	if (!ASSERT_OK(setup_classid_environment(), "setup_classid_environment"))
		goto out;
	if (ASSERT_OK(set_classid(42), "set_classid"))
		ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
	cleanup_classid_environment();
out:
	close(server_fd);
	close(cgroup_fd);
}
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE #define _GNU_SOURCE
#include <test_progs.h> #include <test_progs.h>
#include <linux/ptrace.h>
#include "test_task_pt_regs.skel.h" #include "test_task_pt_regs.skel.h"
void test_task_pt_regs(void) void test_task_pt_regs(void)
......
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define VERDICT_REJECT 0
#define VERDICT_PROCEED 1
/*
 * cgroup/connect4 hook: reject SOCK_STREAM connect attempts whose
 * destination port is 60123, let everything else proceed.
 */
SEC("cgroup/connect4")
int connect_v4_dropper(struct bpf_sock_addr *ctx)
{
	if (ctx->type == SOCK_STREAM && ctx->user_port == bpf_htons(60123))
		return VERDICT_REJECT;

	return VERDICT_PROCEED;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/ptrace.h> #include "vmlinux.h"
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
struct pt_regs current_regs = {}; #define PT_REGS_SIZE sizeof(struct pt_regs)
struct pt_regs ctx_regs = {};
/*
* The kernel struct pt_regs isn't exported in its entirety to userspace.
* Pass it as an array to task_pt_regs.c
*/
char current_regs[PT_REGS_SIZE] = {};
char ctx_regs[PT_REGS_SIZE] = {};
int uprobe_res = 0; int uprobe_res = 0;
SEC("uprobe/trigger_func") SEC("uprobe/trigger_func")
...@@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx) ...@@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx)
current = bpf_get_current_task_btf(); current = bpf_get_current_task_btf();
regs = (struct pt_regs *) bpf_task_pt_regs(current); regs = (struct pt_regs *) bpf_task_pt_regs(current);
__builtin_memcpy(&current_regs, regs, sizeof(*regs)); if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
__builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx)); return 0;
if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
return 0;
/* Prove that uprobe was run */ /* Prove that uprobe was run */
uprobe_res = 1; uprobe_res = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment