Commit 2ce3206b authored by Daniel Borkmann

Merge branch 'bpf-ancestor-cgroup-id'

Andrey Ignatov says:

====================
This patch set adds a new BPF helper, bpf_skb_ancestor_cgroup_id, that returns
the id of the cgroup v2 that is the ancestor, at the given ancestor_level, of
the cgroup associated with the skb.

The helper is useful to implement policies in TC based on cgroups that are
higher in the hierarchy than the immediate cgroup associated with the skb.

v1->v2:
- more reliable check for testing IPv6 to become ready in selftest.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents e8d2bec0 5ecd8c22
......@@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
}
/**
 * cgroup_ancestor - look up the ancestor of a cgroup at a given level
 * @cgrp: cgroup whose ancestry is searched
 * @ancestor_level: level of the wanted ancestor, counted from the root
 *
 * Climb from @cgrp towards the root through cgroup_parent() until a cgroup
 * whose level is not greater than @ancestor_level is reached. Return that
 * cgroup if its level is exactly @ancestor_level, NULL otherwise (including
 * the case where @cgrp itself sits above @ancestor_level).
 *
 * This function is safe to call as long as @cgrp is accessible.
 */
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
					     int ancestor_level)
{
	struct cgroup *cur = cgrp;

	/* @cgrp is closer to the root than the requested level. */
	if (ancestor_level > cgrp->level)
		return NULL;

	while (cur && cur->level > ancestor_level)
		cur = cgroup_parent(cur);

	if (!cur || cur->level != ancestor_level)
		return NULL;

	return cur;
}
/**
* task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
* @task: the task to be tested
......
......@@ -2093,6 +2093,24 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *skb* at the *ancestor_level*. The root cgroup is at
* *ancestor_level* zero and each step down the hierarchy
* increments the level. If *ancestor_level* == level of cgroup
* associated with *skb*, then return value will be same as that
* of **bpf_skb_cgroup_id**\ ().
*
* The helper is useful to implement policies based on cgroups
* that are higher in the hierarchy than the immediate cgroup
* associated with *skb*.
*
* The format of returned id and helper limitations are same as in
* **bpf_skb_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
......@@ -2207,7 +2225,8 @@ union bpf_attr {
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport),
FN(sk_select_reuseport), \
FN(skb_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
......@@ -3778,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
/* BPF helper: id of the ancestor, at @ancestor_level, of the cgroup attached
 * to the socket that owns @skb. Yields 0 when there is no full socket behind
 * the skb or when cgroup_ancestor() finds no ancestor at that level.
 */
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
	   ancestor_level)
{
	struct cgroup *found;
	struct sock *sk;

	sk = skb_to_full_sk(skb);
	if (!(sk && sk_fullsock(sk)))
		return 0;

	found = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data),
				ancestor_level);

	return found ? found->kn->id.id : 0;
}
/* Prototype descriptor for the bpf_skb_ancestor_cgroup_id() helper: it takes
 * the program context (the skb) as first argument and the requested ancestor
 * level as second argument, and returns an integer (the cgroup id or 0).
 * NOTE(review): .gpl_only = false presumably means the helper is not limited
 * to GPL-licensed programs -- confirm against struct bpf_func_proto.
 */
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
.func = bpf_skb_ancestor_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
#endif
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
......@@ -4966,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
case BPF_FUNC_skb_ancestor_cgroup_id:
return &bpf_skb_ancestor_cgroup_id_proto;
#endif
default:
return bpf_base_func_proto(func_id);
......
......@@ -2093,6 +2093,24 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *skb* at the *ancestor_level*. The root cgroup is at
* *ancestor_level* zero and each step down the hierarchy
* increments the level. If *ancestor_level* == level of cgroup
* associated with *skb*, then return value will be same as that
* of **bpf_skb_cgroup_id**\ ().
*
* The helper is useful to implement policies based on cgroups
* that are higher in the hierarchy than the immediate cgroup
* associated with *skb*.
*
* The format of returned id and helper limitations are same as in
* **bpf_skb_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
......@@ -2207,7 +2225,8 @@ union bpf_attr {
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport),
FN(sk_select_reuseport), \
FN(skb_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
......@@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
......@@ -45,10 +46,11 @@ TEST_PROGS := test_kmod.sh \
test_sock_addr.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh
test_lirc_mode2.sh \
test_skb_cgroup_id.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
include ../lib.mk
......@@ -59,6 +61,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
......
......@@ -139,6 +139,10 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
/* Call stubs for the skb cgroup-id helpers. Each stub is a function pointer
 * whose value is the helper's BPF_FUNC_* id, so calling it from a BPF program
 * selects the corresponding helper by id.
 *
 * bpf_skb_cgroup_id(ctx): cgroup id for the skb in ctx, or 0 if unavailable.
 * bpf_skb_ancestor_cgroup_id(ctx, level): id of the ancestor cgroup at
 * 'level' (root is level 0), or 0 if it could not be retrieved.
 */
static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
(void *) BPF_FUNC_skb_cgroup_id;
static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
(void *) BPF_FUNC_skb_ancestor_cgroup_id;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2018 Facebook
set -eu
# Block until the link-local all-nodes address answers on ${TEST_IF}.
# Makes up to ${MAX_PING_TRIES} attempts, one second apart, printing a dot per
# attempt; exits the script with status 1 if none succeeds.
wait_for_ip()
{
	local _try=1

	echo -n "Wait for testing link-local IP to become available "
	while [ "${_try}" -le "${MAX_PING_TRIES}" ]; do
		echo -n "."
		if ! ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
			sleep 1
			_try=$((_try + 1))
			continue
		fi
		echo " OK"
		return
	done

	echo "ERROR: Timeout waiting for test IP to become available." 1>&2
	exit 1
}
# Build the test environment: a dedicated veth pair, a clsact qdisc on
# ${TEST_IF} and the BPF program attached to its egress hook. On success
# BPF_PROG_ID holds the id of the attached program.
setup()
{
	# Create testing interfaces not to interfere with current environment.
	ip link add dev "${TEST_IF}" type veth peer name "${TEST_IF_PEER}"
	ip link set "${TEST_IF}" up
	ip link set "${TEST_IF_PEER}" up

	wait_for_ip

	tc qdisc add dev "${TEST_IF}" clsact
	tc filter add dev "${TEST_IF}" egress bpf obj "${BPF_PROG_OBJ}" \
		sec "${BPF_PROG_SECTION}" da

	# Extract the program id from the " id <N>" field of the filter dump.
	BPF_PROG_ID=$(tc filter show dev "${TEST_IF}" egress | \
			awk '/ id / {sub(/.* id /, "", $0); print($1)}')

	# The pipeline succeeds even when awk matches nothing, so check the
	# result explicitly instead of handing an empty id to the test binary.
	if [ -z "${BPF_PROG_ID}" ]; then
		echo 1>&2 "ERROR: Failed to find id of BPF program attached to ${TEST_IF}."
		exit 1
	fi
}
# Best-effort teardown of the test interfaces. Errors are ignored so that an
# interface that is already gone does not abort the script under `set -e`.
cleanup()
{
	local _dev

	for _dev in ${TEST_IF} ${TEST_IF_PEER}; do
		ip link del ${_dev} 2>/dev/null || true
	done
}
# Entry point: register cleanup for normal exit and for SIGINT, SIGQUIT,
# SIGABRT and SIGTERM, set up the environment, then run the user-space checker
# with the test interface name and the id of the attached BPF program.
main()
{
	trap cleanup EXIT INT QUIT ABRT TERM
	setup
	# Quote the arguments so each one reaches the checker as exactly one
	# argv entry, even if it is empty or contains whitespace.
	"${PROG}" "${TEST_IF}" "${BPF_PROG_ID}"
}
# Directory holding this script; the BPF object and the checker binary are
# looked up next to it. "$0" is quoted so a path with spaces is not split.
DIR="$(dirname "$0")"
# Names of the two ends of the temporary veth pair.
TEST_IF="test_cgid_1"
TEST_IF_PEER="test_cgid_2"
# Number of one-second attempts wait_for_ip() makes before giving up.
MAX_PING_TRIES=5
BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
BPF_PROG_SECTION="cgroup_id_logger"
# Filled in by setup() once the program is attached.
BPF_PROG_ID=0
PROG="${DIR}/test_skb_cgroup_id_user"
main
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <string.h>
#include "bpf_helpers.h"
#define NUM_CGROUP_LEVELS 4
/* Array map with one slot per cgroup level (NUM_CGROUP_LEVELS entries):
 * the key is the level (__u32) and the value is the cgroup id (__u64) that
 * log_nth_level() stored for that level.
 */
struct bpf_map_def SEC("maps") cgroup_ids = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = NUM_CGROUP_LEVELS,
};
/* Store the ancestor cgroup id of @skb at @level into cgroup_ids[@level]. */
static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
{
	/* [1] &level passed to external function that may change it, it's
	 * incompatible with loop unroll.
	 */
	__u64 cgid = bpf_skb_ancestor_cgroup_id(skb, level);

	bpf_map_update_elem(&cgroup_ids, &level, &cgid, 0);
}
/* Program entry point, placed in the "cgroup_id_logger" section. For each
 * level 0..3 it records the ancestor cgroup id of @skb via log_nth_level()
 * and then returns TC_ACT_OK.
 */
SEC("cgroup_id_logger")
int log_cgroup_id(struct __sk_buff *skb)
{
/* Loop unroll can't be used here due to [1]. Unrolling manually.
* Number of calls should be in sync with NUM_CGROUP_LEVELS.
*/
log_nth_level(skb, 0);
log_nth_level(skb, 1);
log_nth_level(skb, 2);
log_nth_level(skb, 3);
return TC_ACT_OK;
}
/* Program metadata emitted into the "version" and "license" sections.
 * NOTE(review): presumably consumed by the BPF loader -- confirm.
 */
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define CGROUP_PATH "/skb_cgroup_test"
#define NUM_CGROUP_LEVELS 4
/* RFC 4291, Section 2.7.1 */
#define LINKLOCAL_MULTICAST "ff02::1"
/* Fill @dst with an IPv6 destination: address @ip, UDP port 1025, scoped to
 * interface @iface. Returns 0 on success, -1 (after logging) if @ip does not
 * parse or @iface has no interface index.
 */
static int mk_dst_addr(const char *ip, const char *iface,
		       struct sockaddr_in6 *dst)
{
	unsigned int ifindex;

	memset(dst, 0, sizeof(*dst));
	dst->sin6_port = htons(1025);
	dst->sin6_family = AF_INET6;

	if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
		log_err("Invalid IPv6: %s", ip);
		return -1;
	}

	ifindex = if_nametoindex(iface);
	dst->sin6_scope_id = ifindex;
	if (ifindex == 0) {
		log_err("Failed to get index of iface: %s", iface);
		return -1;
	}

	return 0;
}
/* Send one small UDP datagram to the link-local all-nodes multicast address
 * through @iface. Returns 0 on success and -1 on any failure; the socket is
 * closed on every path.
 */
static int send_packet(const char *iface)
{
	struct sockaddr_in6 dst;
	char msg[] = "msg";
	int ret = -1;	/* assume failure until the datagram is sent */
	int fd = -1;

	if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
		goto out;

	fd = socket(AF_INET6, SOCK_DGRAM, 0);
	if (fd == -1) {
		log_err("Failed to create UDP socket");
		goto out;
	}

	if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
		   sizeof(dst)) == -1) {
		log_err("Failed to send datagram");
		goto out;
	}

	ret = 0;
out:
	if (fd >= 0)
		close(fd);
	return ret;
}
/* Open the first map used by the BPF program with id @prog_id.
 * Returns a map fd on success or a negative value on failure (each failure
 * is logged). The temporary program fd is always closed before returning.
 */
int get_map_fd_by_prog_id(int prog_id)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	__u32 map_ids[1];
	int map_fd = -1;
	int prog_fd;

	prog_fd = bpf_prog_get_fd_by_id(prog_id);
	if (prog_fd < 0) {
		log_err("Failed to get fd by prog id %d", prog_id);
		return map_fd;
	}

	/* Ask for at most one map id, written into map_ids[]. */
	info.nr_map_ids = 1;
	info.map_ids = (__u64) (unsigned long) map_ids;

	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
		log_err("Failed to get info by prog fd %d", prog_fd);
	} else if (!info.nr_map_ids) {
		log_err("No maps found for prog fd %d", prog_fd);
	} else {
		map_fd = bpf_map_get_fd_by_id(map_ids[0]);
		if (map_fd < 0)
			log_err("Failed to get fd by map id %d", map_ids[0]);
	}

	close(prog_fd);
	return map_fd;
}
/* Compare the cgroup ids the BPF program @prog_id logged for each level with
 * the ids expected for the test cgroup hierarchy. Returns 0 when all
 * NUM_CGROUP_LEVELS entries match, -1 on the first lookup failure or
 * mismatch (both are logged) or when the map cannot be opened.
 */
int check_ancestor_cgroup_ids(int prog_id)
{
	__u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
	__u32 level;
	int ret = 0;
	int map_fd;

	expected_ids[0] = 0x100000001;	/* root cgroup */
	expected_ids[1] = get_cgroup_id("");
	expected_ids[2] = get_cgroup_id(CGROUP_PATH);
	expected_ids[3] = 0;		/* non-existent cgroup */

	map_fd = get_map_fd_by_prog_id(prog_id);
	if (map_fd < 0)
		return -1;

	for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
		if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
			log_err("Failed to lookup key %d", level);
			ret = -1;
			break;
		}
		if (actual_ids[level] != expected_ids[level]) {
			log_err("%llx (actual) != %llx (expected), level: %u\n",
				actual_ids[level], expected_ids[level], level);
			ret = -1;
			break;
		}
	}

	close(map_fd);
	return ret;
}
/* Test driver. Usage: <prog> iface prog_id
 *
 * Sets up the cgroup test environment, creates and joins CGROUP_PATH, sends
 * one packet through @iface so the attached BPF program logs the ancestor
 * cgroup ids, then verifies the logged ids. Prints "[PASS]" or "[FAIL]" and
 * returns 0 or -1 accordingly.
 */
int main(int argc, char **argv)
{
	int cgfd = -1;
	int err = 0;

	if (argc < 3) {
		fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	if (setup_cgroup_environment())
		goto err;

	/* The original check treated only 0 as failure. Rejecting every
	 * non-positive value also catches a negative error return.
	 * NOTE(review): confirm create_and_get_cgroup()'s failure value in
	 * cgroup_helpers.
	 */
	cgfd = create_and_get_cgroup(CGROUP_PATH);
	if (cgfd <= 0)
		goto err;

	if (join_cgroup(CGROUP_PATH))
		goto err;

	if (send_packet(argv[1]))
		goto err;

	if (check_ancestor_cgroup_ids(atoi(argv[2])))
		goto err;

	goto out;
err:
	err = -1;
out:
	/* Only close a descriptor that was really obtained: closing -1 is an
	 * error and closing 0 would close stdin.
	 */
	if (cgfd > 0)
		close(cgfd);
	cleanup_cgroup_environment();
	printf("[%s]\n", err ? "FAIL" : "PASS");
	return err;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment