Commit 403f3e8f authored by Alexei Starovoitov

Merge branch 'add-bpf_xdp_get_xfrm_state-kfunc'

Daniel Xu says:

====================
Add bpf_xdp_get_xfrm_state() kfunc

This patchset adds two kfunc helpers, bpf_xdp_get_xfrm_state() and
bpf_xdp_xfrm_state_release() that wrap xfrm_state_lookup() and
xfrm_state_put(). The intent is to support software RSS (via XDP) for
the ongoing/upcoming ipsec pcpu work [0]. Recent experiments performed
on (hopefully) reproducible AWS testbeds indicate that single tunnel
pcpu ipsec can reach line rate on 100G ENA nics.

Note this patchset only tests/shows generic xfrm_state access. The
"secret sauce" (if you can really even call it that) involves accessing
a soon-to-be-upstreamed pcpu_num field in xfrm_state. Early example is
available here [1].

[0]: https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/03/
[1]: https://github.com/danobi/xdp-tools/blob/e89a1c617aba3b50d990f779357d6ce2863ecb27/xdp-bench/xdp_redirect_cpumap.bpf.c#L385-L406

Changes from v5:
* Improve kfunc doc comments
* Remove extraneous replay-window setting on selftest reverse path
* Squash two kfunc commits into one
* Rebase to bpf-next to pick up bitfield write patches
* Remove testing of opts.error in selftest prog

Changes from v4:
* Fixup commit message for selftest
* Set opts->error -ENOENT for !x
* Revert single file xfrm + bpf

Changes from v3:
* Place all xfrm bpf integrations in xfrm_bpf.c
* Avoid using nval as a temporary
* Rebase to bpf-next
* Remove extraneous __failure_unpriv annotation for verifier tests

Changes from v2:
* Fix/simplify BPF_CORE_WRITE_BITFIELD() algorithm
* Added verifier tests for bitfield writes
* Fix state leakage across test_tunnel subtests

Changes from v1:
* Move xfrm tunnel tests to test_progs
* Fix writing to opts->error when opts is invalid
* Use __bpf_kfunc_start_defs()
* Remove unused vxlanhdr definition
* Add and use BPF_CORE_WRITE_BITFIELD() macro
* Make series bisect clean

Changes from RFCv2:
* Rebased to ipsec-next
* Fix netns leak

Changes from RFCv1:
* Add Antony's commit tags
* Add KF_ACQUIRE and KF_RELEASE semantics
====================
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/cover.1702593901.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 56925f38 2cd07b0e
......@@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
#endif
#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
int register_xfrm_state_bpf(void);
#else
/* Stub selected by the #else branch when CONFIG_DEBUG_INFO_BTF is not
 * enabled: there is nothing to register, so it always reports success.
 */
static inline int register_xfrm_state_bpf(void)
{
return 0;
}
#endif
#endif /* _NET_XFRM_H */
......@@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
......@@ -4218,6 +4218,8 @@ void __init xfrm_init(void)
#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
register_xfrm_state_bpf();
}
#ifdef CONFIG_AUDITSYSCALL
......
// SPDX-License-Identifier: GPL-2.0-only
/* Unstable XFRM state BPF helpers.
*
* Note that it is allowed to break compatibility for these functions since the
* interface they are exposed through to BPF programs is explicitly unstable.
*/
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <net/xdp.h>
#include <net/xfrm.h>
/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
 *
 * Passed by pointer, together with its size, to bpf_xdp_get_xfrm_state().
 *
 * Members:
 * @error    - Out parameter, set for any errors encountered. It is only
 *             written on failure; a successful lookup leaves it untouched.
 *             Values:
 *               -EINVAL - netns_id is less than -1
 *               -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
 *               -ENONET - No network namespace found for netns_id
 *               -ENOENT - No xfrm_state found
 * @netns_id - Specify the network namespace for lookup
 *             Values:
 *               BPF_F_CURRENT_NETNS (-1)
 *                 Use namespace associated with ctx
 *               [0, S32_MAX]
 *                 Network Namespace ID
 * @mark     - XFRM mark to match on
 * @daddr    - Destination address to match on
 * @spi      - Security parameter index to match on
 * @proto    - IP protocol to match on (eg. IPPROTO_ESP)
 * @family   - Protocol family to match on (AF_INET/AF_INET6)
 */
struct bpf_xfrm_state_opts {
s32 error;
s32 netns_id;
u32 mark;
xfrm_address_t daddr;
__be32 spi;
u8 proto;
u16 family;
};
/* Exact size callers must pass as opts__sz; bpf_xdp_get_xfrm_state() rejects
 * any other value with -EINVAL.
 */
enum {
BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
};
__bpf_kfunc_start_defs();
/* bpf_xdp_get_xfrm_state - Look up an XFRM state from an XDP program
 *
 * When a state is found, the returned `struct xfrm_state *` must be handed
 * back with a matching bpf_xdp_xfrm_state_release(). When nothing is returned
 * and @opts is usable, the reason is stored in opts->error.
 *
 * Parameters:
 * @ctx      - Pointer to ctx (xdp_md) in XDP program
 *             Cannot be NULL
 * @opts     - Options for lookup (see struct bpf_xfrm_state_opts)
 *             Cannot be NULL
 * @opts__sz - Length of the bpf_xfrm_state_opts structure
 *             Must be BPF_XFRM_STATE_OPTS_SZ
 */
__bpf_kfunc struct xfrm_state *
bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
{
	struct xdp_buff *xdp_buf = (struct xdp_buff *)ctx;
	struct net *ns = dev_net(xdp_buf->rxq->dev);
	struct xfrm_state *state;

	/* No room for the error field means there is nowhere to report to. */
	if (!opts || opts__sz < sizeof(opts->error))
		return NULL;

	/* The size is checked first so netns_id is only read from a
	 * correctly sized structure.
	 */
	if (opts__sz != BPF_XFRM_STATE_OPTS_SZ ||
	    unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
		opts->error = -EINVAL;
		return NULL;
	}

	/* A non-negative id selects another namespace than the one of ctx. */
	if (opts->netns_id >= 0) {
		ns = get_net_ns_by_id(ns, opts->netns_id);
		if (unlikely(!ns)) {
			opts->error = -ENONET;
			return NULL;
		}
	}

	state = xfrm_state_lookup(ns, opts->mark, &opts->daddr, opts->spi,
				  opts->proto, opts->family);

	/* Balance the get_net_ns_by_id() above; the ctx namespace was not taken. */
	if (opts->netns_id >= 0)
		put_net(ns);

	if (!state)
		opts->error = -ENOENT;

	return state;
}
/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
 *
 * Thin wrapper around xfrm_state_put(). This must be invoked for referenced
 * PTR_TO_BTF_ID, and the verifier rejects the program if any references
 * remain in the program in all of the explored states.
 *
 * Parameters:
 * @x - Pointer to referenced xfrm_state object, obtained using
 *      bpf_xdp_get_xfrm_state.
 */
__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
{
xfrm_state_put(x);
}
__bpf_kfunc_end_defs();
/* BTF id set listing the two kfuncs defined above. The lookup is flagged as
 * acquiring a reference and as possibly returning NULL; the release function
 * is flagged as releasing it.
 */
BTF_SET8_START(xfrm_state_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
BTF_SET8_END(xfrm_state_kfunc_set)
/* Descriptor handed to register_btf_kfunc_id_set(); ties the id set above to
 * this module as its owner.
 */
static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
.owner = THIS_MODULE,
.set = &xfrm_state_kfunc_set,
};
/* Register the XFRM state kfuncs for BPF_PROG_TYPE_XDP programs.
 *
 * Returns whatever register_btf_kfunc_id_set() returns.
 */
int __init register_xfrm_state_bpf(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
&xfrm_state_xdp_kfunc_set);
}
......@@ -50,6 +50,7 @@
*/
#include <arpa/inet.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
......@@ -92,6 +93,11 @@
#define IPIP_TUNL_DEV0 "ipip00"
#define IPIP_TUNL_DEV1 "ipip11"
#define XFRM_AUTH "0x1111111111111111111111111111111111111111"
#define XFRM_ENC "0x22222222222222222222222222222222"
#define XFRM_SPI_IN_TO_OUT 0x1
#define XFRM_SPI_OUT_TO_IN 0x2
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
......@@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void)
SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
}
/* Set up an ESP tunnel-mode XFRM configuration between the at_ns0 namespace
 * and the root namespace by running ip(8) commands: one state and one policy
 * per direction on each side, plus the inner tunnel address and route.
 *
 * Returns 0 when every command succeeded and -1 otherwise (each SYS() is
 * given the fail label).
 */
static int add_xfrm_tunnel(void)
{
/* at_ns0 namespace
* at_ns0 -> root
*/
SYS(fail,
"ip netns exec at_ns0 "
"ip xfrm state add src %s dst %s proto esp "
"spi %d reqid 1 mode tunnel replay-window 42 "
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
SYS(fail,
"ip netns exec at_ns0 "
"ip xfrm policy add src %s/32 dst %s/32 dir out "
"tmpl src %s dst %s proto esp reqid 1 "
"mode tunnel",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
/* root -> at_ns0 */
SYS(fail,
"ip netns exec at_ns0 "
"ip xfrm state add src %s dst %s proto esp "
"spi %d reqid 2 mode tunnel "
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
SYS(fail,
"ip netns exec at_ns0 "
"ip xfrm policy add src %s/32 dst %s/32 dir in "
"tmpl src %s dst %s proto esp reqid 2 "
"mode tunnel",
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
/* address & route */
SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32",
IP4_ADDR_TUNL_DEV0);
SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s",
IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0);
/* root namespace
* at_ns0 -> root
*/
/* replay-window 42 is the value test_xfrm_tunnel() later expects to read
 * back in xfrm_replay_window.
 */
SYS(fail,
"ip xfrm state add src %s dst %s proto esp "
"spi %d reqid 1 mode tunnel replay-window 42 "
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
SYS(fail,
"ip xfrm policy add src %s/32 dst %s/32 dir in "
"tmpl src %s dst %s proto esp reqid 1 "
"mode tunnel",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
/* root -> at_ns0 */
SYS(fail,
"ip xfrm state add src %s dst %s proto esp "
"spi %d reqid 2 mode tunnel "
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
SYS(fail,
"ip xfrm policy add src %s/32 dst %s/32 dir out "
"tmpl src %s dst %s proto esp reqid 2 "
"mode tunnel",
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
/* address & route */
SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1);
SYS(fail, "ip route add %s dev veth1 via %s src %s",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1);
return 0;
fail:
return -1;
}
/* Best-effort removal of the root-namespace XFRM policies and states created
 * by add_xfrm_tunnel(); failures are ignored (SYS_NOFAIL, stderr discarded).
 *
 * NOTE(review): the at_ns0 entries are not deleted here; presumably they go
 * away together with the namespace - confirm against cleanup().
 */
static void delete_xfrm_tunnel(void)
{
SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null",
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
......@@ -532,25 +624,85 @@ static void test_ipip_tunnel(enum ipip_encap encap)
test_tunnel_kern__destroy(skel);
}
/* Subtest: exercise XFRM state access from both a TC and an XDP program.
 *
 * Builds the ESP tunnel, attaches xfrm_get_state (TC ingress) and
 * xfrm_get_state_xdp (XDP) to veth1, pings across the tunnel from at_ns0 and
 * then checks the values the programs stored in the skeleton's bss: reqid,
 * SPI and remote IP from the TC program, replay window from the XDP program.
 *
 * The XFRM configuration is torn down on every exit path, including a
 * partially failed setup.
 */
static void test_xfrm_tunnel(void)
{
	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
			    .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
	struct test_tunnel_kern *skel = NULL;
	struct nstoken *nstoken;
	int xdp_prog_fd;
	int tc_prog_fd;
	int ifindex;
	int err;

	err = add_xfrm_tunnel();
	/* A failure may have left some states/policies behind in the root
	 * namespace, so still run the best-effort teardown.
	 */
	if (!ASSERT_OK(err, "add_xfrm_tunnel"))
		goto done;

	skel = test_tunnel_kern__open_and_load();
	if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
		goto done;

	ifindex = if_nametoindex("veth1");
	if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
		goto done;

	/* attach tc prog to tunnel dev */
	tc_hook.ifindex = ifindex;
	tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
	if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
		goto done;
	if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
		goto done;

	/* attach xdp prog to tunnel dev */
	xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
	if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
		goto done;
	err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts);
	if (!ASSERT_OK(err, "bpf_xdp_attach"))
		goto done;

	/* ping from at_ns0 namespace test */
	nstoken = open_netns("at_ns0");
	/* Without the namespace switch the ping would run in the wrong netns
	 * and close_netns() would be handed a NULL token.
	 */
	if (!ASSERT_OK_PTR(nstoken, "setns"))
		goto done;
	err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
	close_netns(nstoken);
	if (!ASSERT_OK(err, "test_ping"))
		goto done;

	if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
		goto done;
	if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi"))
		goto done;
	if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip"))
		goto done;
	if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window"))
		goto done;

done:
	delete_xfrm_tunnel();
	if (skel)
		test_tunnel_kern__destroy(skel);
}
/* Run test_<name>(args...) as a subtest when test__start_subtest() selects it.
 * The subtest is bracketed by config_device() before and cleanup() after; the
 * return value of config_device() is not checked here.
 */
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
config_device(); \
test_ ## name(__VA_ARGS__); \
cleanup(); \
} \
})
/* Run every tunnel subtest in sequence.
 *
 * Each RUN_TEST() configures the devices before its subtest and cleans up
 * afterwards, so no shared device setup is done here: configuring up front
 * as well would make the first subtest configure devices that already exist.
 *
 * @arg is unused; the function always returns NULL.
 */
static void *test_tunnel_run_tests(void *arg)
{
	/* Drop whatever an earlier, aborted run may have left behind. */
	cleanup();

	RUN_TEST(vxlan_tunnel);
	RUN_TEST(ip6vxlan_tunnel);
	RUN_TEST(ipip_tunnel, NONE);
	RUN_TEST(ipip_tunnel, FOU);
	RUN_TEST(ipip_tunnel, GUE);
	RUN_TEST(xfrm_tunnel);

	return NULL;
}
......
......@@ -26,6 +26,7 @@
#define IPV6_AUTOFLOWLABEL 70
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
#define SOL_TCP 6
......
......@@ -6,66 +6,34 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_tunnel.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/icmp.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/pkt_cls.h>
#include <linux/erspan.h>
#include <linux/udp.h>
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_kfuncs.h"
#include "bpf_tracing_net.h"
/* Print the current source line and a return code via bpf_printk(). */
#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)

/* Constants used by the tunnel programs; each is defined exactly once. */
#define VXLAN_UDP_PORT		4789
#define ETH_P_IP		0x0800
#define PACKET_HOST		0
#define TUNNEL_CSUM		bpf_htons(0x01)
#define TUNNEL_KEY		bpf_htons(0x04)

/* Only IPv4 address assigned to veth1.
 * 172.16.1.200
 */
#define ASSIGNED_ADDR_VETH1 0xac1001c8
/* Geneve option as laid out for this test: a big-endian option class, a type
 * byte, one byte split into a 5-bit length and three 1-bit fields (r3, r2,
 * r1), followed by a fixed 8 bytes of option data.
 */
struct geneve_opt {
__be16 opt_class;
__u8 type;
__u8 length:5;
__u8 r3:1;
__u8 r2:1;
__u8 r1:1;
__u8 opt_data[8]; /* hard-coded to 8 byte */
};
/* Packed VXLAN header: two big-endian 32-bit words, flags then VNI. */
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
} __attribute__((packed));
/* VXLAN tunnel metadata carrying a single 32-bit value.
 * NOTE(review): gbp presumably stands for Group Based Policy - confirm.
 */
struct vxlan_metadata {
__u32 gbp;
};
/* Encapsulation parameters exchanged with bpf_skb_set_fou_encap() and
 * bpf_skb_get_fou_encap(): big-endian source and destination ports.
 */
struct bpf_fou_encap {
__be16 sport;
__be16 dport;
};
/* Encapsulation flavours (FOU or GUE).
 * NOTE(review): presumably the values for the `type` argument of
 * bpf_skb_set_fou_encap() - confirm.
 */
enum bpf_fou_encap_type {
FOU_BPF_ENCAP_FOU,
FOU_BPF_ENCAP_GUE,
};
/* Prototypes of the kernel functions (kfuncs) called by the programs in this
 * file. They are declared __ksym, so no definition is provided here.
 */
int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
struct bpf_fou_encap *encap, int type) __ksym;
int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
struct bpf_fou_encap *encap) __ksym;
/* XFRM state lookup and release pair used by the XDP program. */
struct xfrm_state *
bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
u32 opts__sz) __ksym;
void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
......@@ -205,9 +173,9 @@ int erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 7;
md.version = 2;
md.u.md2.dir = direction;
md.u.md2.hwid = hwid & 0xf;
md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
......@@ -246,8 +214,9 @@ int erspan_get_tunnel(struct __sk_buff *skb)
bpf_printk("\tindex %x\n", index);
#else
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
md.u.md2.dir,
(md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
(BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
bpf_ntohl(md.u.md2.timestamp));
#endif
......@@ -284,9 +253,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 17;
md.version = 2;
md.u.md2.dir = direction;
md.u.md2.hwid = hwid & 0xf;
md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
......@@ -326,8 +295,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
bpf_printk("\tindex %x\n", index);
#else
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
md.u.md2.dir,
(md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
(BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
bpf_ntohl(md.u.md2.timestamp));
#endif
......@@ -963,6 +933,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
/* Results written by the xfrm_get_state TC program (host byte order for the
 * SPI and remote IP); user space reads them through skel->bss.
 */
volatile int xfrm_reqid = 0;
volatile int xfrm_spi = 0;
volatile int xfrm_remote_ip = 0;
SEC("tc")
int xfrm_get_state(struct __sk_buff *skb)
{
......@@ -973,10 +947,58 @@ int xfrm_get_state(struct __sk_buff *skb)
if (ret < 0)
return TC_ACT_OK;
bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n",
x.reqid, bpf_ntohl(x.spi),
bpf_ntohl(x.remote_ipv4));
xfrm_reqid = x.reqid;
xfrm_spi = bpf_ntohl(x.spi);
xfrm_remote_ip = bpf_ntohl(x.remote_ipv4);
return TC_ACT_OK;
}
volatile int xfrm_replay_window = 0;
/* XDP program: for IPv4 ESP packets, look up the matching XFRM state by
 * destination address and SPI and record its replay window in
 * xfrm_replay_window. The packet is always passed on, and an acquired state
 * is always released before returning.
 */
SEC("xdp")
int xfrm_get_state_xdp(struct xdp_md *xdp)
{
	struct bpf_xfrm_state_opts lookup = {};
	u8 ip_scratch[20] = {};
	u8 esp_scratch[8] = {};
	struct xfrm_state *state;
	struct ip_esp_hdr *esp;
	struct bpf_dynptr pkt;
	struct iphdr *ip;
	u32 ip_off = sizeof(struct ethhdr);
	u32 esp_off = ip_off + sizeof(struct iphdr);

	if (bpf_dynptr_from_xdp(xdp, 0, &pkt))
		return XDP_PASS;

	/* IPv4 header directly after the Ethernet header */
	ip = bpf_dynptr_slice(&pkt, ip_off, ip_scratch, sizeof(ip_scratch));
	if (!ip)
		return XDP_PASS;
	if (ip->protocol != IPPROTO_ESP)
		return XDP_PASS;

	/* ESP header directly after a fixed-size IPv4 header */
	esp = bpf_dynptr_slice(&pkt, esp_off, esp_scratch, sizeof(esp_scratch));
	if (!esp)
		return XDP_PASS;

	lookup.netns_id = BPF_F_CURRENT_NETNS;
	lookup.daddr.a4 = ip->daddr;
	lookup.spi = esp->spi;
	lookup.proto = IPPROTO_ESP;
	lookup.family = AF_INET;

	state = bpf_xdp_get_xfrm_state(xdp, &lookup, sizeof(lookup));
	if (!state)
		return XDP_PASS;

	if (state->replay_esn)
		xfrm_replay_window = state->replay_esn->replay_window;

	/* Single release point for the reference acquired above. */
	bpf_xdp_xfrm_state_release(state);
	return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
......@@ -517,90 +517,6 @@ test_ip6ip6()
echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
}
# Configure an ESP tunnel-mode XFRM setup between at_ns0 and the root
# namespace: one state and one policy per direction on each side, plus the
# inner 10.1.1.x addresses and routes.
setup_xfrm_tunnel()
{
# keys: "0x" followed by 40 '1' digits (auth) and 32 '2' digits (enc)
auth=0x$(printf '1%.0s' {1..40})
enc=0x$(printf '2%.0s' {1..32})
spi_in_to_out=0x1
spi_out_to_in=0x2
# at_ns0 namespace
# at_ns0 -> root
ip netns exec at_ns0 \
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
spi $spi_in_to_out reqid 1 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip netns exec at_ns0 \
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
mode tunnel
# root -> at_ns0
ip netns exec at_ns0 \
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
spi $spi_out_to_in reqid 2 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip netns exec at_ns0 \
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
mode tunnel
# address & route
ip netns exec at_ns0 \
ip addr add dev veth0 10.1.1.100/32
ip netns exec at_ns0 \
ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
src 10.1.1.100
# root namespace
# at_ns0 -> root
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
spi $spi_in_to_out reqid 1 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
mode tunnel
# root -> at_ns0
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
spi $spi_out_to_in reqid 2 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
mode tunnel
# address & route
ip addr add dev veth1 10.1.1.200/32
ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
}
# IPSec tunnel test: set up the XFRM tunnel, attach the pinned xfrm_get_state
# program to veth1 ingress, ping from at_ns0 and check the trace output for
# the expected reqid, SPI and remote IP. Returns 1 on failure.
test_xfrm_tunnel()
{
# pick whichever tracefs mount point exists
if [[ -e /sys/kernel/tracing/trace ]]; then
TRACE=/sys/kernel/tracing/trace
else
TRACE=/sys/kernel/debug/tracing/trace
fi
config_device
# empty the trace file before the run
> ${TRACE}
setup_xfrm_tunnel
mkdir -p ${BPF_PIN_TUNNEL_DIR}
bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}
tc qdisc add dev veth1 clsact
tc filter add dev veth1 proto ip ingress bpf da object-pinned \
${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
sleep 1
# each grep status is passed to check_err; $ret is tested after cleanup
grep "reqid 1" ${TRACE}
check_err $?
grep "spi 0x1" ${TRACE}
check_err $?
grep "remote ip 0xac100164" ${TRACE}
check_err $?
cleanup
if [ $ret -ne 0 ]; then
echo -e ${RED}"FAIL: xfrm tunnel"${NC}
return 1
fi
echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
}
attach_bpf()
{
DEV=$1
......@@ -630,10 +546,6 @@ cleanup()
ip link del ip6geneve11 2> /dev/null
ip link del erspan11 2> /dev/null
ip link del ip6erspan11 2> /dev/null
ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
}
cleanup_exit()
......@@ -716,10 +628,6 @@ bpf_tunnel_test()
test_ip6ip6
errors=$(( $errors + $? ))
echo "Testing IPSec tunnel..."
test_xfrm_tunnel
errors=$(( $errors + $? ))
return $errors
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment