Commit c4423a17 authored by Alexei Starovoitov

Merge branch 'Add source ip in bpf tunnel key'

Kaixi Fan says:

====================
From: Kaixi Fan <fankaixi.li@bytedance.com>

Currently, BPF code cannot set the tunnel source IP address of an IP
tunnel, so it cannot fully support flow-based tunnel mode, because
flow-based tunnel mode can set the tunnel source IP address, the
destination IP address and the tunnel key simultaneously.

Flow-based tunnels are useful for overlay networks, and by configuring the
tunnel source IP address, users can make their networks more elastic.
For example, the tunnel source IP can be used to select a different egress
NIC interface for different flows with the same tunnel destination IP. As
another example, a user can choose one of multiple IP addresses of the
egress NIC interface as the packet's tunnel source IP.

Add tunnel and tunnel source IP testcases to test_progs. Other types of
tunnel testcases will be moved to test_progs step by step in the
future.

v6:
- use libbpf api to attach tc progs and remove some shell commands to reduce
  test runtime based on Alexei Starovoitov's suggestion

v5:
- fix some code format errors
- use bpf kernel code at namespace at_ns0 to set tunnel metadata

v4:
- fix subject error of first patch

v3:
- move vxlan tunnel testcases to test_progs
- replace bpf_trace_printk with bpf_printk
- rename bpf kernel prog section name to tic

v2:
- merge vxlan tunnel and tunnel source ip testcases in test_tunnel.sh
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents bd2331b3 71b2ec21
...@@ -5604,6 +5604,10 @@ struct bpf_tunnel_key { ...@@ -5604,6 +5604,10 @@ struct bpf_tunnel_key {
__u8 tunnel_ttl; __u8 tunnel_ttl;
__u16 tunnel_ext; /* Padding, future use. */ __u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label; __u32 tunnel_label;
union {
__u32 local_ipv4;
__u32 local_ipv6[4];
};
}; };
/* user accessible mirror of in-kernel xfrm_state. /* user accessible mirror of in-kernel xfrm_state.
......
...@@ -4498,6 +4498,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key ...@@ -4498,6 +4498,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
if (unlikely(size != sizeof(struct bpf_tunnel_key))) { if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
err = -EINVAL; err = -EINVAL;
switch (size) { switch (size) {
case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, tunnel_ext):
goto set_compat; goto set_compat;
...@@ -4523,10 +4524,14 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key ...@@ -4523,10 +4524,14 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
if (flags & BPF_F_TUNINFO_IPV6) { if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src, memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6)); sizeof(to->remote_ipv6));
memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
sizeof(to->local_ipv6));
to->tunnel_label = be32_to_cpu(info->key.label); to->tunnel_label = be32_to_cpu(info->key.label);
} else { } else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3);
to->tunnel_label = 0; to->tunnel_label = 0;
} }
...@@ -4597,6 +4602,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, ...@@ -4597,6 +4602,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
return -EINVAL; return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) { if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) { switch (size) {
case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, tunnel_ext):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
...@@ -4639,10 +4645,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, ...@@ -4639,10 +4645,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->mode |= IP_TUNNEL_INFO_IPV6; info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6)); sizeof(from->remote_ipv6));
memcpy(&info->key.u.ipv6.src, from->local_ipv6,
sizeof(from->local_ipv6));
info->key.label = cpu_to_be32(from->tunnel_label) & info->key.label = cpu_to_be32(from->tunnel_label) &
IPV6_FLOWLABEL_MASK; IPV6_FLOWLABEL_MASK;
} else { } else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
} }
return 0; return 0;
......
...@@ -5604,6 +5604,10 @@ struct bpf_tunnel_key { ...@@ -5604,6 +5604,10 @@ struct bpf_tunnel_key {
__u8 tunnel_ttl; __u8 tunnel_ttl;
__u16 tunnel_ext; /* Padding, future use. */ __u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label; __u32 tunnel_label;
union {
__u32 local_ipv4;
__u32 local_ipv6[4];
};
}; };
/* user accessible mirror of in-kernel xfrm_state. /* user accessible mirror of in-kernel xfrm_state.
......
This diff is collapsed.
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
# 6) Forward the packet to the overlay tnl dev # 6) Forward the packet to the overlay tnl dev
BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
PING_ARG="-c 3 -w 10 -q" PING_ARG="-c 3 -w 10 -q"
ret=0 ret=0
GREEN='\033[0;92m' GREEN='\033[0;92m'
...@@ -155,52 +156,6 @@ add_ip6erspan_tunnel() ...@@ -155,52 +156,6 @@ add_ip6erspan_tunnel()
ip link set dev $DEV up ip link set dev $DEV up
} }
# Create the pair of tunnel devices used by the IPv4 VXLAN test.
# Reads the globals $TYPE (link type), $DEV_NS (name of the device created
# inside network namespace at_ns0) and $DEV (name of the device created in
# the root namespace).
add_vxlan_tunnel()
{
# Static ARP (neighbor) entries are installed here because the iptables
# set-mark rule works on L3 packets only; it therefore does not apply to
# ARP packets, which causes errors at get_tunnel_{key/opt}.
# at_ns0 namespace: device with id 2, dstport 4789, gbp and a fixed
# remote of 172.16.1.200.
ip netns exec at_ns0 \
ip link add dev $DEV_NS type $TYPE \
id 2 dstport 4789 gbp remote 172.16.1.200
# Give the namespace device a fixed MAC so the peer's static neighbor
# entry below can refer to it.
ip netns exec at_ns0 \
ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
ip netns exec at_ns0 \
ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS
# Append an OUTPUT-chain rule in at_ns0 that sets mark 0x800FF.
ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
# root namespace: device created with the "external" and gbp flags and
# no fixed id or remote.
ip link add dev $DEV type $TYPE external gbp dstport 4789
ip link set dev $DEV address 52:54:00:d9:02:00 up
ip addr add dev $DEV 10.1.1.200/24
ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV
}
# Create the pair of tunnel devices used by the VXLAN-over-IPv6 test.
# Adds IPv6 addresses to veth0 (in at_ns0) and veth1 (root namespace), then
# creates $DEV_NS inside at_ns0 and $DEV in the root namespace.
# Reads the globals $TYPE, $DEV_NS and $DEV.
add_ip6vxlan_tunnel()
{
# Add ::11/96 on veth0 and ::22/96 on veth1; the commented-out commands
# show that the existing IPv4 addresses are intentionally left in place.
#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
ip netns exec at_ns0 ip link set dev veth0 up
#ip -4 addr del 172.16.1.200 dev veth1
ip -6 addr add dev veth1 ::22/96
ip link set dev veth1 up
# at_ns0 namespace: device with id 22, dstport 4789 and fixed IPv6
# endpoints (local ::11, remote ::22); the inner address is IPv4.
ip netns exec at_ns0 \
ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
ip netns exec at_ns0 ip link set dev $DEV_NS up
# root namespace: device created with the "external" flag and no fixed
# id or endpoints.
ip link add dev $DEV type $TYPE external dstport 4789
ip addr add dev $DEV 10.1.1.200/24
ip link set dev $DEV up
}
add_geneve_tunnel() add_geneve_tunnel()
{ {
# at_ns0 namespace # at_ns0 namespace
...@@ -403,58 +358,6 @@ test_ip6erspan() ...@@ -403,58 +358,6 @@ test_ip6erspan()
echo -e ${GREEN}"PASS: $TYPE"${NC} echo -e ${GREEN}"PASS: $TYPE"${NC}
} }
# Run the IPv4 VXLAN tunnel test.
# Sets the globals TYPE, DEV_NS, DEV and ret, builds the devices with
# add_vxlan_tunnel, attaches the vxlan_set_tunnel / vxlan_get_tunnel programs
# to $DEV through attach_bpf, and pings across the tunnel in both directions.
# Prints a FAIL line and returns 1 when $ret is non-zero after cleanup;
# otherwise prints a PASS line.
test_vxlan()
{
TYPE=vxlan
DEV_NS=vxlan00
DEV=vxlan11
ret=0
# NOTE(review): check and config_device are defined elsewhere in the
# script; presumably they verify tunnel-type support and create the
# veth/namespace setup -- confirm against their definitions.
check $TYPE
config_device
add_vxlan_tunnel
attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
# Ping from the root namespace towards the at_ns0 tunnel address.
ping $PING_ARG 10.1.1.100
# NOTE(review): check_err is not visible here; presumably it records a
# non-zero exit status in $ret -- confirm.
check_err $?
# Ping in the opposite direction, from inside at_ns0.
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
check_err $?
# Clean up before reporting so devices are removed on both outcomes.
cleanup
if [ $ret -ne 0 ]; then
echo -e ${RED}"FAIL: $TYPE"${NC}
return 1
fi
echo -e ${GREEN}"PASS: $TYPE"${NC}
}
# Run the VXLAN-over-IPv6 tunnel test.
# Sets the globals TYPE, DEV_NS, DEV and ret, builds the devices with
# add_ip6vxlan_tunnel, attaches the ip6vxlan_set_tunnel / ip6vxlan_get_tunnel
# programs to $DEV through attach_bpf, and pings IPv4 addresses across the
# tunnel in both directions.
# Prints a FAIL line and returns 1 when $ret is non-zero after cleanup;
# otherwise prints a PASS line.
test_ip6vxlan()
{
# TYPE stays "vxlan"; the "ip6" prefix is only added to the device names
# and to the PASS/FAIL messages.
TYPE=vxlan
DEV_NS=ip6vxlan00
DEV=ip6vxlan11
ret=0
# NOTE(review): check and config_device are defined elsewhere in the
# script -- confirm their behavior against their definitions.
check $TYPE
config_device
add_ip6vxlan_tunnel
ip link set dev veth1 mtu 1500
attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
# underlay: the exit status of this ping6 is not passed to check_err,
# so it does not influence $ret.
ping6 $PING_ARG ::11
# ip4 over ip6
ping $PING_ARG 10.1.1.100
# NOTE(review): check_err is not visible here; presumably it records a
# non-zero exit status in $ret -- confirm.
check_err $?
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
check_err $?
# Clean up before reporting so devices are removed on both outcomes.
cleanup
if [ $ret -ne 0 ]; then
echo -e ${RED}"FAIL: ip6$TYPE"${NC}
return 1
fi
echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
}
test_geneve() test_geneve()
{ {
TYPE=geneve TYPE=geneve
...@@ -641,9 +544,11 @@ test_xfrm_tunnel() ...@@ -641,9 +544,11 @@ test_xfrm_tunnel()
config_device config_device
> /sys/kernel/debug/tracing/trace > /sys/kernel/debug/tracing/trace
setup_xfrm_tunnel setup_xfrm_tunnel
mkdir -p ${BPF_PIN_TUNNEL_DIR}
bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}
tc qdisc add dev veth1 clsact tc qdisc add dev veth1 clsact
tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ tc filter add dev veth1 proto ip ingress bpf da object-pinned \
sec xfrm_get_state ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
sleep 1 sleep 1
grep "reqid 1" /sys/kernel/debug/tracing/trace grep "reqid 1" /sys/kernel/debug/tracing/trace
...@@ -666,13 +571,17 @@ attach_bpf() ...@@ -666,13 +571,17 @@ attach_bpf()
DEV=$1 DEV=$1
SET=$2 SET=$2
GET=$3 GET=$3
mkdir -p ${BPF_PIN_TUNNEL_DIR}
bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/
tc qdisc add dev $DEV clsact tc qdisc add dev $DEV clsact
tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
} }
cleanup() cleanup()
{ {
rm -rf ${BPF_PIN_TUNNEL_DIR}
ip netns delete at_ns0 2> /dev/null ip netns delete at_ns0 2> /dev/null
ip link del veth1 2> /dev/null ip link del veth1 2> /dev/null
ip link del ipip11 2> /dev/null ip link del ipip11 2> /dev/null
...@@ -681,8 +590,6 @@ cleanup() ...@@ -681,8 +590,6 @@ cleanup()
ip link del gretap11 2> /dev/null ip link del gretap11 2> /dev/null
ip link del ip6gre11 2> /dev/null ip link del ip6gre11 2> /dev/null
ip link del ip6gretap11 2> /dev/null ip link del ip6gretap11 2> /dev/null
ip link del vxlan11 2> /dev/null
ip link del ip6vxlan11 2> /dev/null
ip link del geneve11 2> /dev/null ip link del geneve11 2> /dev/null
ip link del ip6geneve11 2> /dev/null ip link del ip6geneve11 2> /dev/null
ip link del erspan11 2> /dev/null ip link del erspan11 2> /dev/null
...@@ -714,7 +621,6 @@ enable_debug() ...@@ -714,7 +621,6 @@ enable_debug()
{ {
echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
} }
...@@ -750,14 +656,6 @@ bpf_tunnel_test() ...@@ -750,14 +656,6 @@ bpf_tunnel_test()
test_ip6erspan v2 test_ip6erspan v2
errors=$(( $errors + $? )) errors=$(( $errors + $? ))
echo "Testing VXLAN tunnel..."
test_vxlan
errors=$(( $errors + $? ))
echo "Testing IP6VXLAN tunnel..."
test_ip6vxlan
errors=$(( $errors + $? ))
echo "Testing GENEVE tunnel..." echo "Testing GENEVE tunnel..."
test_geneve test_geneve
errors=$(( $errors + $? )) errors=$(( $errors + $? ))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment