Commit 256becd4 authored by Xuesen Huang, committed by Daniel Borkmann

selftests, bpf: Extend test_tc_tunnel test with vxlan

Add the BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests that
encapsulate Ethernet as the inner L2 header.

Update a vxlan encapsulation test case.
Signed-off-by: Xuesen Huang <huangxuesen@kuaishou.com>
Signed-off-by: Li Wang <wangli09@kuaishou.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210305123347.15311-1-hxseverything@gmail.com
parent d01b59c9
...@@ -24,14 +24,29 @@ static const int cfg_port = 8000; ...@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000; static const int cfg_udp_src = 20000;
#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
#define UDP_PORT 5555 #define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635 #define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777 #define ETH_OVER_UDP_PORT 7777
/* UDP destination port used for the VXLAN encap test case. */
#define VXLAN_UDP_PORT 8472
/* ext_proto bit asking the encap helpers to insert a VXLAN header. */
#define EXTPROTO_VXLAN 0x1
/* Number of distinct VNIs (the VNI is a 24-bit value). */
#define VXLAN_N_VID (1u << 24)
/*
 * Host-order mask of the valid VNI bits. It is applied to the host-order
 * VXLAN_VNI *before* the value is shifted left by 8 and converted with
 * bpf_htonl(), so it must not be byte-swapped or pre-shifted itself;
 * otherwise the VNI is masked to 0 on big-endian hosts.
 */
#define VXLAN_VNI_MASK (VXLAN_N_VID - 1)
/*
 * Value stored directly into the big-endian vx_flags field: bit 27 of the
 * 32-bit word in network byte order (0x08 in the first byte on the wire).
 * Converting here keeps the on-wire bytes the same on either endianness.
 */
#define VXLAN_FLAGS bpf_htonl(1u << 27)
/* Host-order VNI used by the test. */
#define VXLAN_VNI 1
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */ /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff); MPLS_LS_S_MASK | 0xff);
/*
 * VXLAN header as written by the encap helpers in this file: it is placed
 * right after the outer L4 header and is followed by the copied inner
 * Ethernet header. Packed; both fields are declared big-endian.
 */
struct vxlanhdr {
/* Flags word; the encap path stores VXLAN_FLAGS here. */
__be32 vx_flags;
/* VNI word; the encap path shifts the VNI left by 8 bits before storing. */
__be32 vx_vni;
} __attribute__((packed));
struct gre_hdr { struct gre_hdr {
__be16 flags; __be16 flags;
__be16 protocol; __be16 protocol;
...@@ -45,13 +60,13 @@ union l4hdr { ...@@ -45,13 +60,13 @@ union l4hdr {
struct v4hdr { struct v4hdr {
struct iphdr ip; struct iphdr ip;
union l4hdr l4hdr; union l4hdr l4hdr;
__u8 pad[16]; /* enough space for L2 header */ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed)); } __attribute__((packed));
struct v6hdr { struct v6hdr {
struct ipv6hdr ip; struct ipv6hdr ip;
union l4hdr l4hdr; union l4hdr l4hdr;
__u8 pad[16]; /* enough space for L2 header */ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed)); } __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph) static __always_inline void set_ipv4_csum(struct iphdr *iph)
...@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) ...@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16)); iph->check = ~((csum & 0xffff) + (csum >> 16));
} }
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto) __u16 l2_proto, __u16 ext_proto)
{ {
__u16 udp_dst = UDP_PORT; __u16 udp_dst = UDP_PORT;
struct iphdr iph_inner; struct iphdr iph_inner;
struct v4hdr h_outer; struct v4hdr h_outer;
struct tcphdr tcph; struct tcphdr tcph;
int olen, l2_len; int olen, l2_len;
__u8 *l2_hdr = NULL;
int tcp_off; int tcp_off;
__u64 flags; __u64 flags;
...@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, ...@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break; break;
case ETH_P_TEB: case ETH_P_TEB:
l2_len = ETH_HLEN; l2_len = ETH_HLEN;
udp_dst = ETH_OVER_UDP_PORT; if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
l2_len += sizeof(struct vxlanhdr);
} else
udp_dst = ETH_OVER_UDP_PORT;
break; break;
} }
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
...@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, ...@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
} }
/* add L2 encap (if specified) */ /* add L2 encap (if specified) */
l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) { switch (l2_proto) {
case ETH_P_MPLS_UC: case ETH_P_MPLS_UC:
*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; *(__u32 *)l2_hdr = mpls_label;
break; break;
case ETH_P_TEB: case ETH_P_TEB:
if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
ETH_HLEN))
if (ext_proto & EXTPROTO_VXLAN) {
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
l2_hdr += sizeof(struct vxlanhdr);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT; return TC_ACT_SHOT;
break; break;
} }
olen += l2_len; olen += l2_len;
...@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, ...@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK; return TC_ACT_OK;
} }
static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto) __u16 l2_proto)
{
return __encap_ipv4(skb, encap_proto, l2_proto, 0);
}
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto, __u16 ext_proto)
{ {
__u16 udp_dst = UDP_PORT; __u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner; struct ipv6hdr iph_inner;
struct v6hdr h_outer; struct v6hdr h_outer;
struct tcphdr tcph; struct tcphdr tcph;
int olen, l2_len; int olen, l2_len;
__u8 *l2_hdr = NULL;
__u16 tot_len; __u16 tot_len;
__u64 flags; __u64 flags;
...@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, ...@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break; break;
case ETH_P_TEB: case ETH_P_TEB:
l2_len = ETH_HLEN; l2_len = ETH_HLEN;
udp_dst = ETH_OVER_UDP_PORT; if (ext_proto & EXTPROTO_VXLAN) {
udp_dst = VXLAN_UDP_PORT;
l2_len += sizeof(struct vxlanhdr);
} else
udp_dst = ETH_OVER_UDP_PORT;
break; break;
} }
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
...@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, ...@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
sizeof(h_outer.l4hdr.udp); sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0; h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len); h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break; break;
...@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, ...@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
} }
/* add L2 encap (if specified) */ /* add L2 encap (if specified) */
l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) { switch (l2_proto) {
case ETH_P_MPLS_UC: case ETH_P_MPLS_UC:
*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; *(__u32 *)l2_hdr = mpls_label;
break; break;
case ETH_P_TEB: case ETH_P_TEB:
if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
ETH_HLEN))
if (ext_proto & EXTPROTO_VXLAN) {
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
l2_hdr += sizeof(struct vxlanhdr);
}
if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT; return TC_ACT_SHOT;
break; break;
} }
...@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, ...@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK; return TC_ACT_OK;
} }
/*
 * Plain IPv6 encap entry point. Forwards to __encap_ipv6() with ext_proto
 * set to 0, so no extension protocol bit (e.g. EXTPROTO_VXLAN) is requested.
 */
static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	const __u16 no_ext_proto = 0;

	return __encap_ipv6(skb, encap_proto, l2_proto, no_ext_proto);
}
SEC("encap_ipip_none") SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb) int __encap_ipip_none(struct __sk_buff *skb)
{ {
...@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb) ...@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/*
 * Program for the "encap_vxlan_eth" section: IPv4 packets are handed to
 * __encap_ipv4() with UDP as the outer L4 protocol, ETH_P_TEB as the inner
 * L2 protocol and the VXLAN extension requested. Everything else is passed
 * through unchanged.
 */
SEC("encap_vxlan_eth")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
	/* Not IPv4: nothing to encapsulate. */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
		return TC_ACT_OK;

	return __encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB, EXTPROTO_VXLAN);
}
SEC("encap_sit_none") SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb) int __encap_sit_none(struct __sk_buff *skb)
{ {
...@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb) ...@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/*
 * Program for the "encap_ip6vxlan_eth" section: IPv6 packets are handed to
 * __encap_ipv6() with UDP as the outer L4 protocol, ETH_P_TEB as the inner
 * L2 protocol and the VXLAN extension requested. Everything else is passed
 * through unchanged.
 */
SEC("encap_ip6vxlan_eth")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
	/* Not IPv6: nothing to encapsulate. */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
		return TC_ACT_OK;

	return __encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB, EXTPROTO_VXLAN);
}
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{ {
char buf[sizeof(struct v6hdr)]; char buf[sizeof(struct v6hdr)];
...@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) ...@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT: case ETH_OVER_UDP_PORT:
olen += ETH_HLEN; olen += ETH_HLEN;
break; break;
case VXLAN_UDP_PORT:
olen += ETH_HLEN + sizeof(struct vxlanhdr);
break;
} }
break; break;
default: default:
......
...@@ -44,8 +44,8 @@ setup() { ...@@ -44,8 +44,8 @@ setup() {
# clamp route to reserve room for tunnel headers # clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main ip -netns "${ns1}" -6 route flush table main
ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1 ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1 ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
sleep 1 sleep 1
...@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then ...@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
echo "sit" echo "sit"
$0 ipv6 sit none 100 $0 ipv6 sit none 100
echo "ip4 vxlan"
$0 ipv4 vxlan eth 2000
echo "ip6 vxlan"
$0 ipv6 ip6vxlan eth 2000
for mac in none mpls eth ; do for mac in none mpls eth ; do
echo "ip gre $mac" echo "ip gre $mac"
$0 ipv4 gre $mac 100 $0 ipv4 gre $mac 100
...@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then ...@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
targs="encap fou encap-sport auto encap-dport $dport" targs="encap fou encap-sport auto encap-dport $dport"
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
ttype=$gretaptype ttype=$gretaptype
elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
ttype="vxlan"
targs="id 1 dstport 8472 udp6zerocsumrx"
else else
ttype=$tuntype ttype=$tuntype
targs="" targs=""
...@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then ...@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
# No support for TEB fou tunnel; expect failure. # No support for TEB fou tunnel; expect failure.
expect_tun_fail=1 expect_tun_fail=1
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
# Share ethernet address between tunnel/veth2 so L2 decap works. # Share ethernet address between tunnel/veth2 so L2 decap works.
ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \ ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
awk '/ether/ { print $2 }') awk '/ether/ { print $2 }')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment