Commit 1115169f authored by Paul Chaignon, committed by Daniel Borkmann

selftests/bpf: Don't assign outer source IP to host

The previous commit fixed a bug in the bpf_skb_set_tunnel_key helper to
avoid dropping packets whose outer source IP address isn't assigned to a
host interface. This commit changes the corresponding selftest to not
assign the outer source IP address to an interface.

Not assigning the source IP to an interface causes two issues in the
existing test:

1. The ARP requests will fail for that IP address so we need to add the
   ARP entry manually.
2. The encapsulated ICMP echo reply traffic will not reach the VXLAN
   device. The stack will drop it before it gets there, because the
   outer destination IP address is not assigned to any interface.

To solve 2., we have two choices. Either we perform decapsulation
ourselves in a BPF program attached at veth1 (the base device for the
VXLAN device), or we switch the outer destination address when we
receive the packet at veth1, such that the stack properly demultiplexes
it to the VXLAN device afterward.

This commit implements the second approach, where we switch the outer
destination address from the unassigned IP address to the assigned one,
only for VXLAN traffic ingressing veth1.

Then, at the vxlan device, the BPF program that checks the output of
bpf_skb_get_tunnel_key needs to be updated as the expected local IP
address is now the unassigned one.
Signed-off-by: Paul Chaignon <paul@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/4addde76eaf3477a58975bef15ed2788c44e5f55.1658759380.git.paul@isovalent.com
parent b8fff748
...@@ -82,6 +82,7 @@ ...@@ -82,6 +82,7 @@
#define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
#define MAC_TUNL_DEV1 "52:54:00:d9:02:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
#define MAC_VETH1 "52:54:00:d9:03:00"
#define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV0 "vxlan00"
#define VXLAN_TUNL_DEV1 "vxlan11" #define VXLAN_TUNL_DEV1 "vxlan11"
...@@ -108,10 +109,9 @@ ...@@ -108,10 +109,9 @@
static int config_device(void) static int config_device(void)
{ {
SYS("ip netns add at_ns0"); SYS("ip netns add at_ns0");
SYS("ip link add veth0 type veth peer name veth1"); SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
SYS("ip link set veth0 netns at_ns0"); SYS("ip link set veth0 netns at_ns0");
SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1");
SYS("ip link set dev veth1 up mtu 1500"); SYS("ip link set dev veth1 up mtu 1500");
SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
...@@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) ...@@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void)
VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
IP4_ADDR2_VETH1, MAC_VETH1);
/* root namespace */ /* root namespace */
SYS("ip link add dev %s type vxlan external gbp dstport 4789", SYS("ip link add dev %s type vxlan external gbp dstport 4789",
...@@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) ...@@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void)
if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
goto done; goto done;
/* load and attach bpf prog to veth dev tc hook point */
ifindex = if_nametoindex("veth1");
if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
goto done;
tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
goto done;
if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0"); nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src")) if (!ASSERT_OK_PTR(nstoken, "setns src"))
......
...@@ -14,15 +14,24 @@ ...@@ -14,15 +14,24 @@
#include <linux/if_packet.h> #include <linux/if_packet.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <linux/icmp.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/socket.h> #include <linux/socket.h>
#include <linux/pkt_cls.h> #include <linux/pkt_cls.h>
#include <linux/erspan.h> #include <linux/erspan.h>
#include <linux/udp.h>
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h> #include <bpf/bpf_endian.h>
#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
#define VXLAN_UDP_PORT 4789
/* Only IPv4 address assigned to veth1.
* 172.16.1.200
*/
#define ASSIGNED_ADDR_VETH1 0xac1001c8
struct geneve_opt { struct geneve_opt {
__be16 opt_class; __be16 opt_class;
__u8 type; __u8 type;
...@@ -33,6 +42,11 @@ struct geneve_opt { ...@@ -33,6 +42,11 @@ struct geneve_opt {
__u8 opt_data[8]; /* hard-coded to 8 byte */ __u8 opt_data[8]; /* hard-coded to 8 byte */
}; };
/* On-the-wire VXLAN header layout: two 32-bit big-endian words, the first
 * holding the flags and the second the VNI. Packed so that no padding is
 * inserted when the struct is overlaid on packet data.
 */
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
} __attribute__((packed));
struct vxlan_metadata { struct vxlan_metadata {
__u32 gbp; __u32 gbp;
}; };
...@@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) ...@@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
int ret; int ret;
struct bpf_tunnel_key key; struct bpf_tunnel_key key;
struct vxlan_metadata md; struct vxlan_metadata md;
__u32 orig_daddr;
__u32 index = 0; __u32 index = 0;
__u32 *local_ip = NULL;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) { if (ret < 0) {
...@@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) ...@@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
return TC_ACT_SHOT; return TC_ACT_SHOT;
} }
if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) {
bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n",
key.tunnel_id, key.local_ipv4, key.tunnel_id, key.local_ipv4,
key.remote_ipv4, md.gbp); key.remote_ipv4, md.gbp);
bpf_printk("local_ip 0x%x\n", *local_ip);
log_err(ret); log_err(ret);
return TC_ACT_SHOT; return TC_ACT_SHOT;
} }
...@@ -402,6 +409,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) ...@@ -402,6 +409,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/* Rewrite the outer IPv4 destination address of VXLAN traffic.
 *
 * For IPv4/UDP packets whose UDP destination port is VXLAN_UDP_PORT, the
 * outer destination address is replaced with ASSIGNED_ADDR_VETH1 and the
 * IPv4 header checksum is patched accordingly. The packet type is then set
 * to PACKET_HOST. Any other traffic is passed through untouched.
 *
 * Returns TC_ACT_OK when the packet is passed on (modified or not), and
 * TC_ACT_SHOT when a header does not fit in the linear packet data or when
 * one of the rewrite helpers fails.
 */
SEC("tc")
int veth_set_outer_dst(struct __sk_buff *skb)
{
	struct ethhdr *eth = (struct ethhdr *)(long)skb->data;
	__u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1);
	void *data_end = (void *)(long)skb->data_end;
	struct udphdr *udph;
	struct iphdr *iph;
	int ret = 0;
	__s64 csum;

	/* Bounds checks below have no helper error code to report, so
	 * log_err() prints ret == 0 for them; the line number identifies
	 * which check failed.
	 */
	if ((void *)eth + sizeof(*eth) > data_end) {
		log_err(ret);
		return TC_ACT_SHOT;
	}

	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return TC_ACT_OK;

	iph = (struct iphdr *)(eth + 1);
	if ((void *)iph + sizeof(*iph) > data_end) {
		log_err(ret);
		return TC_ACT_SHOT;
	}

	if (iph->protocol != IPPROTO_UDP)
		return TC_ACT_OK;

	/* NOTE(review): the UDP header is located right after a fixed-size
	 * struct iphdr, i.e. this assumes the IPv4 header carries no options.
	 */
	udph = (struct udphdr *)(iph + 1);
	if ((void *)udph + sizeof(*udph) > data_end) {
		log_err(ret);
		return TC_ACT_SHOT;
	}

	if (udph->dest != bpf_htons(VXLAN_UDP_PORT))
		return TC_ACT_OK;

	if (iph->daddr != assigned_ip) {
		/* Compute the checksum delta while iph still points at valid
		 * packet data; iph is not dereferenced after the store below.
		 */
		csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip,
				     sizeof(__u32), 0);

		ret = bpf_skb_store_bytes(skb,
					  ETH_HLEN + offsetof(struct iphdr, daddr),
					  &assigned_ip, sizeof(__u32), 0);
		if (ret < 0) {
			log_err(ret);
			return TC_ACT_SHOT;
		}

		/* NOTE(review): only the IPv4 header checksum is fixed up;
		 * the UDP checksum is left as is - confirm that the sender
		 * does not emit a non-zero outer UDP checksum.
		 */
		ret = bpf_l3_csum_replace(skb,
					  ETH_HLEN + offsetof(struct iphdr, check),
					  0, csum, 0);
		if (ret < 0) {
			log_err(ret);
			return TC_ACT_SHOT;
		}

		/* Return value deliberately ignored, as in the original code. */
		bpf_skb_change_type(skb, PACKET_HOST);
	}

	return TC_ACT_OK;
}
SEC("tc") SEC("tc")
int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment