Commit 3ad7a4b1 authored by Roopa Prabhu, committed by David S. Miller

vxlan: support fdb and learning in COLLECT_METADATA mode

Vxlan COLLECT_METADATA mode today solves the per-vni netdev
scalability problem in l3 networks. It expects all forwarding
information to be present in dst_metadata. This patch series
enhances collect metadata mode to include the case where only
vni is present in dst_metadata, and the vxlan driver can then use
the rest of the forwarding information database to make forwarding
decisions. There is no change to default COLLECT_METADATA
behaviour. These changes only apply to COLLECT_METADATA when
used with the bridging use-case with a special dst_metadata
tunnel info flag (eg: where vxlan device is part of a bridge).
For all this to work, the vxlan driver will need to now support a
single fdb table hashed by mac + vni. This series essentially makes
this happen.

use-case and workflow:
vxlan collect metadata device participates in bridging vlan
to vn-segments. Bridge driver above the vxlan device,
sends the vni corresponding to the vlan in the dst_metadata.
vxlan driver will lookup forwarding database with (mac + vni)
for the required remote destination information to forward the
packet.

Changes introduced by this patch:
    - allow learning and forwarding database state in vxlan netdev in
      COLLECT_METADATA mode. Current behaviour is not changed
      by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
      to support the new bridge friendly mode.
    - A single fdb table hashed by (mac, vni) to allow fdb entries with
      multiple vnis in the same fdb table
    - rx path already has the vni
    - tx path expects a vni in the packet with dst_metadata
    - prior to this series, fdb remote_dsts carried remote vni and
      the vxlan device carrying the fdb table represented the
      source vni. With the vxlan device now representing multiple vnis,
      this patch adds a src vni attribute to the fdb entry. The remote
      vni already uses NDA_VNI attribute. This patch introduces
      NDA_SRC_VNI netlink attribute to represent the src vni in a multi
      vni fdb table.

iproute2 example (patched and pruned iproute2 output to just show
relevant fdb entries):
example shows same host mac learnt on two vni's.

before (netdev per vni):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self

after this patch with collect metadata in bridged mode (single netdev):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f35581d6
...@@ -75,6 +75,7 @@ struct vxlan_fdb { ...@@ -75,6 +75,7 @@ struct vxlan_fdb {
struct list_head remotes; struct list_head remotes;
u8 eth_addr[ETH_ALEN]; u8 eth_addr[ETH_ALEN];
u16 state; /* see ndm_state */ u16 state; /* see ndm_state */
__be32 vni;
u8 flags; /* see ndm_flags */ u8 flags; /* see ndm_flags */
}; };
...@@ -302,6 +303,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ...@@ -302,6 +303,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (rdst->remote_vni != vxlan->default_dst.remote_vni && if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure; goto nla_put_failure;
if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
nla_put_u32(skb, NDA_SRC_VNI,
be32_to_cpu(fdb->vni)))
goto nla_put_failure;
if (rdst->remote_ifindex && if (rdst->remote_ifindex &&
nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
goto nla_put_failure; goto nla_put_failure;
...@@ -400,34 +405,51 @@ static u32 eth_hash(const unsigned char *addr) ...@@ -400,34 +405,51 @@ static u32 eth_hash(const unsigned char *addr)
return hash_64(value, FDB_HASH_BITS); return hash_64(value, FDB_HASH_BITS);
} }
/* Compute an fdb hash-table index from a MAC address and a VNI.
 *
 * @addr: Ethernet address; the four bytes at offsets 2..5 are read.
 * @vni:  VNI mixed into the hash alongside the MAC bytes.
 *
 * Returns the jhash_2words() result masked with (FDB_HASH_SIZE - 1).
 * NOTE(review): the mask presumably relies on FDB_HASH_SIZE being a
 * power of two -- confirm against its definition.
 */
static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
{
/* use 1 byte of OUI and 3 bytes of NIC */
u32 key = get_unaligned((u32 *)(addr + 2));
/* vni is passed to the hash as-is (no byte-order conversion here);
 * vxlan_salt is supplied as the third jhash_2words() argument.
 */
return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
}
/* Hash chain to use given mac address */ /* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
const u8 *mac) const u8 *mac, __be32 vni)
{ {
return &vxlan->fdb_head[eth_hash(mac)]; if (vxlan->flags & VXLAN_F_COLLECT_METADATA)
return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
else
return &vxlan->fdb_head[eth_hash(mac)];
} }
/* Look up Ethernet address in forwarding table */ /* Look up Ethernet address in forwarding table */
static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
const u8 *mac) const u8 *mac, __be32 vni)
{ {
struct hlist_head *head = vxlan_fdb_head(vxlan, mac); struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
struct vxlan_fdb *f; struct vxlan_fdb *f;
hlist_for_each_entry_rcu(f, head, hlist) { hlist_for_each_entry_rcu(f, head, hlist) {
if (ether_addr_equal(mac, f->eth_addr)) if (ether_addr_equal(mac, f->eth_addr)) {
return f; if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
if (vni == f->vni)
return f;
} else {
return f;
}
}
} }
return NULL; return NULL;
} }
static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
const u8 *mac) const u8 *mac, __be32 vni)
{ {
struct vxlan_fdb *f; struct vxlan_fdb *f;
f = __vxlan_find_mac(vxlan, mac); f = __vxlan_find_mac(vxlan, mac, vni);
if (f) if (f)
f->used = jiffies; f->used = jiffies;
...@@ -605,15 +627,15 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) ...@@ -605,15 +627,15 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
static int vxlan_fdb_create(struct vxlan_dev *vxlan, static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip, const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags, __u16 state, __u16 flags,
__be16 port, __be32 vni, __u32 ifindex, __be16 port, __be32 src_vni, __be32 vni,
__u8 ndm_flags) __u32 ifindex, __u8 ndm_flags)
{ {
struct vxlan_rdst *rd = NULL; struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f; struct vxlan_fdb *f;
int notify = 0; int notify = 0;
int rc; int rc;
f = __vxlan_find_mac(vxlan, mac); f = __vxlan_find_mac(vxlan, mac, src_vni);
if (f) { if (f) {
if (flags & NLM_F_EXCL) { if (flags & NLM_F_EXCL) {
netdev_dbg(vxlan->dev, netdev_dbg(vxlan->dev,
...@@ -670,6 +692,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, ...@@ -670,6 +692,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
f->state = state; f->state = state;
f->flags = ndm_flags; f->flags = ndm_flags;
f->updated = f->used = jiffies; f->updated = f->used = jiffies;
f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes); INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN); memcpy(f->eth_addr, mac, ETH_ALEN);
...@@ -681,7 +704,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, ...@@ -681,7 +704,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
++vxlan->addrcnt; ++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist, hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac)); vxlan_fdb_head(vxlan, mac, src_vni));
} }
if (notify) { if (notify) {
...@@ -718,8 +741,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) ...@@ -718,8 +741,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
} }
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
union vxlan_addr *ip, __be16 *port, __be32 *vni, union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
u32 *ifindex) __be32 *vni, u32 *ifindex)
{ {
struct net *net = dev_net(vxlan->dev); struct net *net = dev_net(vxlan->dev);
int err; int err;
...@@ -757,6 +780,14 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, ...@@ -757,6 +780,14 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
*vni = vxlan->default_dst.remote_vni; *vni = vxlan->default_dst.remote_vni;
} }
if (tb[NDA_SRC_VNI]) {
if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
return -EINVAL;
*src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
} else {
*src_vni = vxlan->default_dst.remote_vni;
}
if (tb[NDA_IFINDEX]) { if (tb[NDA_IFINDEX]) {
struct net_device *tdev; struct net_device *tdev;
...@@ -782,7 +813,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -782,7 +813,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* struct net *net = dev_net(vxlan->dev); */ /* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip; union vxlan_addr ip;
__be16 port; __be16 port;
__be32 vni; __be32 src_vni, vni;
u32 ifindex; u32 ifindex;
int err; int err;
...@@ -795,7 +826,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -795,7 +826,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (tb[NDA_DST] == NULL) if (tb[NDA_DST] == NULL)
return -EINVAL; return -EINVAL;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
if (err) if (err)
return err; return err;
...@@ -804,36 +835,24 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -804,36 +835,24 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
spin_lock_bh(&vxlan->hash_lock); spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
port, vni, ifindex, ndm->ndm_flags); port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
return err; return err;
} }
/* Delete entry (via netlink) */ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, union vxlan_addr ip,
struct net_device *dev, __be16 port, __be32 src_vni, u32 vni, u32 ifindex,
const unsigned char *addr, u16 vid) u16 vid)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f; struct vxlan_fdb *f;
struct vxlan_rdst *rd = NULL; struct vxlan_rdst *rd = NULL;
union vxlan_addr ip; int err = -ENOENT;
__be16 port;
__be32 vni;
u32 ifindex;
int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); f = vxlan_find_mac(vxlan, addr, src_vni);
if (err)
return err;
err = -ENOENT;
spin_lock_bh(&vxlan->hash_lock);
f = vxlan_find_mac(vxlan, addr);
if (!f) if (!f)
goto out; return err;
if (!vxlan_addr_any(&ip)) { if (!vxlan_addr_any(&ip)) {
rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex); rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
...@@ -841,8 +860,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -841,8 +860,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
goto out; goto out;
} }
err = 0;
/* remove a destination if it's not the only one on the list, /* remove a destination if it's not the only one on the list,
* otherwise destroy the fdb entry * otherwise destroy the fdb entry
*/ */
...@@ -856,6 +873,28 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -856,6 +873,28 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f);
out: out:
return 0;
}
/* Delete entry (via netlink) */
static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
union vxlan_addr ip;
__be32 src_vni, vni;
__be16 port;
u32 ifindex;
int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
if (err)
return err;
spin_lock_bh(&vxlan->hash_lock);
err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
vid);
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
return err; return err;
...@@ -901,12 +940,13 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, ...@@ -901,12 +940,13 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
* Return true if packet is bogus and should be dropped. * Return true if packet is bogus and should be dropped.
*/ */
static bool vxlan_snoop(struct net_device *dev, static bool vxlan_snoop(struct net_device *dev,
union vxlan_addr *src_ip, const u8 *src_mac) union vxlan_addr *src_ip, const u8 *src_mac,
__be32 vni)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f; struct vxlan_fdb *f;
f = vxlan_find_mac(vxlan, src_mac); f = vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) { if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f); struct vxlan_rdst *rdst = first_remote_rcu(f);
...@@ -935,6 +975,7 @@ static bool vxlan_snoop(struct net_device *dev, ...@@ -935,6 +975,7 @@ static bool vxlan_snoop(struct net_device *dev,
NUD_REACHABLE, NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE, NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port, vxlan->cfg.dst_port,
vni,
vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni,
0, NTF_SELF); 0, NTF_SELF);
spin_unlock(&vxlan->hash_lock); spin_unlock(&vxlan->hash_lock);
...@@ -1202,7 +1243,7 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed, ...@@ -1202,7 +1243,7 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
static bool vxlan_set_mac(struct vxlan_dev *vxlan, static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs, struct vxlan_sock *vs,
struct sk_buff *skb) struct sk_buff *skb, __be32 vni)
{ {
union vxlan_addr saddr; union vxlan_addr saddr;
...@@ -1226,7 +1267,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, ...@@ -1226,7 +1267,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
} }
if ((vxlan->flags & VXLAN_F_LEARN) && if ((vxlan->flags & VXLAN_F_LEARN) &&
vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni))
return false; return false;
return true; return true;
...@@ -1268,6 +1309,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1268,6 +1309,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
__be16 protocol = htons(ETH_P_TEB); __be16 protocol = htons(ETH_P_TEB);
bool raw_proto = false; bool raw_proto = false;
void *oiph; void *oiph;
__be32 vni = 0;
/* Need UDP and VXLAN header to be present */ /* Need UDP and VXLAN header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN)) if (!pskb_may_pull(skb, VXLAN_HLEN))
...@@ -1289,7 +1331,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1289,7 +1331,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (!vs) if (!vs)
goto drop; goto drop;
vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni)); vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
if ((vs->flags & VXLAN_F_COLLECT_METADATA) && !vni)
goto drop;
vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan) if (!vxlan)
goto drop; goto drop;
...@@ -1307,7 +1354,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1307,7 +1354,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop; goto drop;
if (vxlan_collect_metadata(vs)) { if (vxlan_collect_metadata(vs)) {
__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
struct metadata_dst *tun_dst; struct metadata_dst *tun_dst;
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
...@@ -1345,7 +1391,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1345,7 +1391,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
} }
if (!raw_proto) { if (!raw_proto) {
if (!vxlan_set_mac(vxlan, vs, skb)) if (!vxlan_set_mac(vxlan, vs, skb, vni))
goto drop; goto drop;
} else { } else {
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
...@@ -1377,7 +1423,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1377,7 +1423,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
return 0; return 0;
} }
static int arp_reduce(struct net_device *dev, struct sk_buff *skb) static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct arphdr *parp; struct arphdr *parp;
...@@ -1424,7 +1470,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) ...@@ -1424,7 +1470,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
goto out; goto out;
} }
f = vxlan_find_mac(vxlan, n->ha); f = vxlan_find_mac(vxlan, n->ha, vni);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */ /* bridge-local neighbor */
neigh_release(n); neigh_release(n);
...@@ -1548,7 +1594,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, ...@@ -1548,7 +1594,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
return reply; return reply;
} }
static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct nd_msg *msg; struct nd_msg *msg;
...@@ -1585,7 +1631,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) ...@@ -1585,7 +1631,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
goto out; goto out;
} }
f = vxlan_find_mac(vxlan, n->ha); f = vxlan_find_mac(vxlan, n->ha, vni);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */ /* bridge-local neighbor */
neigh_release(n); neigh_release(n);
...@@ -1906,7 +1952,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, ...@@ -1906,7 +1952,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
/* Bypass encapsulation if the destination is local */ /* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan) struct vxlan_dev *dst_vxlan, __be32 vni)
{ {
struct pcpu_sw_netstats *tx_stats, *rx_stats; struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback; union vxlan_addr loopback;
...@@ -1932,7 +1978,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, ...@@ -1932,7 +1978,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
} }
if (dst_vxlan->flags & VXLAN_F_LEARN) if (dst_vxlan->flags & VXLAN_F_LEARN)
vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source); vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni);
u64_stats_update_begin(&tx_stats->syncp); u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++; tx_stats->tx_packets++;
...@@ -1976,7 +2022,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, ...@@ -1976,7 +2022,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
return -ENOENT; return -ENOENT;
} }
vxlan_encap_bypass(skb, vxlan, dst_vxlan); vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
return 1; return 1;
} }
...@@ -1984,7 +2030,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, ...@@ -1984,7 +2030,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
} }
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc) __be32 default_vni, struct vxlan_rdst *rdst,
bool did_rsc)
{ {
struct dst_cache *dst_cache; struct dst_cache *dst_cache;
struct ip_tunnel_info *info; struct ip_tunnel_info *info;
...@@ -2011,14 +2058,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2011,14 +2058,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (vxlan_addr_any(dst)) { if (vxlan_addr_any(dst)) {
if (did_rsc) { if (did_rsc) {
/* short-circuited back to local bridge */ /* short-circuited back to local bridge */
vxlan_encap_bypass(skb, vxlan, vxlan); vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
return; return;
} }
goto drop; goto drop;
} }
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni; vni = (rdst->remote_vni) ? : default_vni;
src = &vxlan->cfg.saddr; src = &vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache; dst_cache = &rdst->dst_cache;
md->gbp = skb->mark; md->gbp = skb->mark;
...@@ -2173,23 +2220,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2173,23 +2220,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
bool did_rsc = false; bool did_rsc = false;
struct vxlan_rdst *rdst, *fdst = NULL; struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f; struct vxlan_fdb *f;
__be32 vni = 0;
info = skb_tunnel_info(skb); info = skb_tunnel_info(skb);
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
if (info && info->mode & IP_TUNNEL_INFO_TX) if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
vxlan_xmit_one(skb, dev, NULL, false); info->mode & IP_TUNNEL_INFO_TX) {
else vni = tunnel_id_to_key32(info->key.tun_id);
kfree_skb(skb); } else {
return NETDEV_TX_OK; if (info && info->mode & IP_TUNNEL_INFO_TX)
vxlan_xmit_one(skb, dev, vni, NULL, false);
else
kfree_skb(skb);
return NETDEV_TX_OK;
}
} }
if (vxlan->flags & VXLAN_F_PROXY) { if (vxlan->flags & VXLAN_F_PROXY) {
eth = eth_hdr(skb); eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP) if (ntohs(eth->h_proto) == ETH_P_ARP)
return arp_reduce(dev, skb); return arp_reduce(dev, skb, vni);
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
else if (ntohs(eth->h_proto) == ETH_P_IPV6 && else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) pskb_may_pull(skb, sizeof(struct ipv6hdr)
...@@ -2200,13 +2253,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2200,13 +2253,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
msg = (struct nd_msg *)skb_transport_header(skb); msg = (struct nd_msg *)skb_transport_header(skb);
if (msg->icmph.icmp6_code == 0 && if (msg->icmph.icmp6_code == 0 &&
msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
return neigh_reduce(dev, skb); return neigh_reduce(dev, skb, vni);
} }
#endif #endif
} }
eth = eth_hdr(skb); eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest); f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false; did_rsc = false;
if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
...@@ -2214,11 +2267,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2214,11 +2267,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
ntohs(eth->h_proto) == ETH_P_IPV6)) { ntohs(eth->h_proto) == ETH_P_IPV6)) {
did_rsc = route_shortcircuit(dev, skb); did_rsc = route_shortcircuit(dev, skb);
if (did_rsc) if (did_rsc)
f = vxlan_find_mac(vxlan, eth->h_dest); f = vxlan_find_mac(vxlan, eth->h_dest, vni);
} }
if (f == NULL) { if (f == NULL) {
f = vxlan_find_mac(vxlan, all_zeros_mac); f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f == NULL) { if (f == NULL) {
if ((vxlan->flags & VXLAN_F_L2MISS) && if ((vxlan->flags & VXLAN_F_L2MISS) &&
!is_multicast_ether_addr(eth->h_dest)) !is_multicast_ether_addr(eth->h_dest))
...@@ -2239,11 +2292,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2239,11 +2292,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
} }
skb1 = skb_clone(skb, GFP_ATOMIC); skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1) if (skb1)
vxlan_xmit_one(skb1, dev, rdst, did_rsc); vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
} }
if (fdst) if (fdst)
vxlan_xmit_one(skb, dev, fdst, did_rsc); vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
else else
kfree_skb(skb); kfree_skb(skb);
return NETDEV_TX_OK; return NETDEV_TX_OK;
...@@ -2307,12 +2360,12 @@ static int vxlan_init(struct net_device *dev) ...@@ -2307,12 +2360,12 @@ static int vxlan_init(struct net_device *dev)
return 0; return 0;
} }
static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan) static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
{ {
struct vxlan_fdb *f; struct vxlan_fdb *f;
spin_lock_bh(&vxlan->hash_lock); spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac); f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f) if (f)
vxlan_fdb_destroy(vxlan, f); vxlan_fdb_destroy(vxlan, f);
spin_unlock_bh(&vxlan->hash_lock); spin_unlock_bh(&vxlan->hash_lock);
...@@ -2322,7 +2375,7 @@ static void vxlan_uninit(struct net_device *dev) ...@@ -2322,7 +2375,7 @@ static void vxlan_uninit(struct net_device *dev)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
vxlan_fdb_delete_default(vxlan); vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
free_percpu(dev->tstats); free_percpu(dev->tstats);
} }
...@@ -2923,6 +2976,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, ...@@ -2923,6 +2976,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
NLM_F_EXCL|NLM_F_CREATE, NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port, vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex, vxlan->default_dst.remote_ifindex,
NTF_SELF); NTF_SELF);
if (err) if (err)
...@@ -2931,7 +2985,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, ...@@ -2931,7 +2985,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
err = register_netdevice(dev); err = register_netdevice(dev);
if (err) { if (err) {
vxlan_fdb_delete_default(vxlan); vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
return err; return err;
} }
......
...@@ -26,6 +26,7 @@ enum { ...@@ -26,6 +26,7 @@ enum {
NDA_IFINDEX, NDA_IFINDEX,
NDA_MASTER, NDA_MASTER,
NDA_LINK_NETNSID, NDA_LINK_NETNSID,
NDA_SRC_VNI,
__NDA_MAX __NDA_MAX
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment