Commit 5270c4da authored by David S. Miller

Merge branch 'vxlan-cleanups'

Jiri Benc says:

====================
vxlan: clean up rx path, consolidating extension handling

The rx path of VXLAN has over time turned into a kind of spaghetti code. The rx
processing is split between vxlan_udp_encap_recv and vxlan_rcv but in an
artificial way: vxlan_rcv is just called at the end of vxlan_udp_encap_recv,
continuing the rx processing where vxlan_udp_encap_recv left it. There's no
clear border between those two functions.

It makes sense to combine those functions into one; this will actually be
needed for VXLAN-GPE where we'll need to skip part of the processing which
is hard to do with the current code.

However, both functions are too long already. This patchset is shortening
them, consolidating extension handling that is spread all around together
and moving it to separate functions. (Later patchsets will do more
consolidation in other parts of the functions with the final goal of merging
vxlan_udp_encap_recv and vxlan_rcv.)

In the process of consolidating the extension handling, I needed to deal with
the vni field in a generic way, as its lower 8 bits mean different things for
different extensions. While cleaning up the code to strictly distinguish
between "vni" and "vni field" (which contains vni plus an additional byte),
I also converted the code to stop converting endianness back and forth.

The full picture can be seen at:
https://github.com/jbenc/linux-vxlan/commits/master
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d8ef0347 b9167b2e
...@@ -197,9 +197,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, ...@@ -197,9 +197,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
#endif #endif
/* Virtual Network hash table head */ /* Virtual Network hash table head */
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
{ {
return &vs->vni_list[hash_32(id, VNI_HASH_BITS)]; return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
} }
/* Socket hash table head */ /* Socket hash table head */
...@@ -242,12 +242,16 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, ...@@ -242,12 +242,16 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
return NULL; return NULL;
} }
static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
{ {
struct vxlan_dev *vxlan; struct vxlan_dev *vxlan;
hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) { /* For flow based devices, map all packets to VNI 0 */
if (vxlan->default_dst.remote_vni == id) if (vs->flags & VXLAN_F_COLLECT_METADATA)
vni = 0;
hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
if (vxlan->default_dst.remote_vni == vni)
return vxlan; return vxlan;
} }
...@@ -255,7 +259,7 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) ...@@ -255,7 +259,7 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
} }
/* Look up VNI in a per net namespace table */ /* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
sa_family_t family, __be16 port, sa_family_t family, __be16 port,
u32 flags) u32 flags)
{ {
...@@ -265,7 +269,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, ...@@ -265,7 +269,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
if (!vs) if (!vs)
return NULL; return NULL;
return vxlan_vs_find_vni(vs, id); return vxlan_vs_find_vni(vs, vni);
} }
/* Fill in neighbour message in skbuff. */ /* Fill in neighbour message in skbuff. */
...@@ -315,7 +319,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ...@@ -315,7 +319,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
nla_put_be16(skb, NDA_PORT, rdst->remote_port)) nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure; goto nla_put_failure;
if (rdst->remote_vni != vxlan->default_dst.remote_vni && if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure; goto nla_put_failure;
if (rdst->remote_ifindex && if (rdst->remote_ifindex &&
nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
...@@ -383,7 +387,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) ...@@ -383,7 +387,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
}; };
struct vxlan_rdst remote = { struct vxlan_rdst remote = {
.remote_ip = *ipa, /* goes to NDA_DST */ .remote_ip = *ipa, /* goes to NDA_DST */
.remote_vni = VXLAN_N_VID, .remote_vni = cpu_to_be32(VXLAN_N_VID),
}; };
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH); vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
...@@ -452,7 +456,7 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, ...@@ -452,7 +456,7 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
/* caller should hold vxlan->hash_lock */ /* caller should hold vxlan->hash_lock */
static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, union vxlan_addr *ip, __be16 port,
__u32 vni, __u32 ifindex) __be32 vni, __u32 ifindex)
{ {
struct vxlan_rdst *rd; struct vxlan_rdst *rd;
...@@ -469,7 +473,8 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, ...@@ -469,7 +473,8 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
/* Replace destination of unicast mac */ /* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f, static int vxlan_fdb_replace(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) union vxlan_addr *ip, __be16 port, __be32 vni,
__u32 ifindex)
{ {
struct vxlan_rdst *rd; struct vxlan_rdst *rd;
...@@ -491,7 +496,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, ...@@ -491,7 +496,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
/* Add/update destinations for multicast */ /* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f, static int vxlan_fdb_append(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, __u32 vni, union vxlan_addr *ip, __be16 port, __be32 vni,
__u32 ifindex, struct vxlan_rdst **rdp) __u32 ifindex, struct vxlan_rdst **rdp)
{ {
struct vxlan_rdst *rd; struct vxlan_rdst *rd;
...@@ -523,7 +528,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, ...@@ -523,7 +528,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
unsigned int off, unsigned int off,
struct vxlanhdr *vh, size_t hdrlen, struct vxlanhdr *vh, size_t hdrlen,
u32 data, struct gro_remcsum *grc, __be32 vni_field,
struct gro_remcsum *grc,
bool nopartial) bool nopartial)
{ {
size_t start, offset; size_t start, offset;
...@@ -534,10 +540,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, ...@@ -534,10 +540,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
if (!NAPI_GRO_CB(skb)->csum_valid) if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL; return NULL;
start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; start = vxlan_rco_start(vni_field);
offset = start + ((data & VXLAN_RCO_UDP) ? offset = start + vxlan_rco_offset(vni_field);
offsetof(struct udphdr, check) :
offsetof(struct tcphdr, check));
vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen, vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
start, offset, grc, nopartial); start, offset, grc, nopartial);
...@@ -557,7 +561,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, ...@@ -557,7 +561,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
int flush = 1; int flush = 1;
struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
udp_offloads); udp_offloads);
u32 flags; __be32 flags;
struct gro_remcsum grc; struct gro_remcsum grc;
skb_gro_remcsum_init(&grc); skb_gro_remcsum_init(&grc);
...@@ -573,11 +577,11 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, ...@@ -573,11 +577,11 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
flags = ntohl(vh->vx_flags); flags = vh->vx_flags;
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
ntohl(vh->vx_vni), &grc, vh->vx_vni, &grc,
!!(vs->flags & !!(vs->flags &
VXLAN_F_REMCSUM_NOPARTIAL)); VXLAN_F_REMCSUM_NOPARTIAL));
...@@ -668,7 +672,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) ...@@ -668,7 +672,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
static int vxlan_fdb_create(struct vxlan_dev *vxlan, static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip, const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags, __u16 state, __u16 flags,
__be16 port, __u32 vni, __u32 ifindex, __be16 port, __be32 vni, __u32 ifindex,
__u8 ndm_flags) __u8 ndm_flags)
{ {
struct vxlan_rdst *rd = NULL; struct vxlan_rdst *rd = NULL;
...@@ -777,7 +781,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) ...@@ -777,7 +781,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
} }
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex) union vxlan_addr *ip, __be16 *port, __be32 *vni,
u32 *ifindex)
{ {
struct net *net = dev_net(vxlan->dev); struct net *net = dev_net(vxlan->dev);
int err; int err;
...@@ -810,7 +815,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, ...@@ -810,7 +815,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
if (tb[NDA_VNI]) { if (tb[NDA_VNI]) {
if (nla_len(tb[NDA_VNI]) != sizeof(u32)) if (nla_len(tb[NDA_VNI]) != sizeof(u32))
return -EINVAL; return -EINVAL;
*vni = nla_get_u32(tb[NDA_VNI]); *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
} else { } else {
*vni = vxlan->default_dst.remote_vni; *vni = vxlan->default_dst.remote_vni;
} }
...@@ -840,7 +845,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -840,7 +845,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* struct net *net = dev_net(vxlan->dev); */ /* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip; union vxlan_addr ip;
__be16 port; __be16 port;
u32 vni, ifindex; __be32 vni;
u32 ifindex;
int err; int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
...@@ -877,7 +883,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], ...@@ -877,7 +883,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct vxlan_rdst *rd = NULL; struct vxlan_rdst *rd = NULL;
union vxlan_addr ip; union vxlan_addr ip;
__be16 port; __be16 port;
u32 vni, ifindex; __be32 vni;
u32 ifindex;
int err; int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
...@@ -1132,34 +1139,56 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) ...@@ -1132,34 +1139,56 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
return ret; return ret;
} }
static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, static bool vxlan_remcsum(struct vxlanhdr *unparsed,
size_t hdrlen, u32 data, bool nopartial) struct sk_buff *skb, u32 vxflags)
{ {
size_t start, offset, plen; size_t start, offset, plen;
if (skb->remcsum_offload) if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
return vh; goto out;
start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; start = vxlan_rco_start(unparsed->vx_vni);
offset = start + ((data & VXLAN_RCO_UDP) ? offset = start + vxlan_rco_offset(unparsed->vx_vni);
offsetof(struct udphdr, check) :
offsetof(struct tcphdr, check));
plen = hdrlen + offset + sizeof(u16); plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
if (!pskb_may_pull(skb, plen)) if (!pskb_may_pull(skb, plen))
return NULL; return false;
vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
!!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
out:
unparsed->vx_flags &= ~VXLAN_HF_RCO;
unparsed->vx_vni &= VXLAN_VNI_MASK;
return true;
}
skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
nopartial); struct vxlan_metadata *md,
struct metadata_dst *tun_dst)
{
struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
return vh; if (!(unparsed->vx_flags & VXLAN_HF_GBP))
goto out;
md->gbp = ntohs(gbp->policy_id);
if (tun_dst)
tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
if (gbp->dont_learn)
md->gbp |= VXLAN_GBP_DONT_LEARN;
if (gbp->policy_applied)
md->gbp |= VXLAN_GBP_POLICY_APPLIED;
out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
} }
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md, u32 vni, struct vxlan_metadata *md, __be32 vni,
struct metadata_dst *tun_dst) struct metadata_dst *tun_dst)
{ {
struct iphdr *oip = NULL; struct iphdr *oip = NULL;
...@@ -1169,10 +1198,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, ...@@ -1169,10 +1198,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
union vxlan_addr saddr; union vxlan_addr saddr;
int err = 0; int err = 0;
/* For flow based devices, map all packets to VNI 0 */
if (vs->flags & VXLAN_F_COLLECT_METADATA)
vni = 0;
/* Is this VNI defined? */ /* Is this VNI defined? */
vxlan = vxlan_vs_find_vni(vs, vni); vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan) if (!vxlan)
...@@ -1257,47 +1282,37 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1257,47 +1282,37 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{ {
struct metadata_dst *tun_dst = NULL; struct metadata_dst *tun_dst = NULL;
struct vxlan_sock *vs; struct vxlan_sock *vs;
struct vxlanhdr *vxh; struct vxlanhdr unparsed;
u32 flags, vni;
struct vxlan_metadata _md; struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md; struct vxlan_metadata *md = &_md;
/* Need Vxlan and inner Ethernet header to be present */ /* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN)) if (!pskb_may_pull(skb, VXLAN_HLEN))
goto error; return 1;
vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
flags = ntohl(vxh->vx_flags);
vni = ntohl(vxh->vx_vni);
if (flags & VXLAN_HF_VNI) { unparsed = *vxlan_hdr(skb);
flags &= ~VXLAN_HF_VNI;
} else {
/* VNI flag always required to be set */ /* VNI flag always required to be set */
goto bad_flags; if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
ntohl(vxlan_hdr(skb)->vx_flags),
ntohl(vxlan_hdr(skb)->vx_vni));
/* Return non vxlan pkt */
return 1;
} }
unparsed.vx_flags &= ~VXLAN_HF_VNI;
unparsed.vx_vni &= ~VXLAN_VNI_MASK;
if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
goto drop; goto drop;
vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
vs = rcu_dereference_sk_user_data(sk); vs = rcu_dereference_sk_user_data(sk);
if (!vs) if (!vs)
goto drop; goto drop;
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
!!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
if (!vxh)
goto drop;
flags &= ~VXLAN_HF_RCO;
vni &= VXLAN_VNI_MASK;
}
if (vxlan_collect_metadata(vs)) { if (vxlan_collect_metadata(vs)) {
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
cpu_to_be64(vni >> 8), sizeof(*md)); vxlan_vni(vxlan_hdr(skb)->vx_vni),
sizeof(*md));
if (!tun_dst) if (!tun_dst)
goto drop; goto drop;
...@@ -1310,25 +1325,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1310,25 +1325,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
/* For backwards compatibility, only allow reserved fields to be /* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested. * used by VXLAN extensions if explicitly requested.
*/ */
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { if (vs->flags & VXLAN_F_REMCSUM_RX)
struct vxlanhdr_gbp *gbp; if (!vxlan_remcsum(&unparsed, skb, vs->flags))
goto drop;
gbp = (struct vxlanhdr_gbp *)vxh; if (vs->flags & VXLAN_F_GBP)
md->gbp = ntohs(gbp->policy_id); vxlan_parse_gbp_hdr(&unparsed, md, tun_dst);
if (tun_dst)
tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
if (gbp->dont_learn)
md->gbp |= VXLAN_GBP_DONT_LEARN;
if (gbp->policy_applied)
md->gbp |= VXLAN_GBP_POLICY_APPLIED;
flags &= ~VXLAN_GBP_USED_BITS;
}
if (flags || vni & ~VXLAN_VNI_MASK) { if (unparsed.vx_flags || unparsed.vx_vni) {
/* If there are any unprocessed flags remaining treat /* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from * this as a malformed packet. This behavior diverges from
* VXLAN RFC (RFC7348) which stipulates that bits in reserved * VXLAN RFC (RFC7348) which stipulates that bits in reserved
...@@ -1337,28 +1340,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1337,28 +1340,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
* is more robust and provides a little more security in * is more robust and provides a little more security in
* adding extensions to VXLAN. * adding extensions to VXLAN.
*/ */
goto drop;
goto bad_flags;
} }
vxlan_rcv(vs, skb, md, vni >> 8, tun_dst); vxlan_rcv(vs, skb, md, vxlan_vni(vxlan_hdr(skb)->vx_vni), tun_dst);
return 0; return 0;
drop: drop:
/* Consume bad packet */
kfree_skb(skb);
return 0;
bad_flags:
netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
error:
if (tun_dst) if (tun_dst)
dst_release((struct dst_entry *)tun_dst); dst_release((struct dst_entry *)tun_dst);
/* Return non vxlan pkt */ /* Consume bad packet */
return 1; kfree_skb(skb);
return 0;
} }
static int arp_reduce(struct net_device *dev, struct sk_buff *skb) static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
...@@ -1683,7 +1677,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, ...@@ -1683,7 +1677,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
return; return;
gbp = (struct vxlanhdr_gbp *)vxh; gbp = (struct vxlanhdr_gbp *)vxh;
vxh->vx_flags |= htonl(VXLAN_HF_GBP); vxh->vx_flags |= VXLAN_HF_GBP;
if (md->gbp & VXLAN_GBP_DONT_LEARN) if (md->gbp & VXLAN_GBP_DONT_LEARN)
gbp->dont_learn = 1; gbp->dont_learn = 1;
...@@ -1703,7 +1697,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, ...@@ -1703,7 +1697,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int min_headroom; int min_headroom;
int err; int err;
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
u16 hdrlen = sizeof(struct vxlanhdr);
if ((vxflags & VXLAN_F_REMCSUM_TX) && if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) { skb->ip_summed == CHECKSUM_PARTIAL) {
...@@ -1736,18 +1729,15 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, ...@@ -1736,18 +1729,15 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
return PTR_ERR(skb); return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni = vni; vxh->vx_vni = vxlan_vni_field(vni);
if (type & SKB_GSO_TUNNEL_REMCSUM) { if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> unsigned int start;
VXLAN_RCO_SHIFT;
if (skb->csum_offset == offsetof(struct udphdr, check))
data |= VXLAN_RCO_UDP;
vxh->vx_vni |= htonl(data); start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
vxh->vx_flags |= htonl(VXLAN_HF_RCO); vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
vxh->vx_flags |= VXLAN_HF_RCO;
if (!skb_is_gso(skb)) { if (!skb_is_gso(skb)) {
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
...@@ -1895,7 +1885,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1895,7 +1885,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_metadata _md; struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md; struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port; __be16 src_port = 0, dst_port;
u32 vni; __be32 vni;
__be16 df = 0; __be16 df = 0;
__u8 tos, ttl; __u8 tos, ttl;
int err; int err;
...@@ -1917,7 +1907,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1917,7 +1907,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto drop; goto drop;
} }
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = be64_to_cpu(info->key.tun_id); vni = vxlan_tun_id_to_vni(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info); remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET) if (remote_ip.sa.sa_family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
...@@ -2010,7 +2000,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2010,7 +2000,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
htonl(vni << 8), md, flags, udp_sum); vni, md, flags, udp_sum);
if (err < 0) if (err < 0)
goto xmit_tx_error; goto xmit_tx_error;
...@@ -2068,7 +2058,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2068,7 +2058,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ttl = ttl ? : ip6_dst_hoplimit(ndst); ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet); skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
htonl(vni << 8), md, flags, udp_sum); vni, md, flags, udp_sum);
if (err < 0) { if (err < 0) {
dst_release(ndst); dst_release(ndst);
return; return;
...@@ -2225,7 +2215,7 @@ static void vxlan_cleanup(unsigned long arg) ...@@ -2225,7 +2215,7 @@ static void vxlan_cleanup(unsigned long arg)
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{ {
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__u32 vni = vxlan->default_dst.remote_vni; __be32 vni = vxlan->default_dst.remote_vni;
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
...@@ -2840,7 +2830,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, ...@@ -2840,7 +2830,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
memset(&conf, 0, sizeof(conf)); memset(&conf, 0, sizeof(conf));
if (data[IFLA_VXLAN_ID]) if (data[IFLA_VXLAN_ID])
conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
if (data[IFLA_VXLAN_GROUP]) { if (data[IFLA_VXLAN_GROUP]) {
conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
...@@ -2944,7 +2934,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, ...@@ -2944,7 +2934,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
break; break;
case -EEXIST: case -EEXIST:
pr_info("duplicate VNI %u\n", conf.vni); pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
break; break;
} }
...@@ -3002,7 +2992,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -3002,7 +2992,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
.high = htons(vxlan->cfg.port_max), .high = htons(vxlan->cfg.port_max),
}; };
if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
goto nla_put_failure; goto nla_put_failure;
if (!vxlan_addr_any(&dst->remote_ip)) { if (!vxlan_addr_any(&dst->remote_ip)) {
......
...@@ -24,11 +24,11 @@ struct vxlanhdr { ...@@ -24,11 +24,11 @@ struct vxlanhdr {
}; };
/* VXLAN header flags. */ /* VXLAN header flags. */
#define VXLAN_HF_VNI BIT(27) #define VXLAN_HF_VNI cpu_to_be32(BIT(27))
#define VXLAN_N_VID (1u << 24) #define VXLAN_N_VID (1u << 24)
#define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) #define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
#define VNI_HASH_BITS 10 #define VNI_HASH_BITS 10
...@@ -55,14 +55,14 @@ struct vxlanhdr { ...@@ -55,14 +55,14 @@ struct vxlanhdr {
*/ */
/* VXLAN-RCO header flags. */ /* VXLAN-RCO header flags. */
#define VXLAN_HF_RCO BIT(21) #define VXLAN_HF_RCO cpu_to_be32(BIT(21))
/* Remote checksum offload header option */ /* Remote checksum offload header option */
#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ #define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */
#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ #define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */
#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ #define VXLAN_RCO_SHIFT 1 /* Left shift of start */
#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) #define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1)
#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) #define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT)
/* /*
* VXLAN Group Based Policy Extension (VXLAN_F_GBP): * VXLAN Group Based Policy Extension (VXLAN_F_GBP):
...@@ -105,9 +105,9 @@ struct vxlanhdr_gbp { ...@@ -105,9 +105,9 @@ struct vxlanhdr_gbp {
}; };
/* VXLAN-GBP header flags. */ /* VXLAN-GBP header flags. */
#define VXLAN_HF_GBP BIT(31) #define VXLAN_HF_GBP cpu_to_be32(BIT(31))
#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) #define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))
/* skb->mark mapping /* skb->mark mapping
* *
...@@ -144,7 +144,7 @@ union vxlan_addr { ...@@ -144,7 +144,7 @@ union vxlan_addr {
struct vxlan_rdst { struct vxlan_rdst {
union vxlan_addr remote_ip; union vxlan_addr remote_ip;
__be16 remote_port; __be16 remote_port;
u32 remote_vni; __be32 remote_vni;
u32 remote_ifindex; u32 remote_ifindex;
struct list_head list; struct list_head list;
struct rcu_head rcu; struct rcu_head rcu;
...@@ -154,7 +154,7 @@ struct vxlan_rdst { ...@@ -154,7 +154,7 @@ struct vxlan_rdst {
struct vxlan_config { struct vxlan_config {
union vxlan_addr remote_ip; union vxlan_addr remote_ip;
union vxlan_addr saddr; union vxlan_addr saddr;
u32 vni; __be32 vni;
int remote_ifindex; int remote_ifindex;
int mtu; int mtu;
__be16 dst_port; __be16 dst_port;
...@@ -262,6 +262,59 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, ...@@ -262,6 +262,59 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
/* IPv6 header + UDP + VXLAN + Ethernet header */ /* IPv6 header + UDP + VXLAN + Ethernet header */
#define VXLAN6_HEADROOM (40 + 8 + 8 + 14) #define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
/* Return a pointer to the VXLAN header of @skb, taken to be the memory
 * located one UDP header past the position reported by udp_hdr().
 */
static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
{
	struct vxlanhdr *vxh;

	/* "+ 1" steps over exactly one UDP header. */
	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
	return vxh;
}
/* Extract the VNI from a raw "vni field" (three VNI bytes followed by one
 * extra byte), keeping the value in network byte order throughout.
 *
 * @vni_field: vx_vni as found in the VXLAN header (big endian).
 *
 * Returns the VNI as a big-endian 32-bit value with the trailing byte of
 * the field dropped.
 *
 * The shift is done on the raw bit pattern instead of converting to CPU
 * order and back, so the direction depends on host endianness. Shifting a
 * __be32 directly makes sparse complain about a restricted type degrading
 * to an integer; the explicit __force casts state that the raw-bits
 * manipulation is intentional.
 */
static inline __be32 vxlan_vni(__be32 vni_field)
{
#if defined(__BIG_ENDIAN)
	return (__force __be32)((__force u32)vni_field >> 8);
#else
	/* Mask off the trailing byte first so it is not shifted into the VNI. */
	return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
#endif
}
/* Build the "vni field" of a VXLAN header from a VNI, keeping the value in
 * network byte order throughout.
 *
 * @vni: VNI as a big-endian 32-bit value.
 *
 * Returns the value shifted by one byte so that the VNI occupies the first
 * three bytes of the field and the last byte is zero.
 *
 * As in the inverse helper, the shift acts on the raw bit pattern, so its
 * direction depends on host endianness; the __force casts keep sparse's
 * endianness checking quiet about this deliberate raw-bits operation.
 */
static inline __be32 vxlan_vni_field(__be32 vni)
{
#if defined(__BIG_ENDIAN)
	return (__force __be32)((__force u32)vni << 8);
#else
	return (__force __be32)((__force u32)vni >> 8);
#endif
}
/* Convert a big-endian 64-bit tunnel id into a big-endian 32-bit VNI
 * without a round trip through CPU byte order.
 *
 * @tun_id: tunnel id in network byte order.
 *
 * Returns the last four bytes (in network order) of @tun_id: on big-endian
 * hosts that is the numerically low half, on little-endian hosts the
 * numerically high half, hence the differing branches.
 *
 * The explicit __force casts document that the raw-bits shift and the
 * 64 -> 32 bit narrowing of endian-annotated types are intentional, which
 * keeps sparse from warning about them.
 */
static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
{
#if defined(__BIG_ENDIAN)
	return (__force __be32)tun_id;
#else
	return (__force __be32)((__force u64)tun_id >> 32);
#endif
}
/* Decode the remote checksum offload start value carried in @vni_field:
 * isolate the VXLAN_RCO_MASK bits, convert them to CPU order and scale
 * the result up by VXLAN_RCO_SHIFT.
 */
static inline size_t vxlan_rco_start(__be32 vni_field)
{
	__be32 rco_bits = vni_field & VXLAN_RCO_MASK;

	return be32_to_cpu(rco_bits) << VXLAN_RCO_SHIFT;
}
/* Return the offset of the checksum member inside the header selected by
 * @vni_field: the UDP header's when VXLAN_RCO_UDP is set, the TCP header's
 * otherwise.
 */
static inline size_t vxlan_rco_offset(__be32 vni_field)
{
	if (vni_field & VXLAN_RCO_UDP)
		return offsetof(struct udphdr, check);

	return offsetof(struct tcphdr, check);
}
/* Encode remote checksum offload parameters as bits to be OR-ed into the
 * vni field: @start scaled down by VXLAN_RCO_SHIFT and stored big endian,
 * plus VXLAN_RCO_UDP when @offset equals the offset of the UDP header's
 * checksum member.
 */
static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
{
	__be32 rco = cpu_to_be32(start >> VXLAN_RCO_SHIFT);

	if (offset != offsetof(struct udphdr, check))
		return rco;

	return rco | VXLAN_RCO_UDP;
}
#if IS_ENABLED(CONFIG_VXLAN) #if IS_ENABLED(CONFIG_VXLAN)
void vxlan_get_rx_port(struct net_device *netdev); void vxlan_get_rx_port(struct net_device *netdev);
#else #else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment