Commit f5796684, authored by Jesse Gross, committed by David S. Miller

openvswitch: Add support for Geneve tunneling.

The Openvswitch implementation is completely agnostic to the options
that are in use and can handle newly defined options without
further work. It does this by simply matching on a byte array
of options and allowing userspace to setup flows on this array.
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6b205b2c
...@@ -97,6 +97,7 @@ struct ip_tunnel { ...@@ -97,6 +97,7 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
struct tnl_ptk_info { struct tnl_ptk_info {
__be16 flags; __be16 flags;
......
...@@ -192,6 +192,7 @@ enum ovs_vport_type { ...@@ -192,6 +192,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */
OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */
OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */
__OVS_VPORT_TYPE_MAX __OVS_VPORT_TYPE_MAX
}; };
...@@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr { ...@@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
__OVS_TUNNEL_KEY_ATTR_MAX __OVS_TUNNEL_KEY_ATTR_MAX
}; };
......
...@@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN ...@@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN
Say N to exclude this support and reduce the binary size. Say N to exclude this support and reduce the binary size.
If unsure, say Y. If unsure, say Y.
config OPENVSWITCH_GENEVE
bool "Open vSwitch Geneve tunneling support"
depends on INET
depends on OPENVSWITCH
depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
default y
---help---
If you say Y here, then the Open vSwitch will be able to create geneve vports.
Say N to exclude this support and reduce the binary size.
...@@ -15,6 +15,10 @@ openvswitch-y := \ ...@@ -15,6 +15,10 @@ openvswitch-y := \
vport-internal_dev.o \ vport-internal_dev.o \
vport-netdev.o vport-netdev.o
ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
openvswitch-y += vport-geneve.o
endif
ifneq ($(CONFIG_OPENVSWITCH_VXLAN),) ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o openvswitch-y += vport-vxlan.o
endif endif
......
...@@ -370,6 +370,7 @@ static size_t key_attr_size(void) ...@@ -370,6 +370,7 @@ static size_t key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
...@@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, 0, &acts); &flow->key, 0, &acts);
rcu_assign_pointer(flow->sf_acts, acts);
if (err) if (err)
goto err_flow_free; goto err_flow_free;
rcu_assign_pointer(flow->sf_acts, acts);
OVS_CB(packet)->egress_tun_info = NULL;
OVS_CB(packet)->flow = flow; OVS_CB(packet)->flow = flow;
packet->priority = flow->key.phy.priority; packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark; packet->mark = flow->key.phy.skb_mark;
......
...@@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
int error; int error;
struct ethhdr *eth; struct ethhdr *eth;
/* Flags are always used as part of stats */
key->tp.flags = 0;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
/* Link layer. We are guaranteed to have at least the 14 byte Ethernet /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
...@@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, ...@@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key) struct sk_buff *skb, struct sw_flow_key *key)
{ {
/* Extract metadata from packet. */ /* Extract metadata from packet. */
if (tun_info) if (tun_info) {
memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
else
if (tun_info->options) {
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
8)) - 1
> sizeof(key->tun_opts));
memcpy(GENEVE_OPTS(key, tun_info->options_len),
tun_info->options, tun_info->options_len);
key->tun_opts_len = tun_info->options_len;
} else {
key->tun_opts_len = 0;
}
} else {
key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key)); memset(&key->tun_key, 0, sizeof(key->tun_key));
}
key->phy.priority = skb->priority; key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
......
...@@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel { ...@@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info { struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel; struct ovs_key_ipv4_tunnel tunnel;
struct geneve_opt *options;
u8 options_len;
}; };
/* Store options at the end of the array if they are less than the
 * maximum size. This allows us to get the benefits of variable length
 * matching for small options.
 *
 * @flow_key: pointer to a struct sw_flow_key.
 * @opt_len:  length in bytes of the options stored in tun_opts.
 *
 * Evaluates to a struct geneve_opt pointer that lies opt_len bytes before
 * the end of flow_key->tun_opts. The opt_len argument is parenthesized so
 * that an expression argument (e.g. "a + b") is subtracted as a whole.
 */
#define GENEVE_OPTS(flow_key, opt_len)	\
	((struct geneve_opt *)((flow_key)->tun_opts + \
			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
			       (opt_len)))
static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
const struct iphdr *iph, const struct iphdr *iph,
__be64 tun_id, __be16 tun_flags) __be64 tun_id, __be16 tun_flags,
struct geneve_opt *opts,
u8 opts_len)
{ {
tun_info->tunnel.tun_id = tun_id; tun_info->tunnel.tun_id = tun_id;
tun_info->tunnel.ipv4_src = iph->saddr; tun_info->tunnel.ipv4_src = iph->saddr;
...@@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, ...@@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
/* clear struct padding. */ /* clear struct padding. */
memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
tun_info->options = opts;
tun_info->options_len = opts_len;
} }
struct sw_flow_key { struct sw_flow_key {
u8 tun_opts[255];
u8 tun_opts_len;
struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
struct { struct {
u32 priority; /* Packet QoS priority. */ u32 priority; /* Packet QoS priority. */
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include <linux/icmp.h> #include <linux/icmp.h>
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <net/geneve.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ndisc.h> #include <net/ndisc.h>
...@@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match, ...@@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match,
} \ } \
} while (0) } while (0)
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \ do { \
update_range__(match, offsetof(struct sw_flow_key, field), \ update_range__(match, offset, len, is_mask); \
len, is_mask); \ if (is_mask) \
if (is_mask) { \ memcpy((u8 *)&(match)->mask->key + offset, value_p, \
if ((match)->mask) \ len); \
memcpy(&(match)->mask->key.field, value_p, len);\ else \
} else { \ memcpy((u8 *)(match)->key + offset, value_p, len); \
memcpy(&(match)->key->field, value_p, len); \
} \
} while (0) } while (0)
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
value_p, len, is_mask)
static u16 range_n_bytes(const struct sw_flow_key_range *range) static u16 range_n_bytes(const struct sw_flow_key_range *range)
{ {
return range->end - range->start; return range->end - range->start;
...@@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int rem; int rem;
bool ttl = false; bool ttl = false;
__be16 tun_flags = 0; __be16 tun_flags = 0;
unsigned long opt_key_offset;
nla_for_each_nested(a, attr, rem) { nla_for_each_nested(a, attr, rem) {
int type = nla_type(a); int type = nla_type(a);
...@@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
[OVS_TUNNEL_KEY_ATTR_OAM] = 0, [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
}; };
if (type > OVS_TUNNEL_KEY_ATTR_MAX) { if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
...@@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return -EINVAL; return -EINVAL;
} }
if (ovs_tunnel_key_lens[type] != nla_len(a)) { if (ovs_tunnel_key_lens[type] != nla_len(a) &&
ovs_tunnel_key_lens[type] != -1) {
OVS_NLERR("IPv4 tunnel attribute type has unexpected " OVS_NLERR("IPv4 tunnel attribute type has unexpected "
" length (type=%d, length=%d, expected=%d).\n", " length (type=%d, length=%d, expected=%d).\n",
type, nla_len(a), ovs_tunnel_key_lens[type]); type, nla_len(a), ovs_tunnel_key_lens[type]);
...@@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_OAM: case OVS_TUNNEL_KEY_ATTR_OAM:
tun_flags |= TUNNEL_OAM; tun_flags |= TUNNEL_OAM;
break; break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
tun_flags |= TUNNEL_OPTIONS_PRESENT;
if (nla_len(a) > sizeof(match->key->tun_opts)) {
OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
nla_len(a),
sizeof(match->key->tun_opts));
return -EINVAL;
}
if (nla_len(a) % 4 != 0) {
OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
nla_len(a));
return -EINVAL;
}
/* We need to record the length of the options passed
* down, otherwise packets with the same format but
* additional options will be silently matched.
*/
if (!is_mask) {
SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
false);
} else {
/* This is somewhat unusual because it looks at
* both the key and mask while parsing the
* attributes (and by extension assumes the key
* is parsed first). Normally, we would verify
* that each is the correct length and that the
* attributes line up in the validate function.
* However, that is difficult because this is
* variable length and we won't have the
* information later.
*/
if (match->key->tun_opts_len != nla_len(a)) {
OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
match->key->tun_opts_len,
nla_len(a));
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
true);
}
opt_key_offset = (unsigned long)GENEVE_OPTS(
(struct sw_flow_key *)0,
nla_len(a));
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
nla_data(a), nla_len(a),
is_mask);
break;
default: default:
OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
type);
return -EINVAL; return -EINVAL;
} }
} }
...@@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return 0; return 0;
} }
static int ipv4_tun_to_nlattr(struct sk_buff *skb, static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *tun_key, const struct ovs_key_ipv4_tunnel *output,
const struct ovs_key_ipv4_tunnel *output) const struct geneve_opt *tun_opts,
int swkey_tun_opts_len)
{ {
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
if (!nla)
return -EMSGSIZE;
if (output->tun_flags & TUNNEL_KEY && if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE; return -EMSGSIZE;
...@@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, ...@@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) && if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE; return -EMSGSIZE;
if (tun_opts &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0; return 0;
} }
/* Emit the tunnel key (and optional Geneve options) wrapped in a nested
 * OVS_KEY_ATTR_TUNNEL attribute.
 *
 * Returns 0 on success, -EMSGSIZE if the nest cannot be started, or the
 * error reported by __ipv4_tun_to_nlattr(). The nest is only closed on
 * success.
 */
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
			      const struct ovs_key_ipv4_tunnel *output,
			      const struct geneve_opt *tun_opts,
			      int swkey_tun_opts_len)
{
	struct nlattr *nest = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	int ret;

	if (!nest)
		return -EMSGSIZE;

	ret = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
	if (ret)
		return ret;

	nla_nest_end(skb, nest);
	return 0;
}
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
const struct nlattr **a, bool is_mask) const struct nlattr **a, bool is_mask)
{ {
...@@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure; goto nla_put_failure;
if ((swkey->tun_key.ipv4_dst || is_mask) && if ((swkey->tun_key.ipv4_dst || is_mask)) {
ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) const struct geneve_opt *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
opts = GENEVE_OPTS(output, swkey->tun_opts_len);
if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
swkey->tun_opts_len))
goto nla_put_failure; goto nla_put_failure;
}
if (swkey->phy.in_port == DP_MAX_PORTS) { if (swkey->phy.in_port == DP_MAX_PORTS) {
if (is_mask && (output->phy.in_port == 0xffff)) if (is_mask && (output->phy.in_port == 0xffff))
...@@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, ...@@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (err) if (err)
return err; return err;
if (key.tun_opts_len) {
struct geneve_opt *option = GENEVE_OPTS(&key,
key.tun_opts_len);
int opts_len = key.tun_opts_len;
bool crit_opt = false;
while (opts_len > 0) {
int len;
if (opts_len < sizeof(*option))
return -EINVAL;
len = sizeof(*option) + option->length * 4;
if (len > opts_len)
return -EINVAL;
crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
option = (struct geneve_opt *)((u8 *)option + len);
opts_len -= len;
};
key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
};
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
if (start < 0) if (start < 0)
return start; return start;
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
sizeof(*tun_info)); sizeof(*tun_info) + key.tun_opts_len);
if (IS_ERR(a)) if (IS_ERR(a))
return PTR_ERR(a); return PTR_ERR(a);
tun_info = nla_data(a); tun_info = nla_data(a);
tun_info->tunnel = key.tun_key; tun_info->tunnel = key.tun_key;
tun_info->options_len = key.tun_opts_len;
if (tun_info->options_len) {
/* We need to store the options in the action itself since
* everything else will go away after flow setup. We can append
* it to tun_info and then point there.
*/
memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
key.tun_opts_len);
tun_info->options = (struct geneve_opt *)(tun_info + 1);
} else {
tun_info->options = NULL;
}
add_nested_action_end(*sfa, start); add_nested_action_end(*sfa, start);
...@@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) ...@@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return -EMSGSIZE; return -EMSGSIZE;
err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
nla_data(ovs_key)); tun_info->options_len ?
tun_info->options : NULL,
tun_info->options_len);
if (err) if (err)
return err; return err;
nla_nest_end(skb, start); nla_nest_end(skb, start);
......
/*
* Copyright (c) 2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/version.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <linux/if_vlan.h>
#include <net/geneve.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/xfrm.h>
#include "datapath.h"
#include "vport.h"
/**
* struct geneve_port - Keeps track of open UDP ports
* @gs: The Geneve socket (struct geneve_sock) created for this port number.
* @name: vport name.
*/
struct geneve_port {
struct geneve_sock *gs;
char name[IFNAMSIZ];
};
/* File-local list head.
 * NOTE(review): nothing in the visible part of this file references
 * geneve_ports - confirm whether it is still needed.
 */
static LIST_HEAD(geneve_ports);
/* Return the Geneve-specific private data of @vport, as handed back by
 * vport_priv().
 */
static inline struct geneve_port *geneve_vport(const struct vport *vport)
{
return vport_priv(vport);
}
/* Return a pointer to the Geneve header of @skb, i.e. the bytes that
 * immediately follow the UDP header returned by udp_hdr().
 */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
return (struct genevehdr *)(udp_hdr(skb) + 1);
}
/* Convert 64 bit tunnel ID to 24 bit VNI.
 *
 * @tun_id: tunnel ID in network byte order.
 * @vni:    output buffer of at least 3 bytes; receives the low 24 bits of
 *          the tunnel ID, most significant byte first.
 *
 * The raw bits are pulled out of the __be64 with a single __force cast
 * before any shifting, so both endianness branches operate on a plain u64
 * instead of shifting a bitwise-restricted type.
 */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
	u64 raw = (__force u64)tun_id;

#ifdef __BIG_ENDIAN
	vni[0] = (__u8)(raw >> 16);
	vni[1] = (__u8)(raw >> 8);
	vni[2] = (__u8)raw;
#else
	/* On a little-endian host the three least significant network-order
	 * bytes live in the top three bytes of the in-memory value.
	 */
	vni[0] = (__u8)(raw >> 40);
	vni[1] = (__u8)(raw >> 48);
	vni[2] = (__u8)(raw >> 56);
#endif
}
/* Convert 24 bit VNI to 64 bit tunnel ID. */
static __be64 vni_to_tunnel_id(__u8 *vni)
{
#ifdef __BIG_ENDIAN
return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
return (__force __be64)(((__force u64)vni[0] << 40) |
((__force u64)vni[1] << 48) |
((__force u64)vni[2] << 56));
#endif
}
/* Receive callback registered with the Geneve socket.
 *
 * Builds an ovs_tunnel_info from the outer IP header and the Geneve header
 * (VNI, OAM/critical bits, options) and passes the packet to
 * ovs_vport_receive() on the vport stored in gs->rcv_data.
 */
static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
{
	struct vport *vport = gs->rcv_data;
	struct genevehdr *hdr = geneve_hdr(skb);
	struct ovs_tunnel_info tun_info;
	__be16 tun_flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT;
	__be64 tun_id;
	int opts_bytes;

	/* The header's opt_len field is scaled by 4 to get a byte count. */
	opts_bytes = hdr->opt_len * 4;

	if (udp_hdr(skb)->check != 0)
		tun_flags |= TUNNEL_CSUM;
	if (hdr->oam)
		tun_flags |= TUNNEL_OAM;
	if (hdr->critical)
		tun_flags |= TUNNEL_CRIT_OPT;

	tun_id = vni_to_tunnel_id(hdr->vni);

	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), tun_id, tun_flags,
			       hdr->options, opts_bytes);

	ovs_vport_receive(vport, skb, &tun_info);
}
static int geneve_get_options(const struct vport *vport,
struct sk_buff *skb)
{
struct geneve_port *geneve_port = geneve_vport(vport);
__be16 sport;
sport = ntohs(inet_sk(geneve_port->gs->sock->sk)->inet_sport);
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, sport))
return -EMSGSIZE;
return 0;
}
static void geneve_tnl_destroy(struct vport *vport)
{
struct geneve_port *geneve_port = geneve_vport(vport);
geneve_sock_release(geneve_port->gs);
ovs_vport_deferred_free(vport);
}
/* Create a Geneve vport.
 *
 * @parms: vport parameters; parms->options must carry a nested
 *         OVS_TUNNEL_ATTR_DST_PORT attribute holding a u16 port.
 *
 * Returns the new vport, or an ERR_PTR():
 *   -EINVAL if the options or a correctly sized destination port are
 *   missing; otherwise the error from ovs_vport_alloc() or
 *   geneve_sock_add().
 */
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct geneve_port *geneve_port;
	struct geneve_sock *gs;
	struct vport *vport;
	struct nlattr *a;
	u16 dst_port;

	if (!options)
		return ERR_PTR(-EINVAL);

	/* Require destination port from userspace. */
	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (!a || nla_len(a) != sizeof(u16))
		return ERR_PTR(-EINVAL);
	dst_port = nla_get_u16(a);

	vport = ovs_vport_alloc(sizeof(struct geneve_port),
				&ovs_geneve_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	geneve_port = geneve_vport(vport);

	/* strncpy() leaves the destination unterminated when the source is
	 * IFNAMSIZ bytes or longer, so copy one byte less and terminate
	 * explicitly; geneve_get_name() returns this buffer as a C string.
	 */
	strncpy(geneve_port->name, parms->name, IFNAMSIZ - 1);
	geneve_port->name[IFNAMSIZ - 1] = '\0';

	gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
	if (IS_ERR(gs)) {
		/* The vport was never published; free it directly. */
		ovs_vport_free(vport);
		return (void *)gs;
	}
	geneve_port->gs = gs;

	return vport;
}
static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct ovs_key_ipv4_tunnel *tun_key;
struct ovs_tunnel_info *tun_info;
struct net *net = ovs_dp_get_net(vport->dp);
struct geneve_port *geneve_port = geneve_vport(vport);
__be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
__be16 sport;
struct rtable *rt;
struct flowi4 fl;
u8 vni[3];
__be16 df;
int err;
tun_info = OVS_CB(skb)->egress_tun_info;
if (unlikely(!tun_info)) {
err = -EINVAL;
goto error;
}
tun_key = &tun_info->tunnel;
/* Route lookup */
memset(&fl, 0, sizeof(fl));
fl.daddr = tun_key->ipv4_dst;
fl.saddr = tun_key->ipv4_src;
fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_UDP;
rt = ip_route_output_key(net, &fl);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
}
df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
tunnel_id_to_vni(tun_key->tun_id, vni);
skb->ignore_df = 1;
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
tun_key->ipv4_dst, tun_key->ipv4_tos,
tun_key->ipv4_ttl, df, sport, dport,
tun_key->tun_flags, vni,
tun_info->options_len, (u8 *)tun_info->options,
false);
if (err < 0)
ip_rt_put(rt);
error:
return err;
}
static const char *geneve_get_name(const struct vport *vport)
{
struct geneve_port *geneve_port = geneve_vport(vport);
return geneve_port->name;
}
/* vport operations table for Geneve tunnel ports
 * (OVS_VPORT_TYPE_GENEVE), wiring the callbacks defined in this file.
 */
const struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
.create = geneve_tnl_create,
.destroy = geneve_tnl_destroy,
.get_name = geneve_get_name,
.get_options = geneve_get_options,
.send = geneve_tnl_send,
};
...@@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb, ...@@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb,
key = key_to_tunnel_id(tpi->key, tpi->seq); key = key_to_tunnel_id(tpi->key, tpi->seq);
ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
filter_tnl_flags(tpi->flags)); filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info); ovs_vport_receive(vport, skb, &tun_info);
return PACKET_RCVD; return PACKET_RCVD;
......
...@@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) ...@@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */ /* Save outer tunnel values */
iph = ip_hdr(skb); iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8); key = cpu_to_be64(ntohl(vx_vni) >> 8);
ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
ovs_vport_receive(vport, skb, &tun_info); ovs_vport_receive(vport, skb, &tun_info);
} }
......
...@@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = { ...@@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = {
#ifdef CONFIG_OPENVSWITCH_VXLAN #ifdef CONFIG_OPENVSWITCH_VXLAN
&ovs_vxlan_vport_ops, &ovs_vxlan_vport_ops,
#endif #endif
#ifdef CONFIG_OPENVSWITCH_GENEVE
&ovs_geneve_vport_ops,
#endif
}; };
/* Protected by RCU read lock for reading, ovs_mutex for writing. */ /* Protected by RCU read lock for reading, ovs_mutex for writing. */
......
...@@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops; ...@@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops; extern const struct vport_ops ovs_internal_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops; extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops; extern const struct vport_ops ovs_vxlan_vport_ops;
extern const struct vport_ops ovs_geneve_vport_ops;
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len) const void *start, unsigned int len)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment