Commit 614732ea authored by Thomas Graf, committed by David S. Miller

openvswitch: Use regular VXLAN net_device device

This gets rid of all OVS specific VXLAN code in the receive and
transmit path by using a VXLAN net_device to represent the vport.
Only a small shim layer remains which takes care of handling the
VXLAN specific OVS Netlink configuration.

Unexports vxlan_sock_add(), vxlan_sock_release(), vxlan_xmit_skb()
since they are no longer needed.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c9db965c
This diff is collapsed.
......@@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
unsigned char name_assign_type,
const struct rtnl_link_ops *ops,
struct nlattr *tb[]);
int rtnl_delete_link(struct net_device *dev);
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len);
......
......@@ -101,22 +101,12 @@ struct vxlanhdr {
#define FDB_HASH_SIZE (1<<FDB_HASH_BITS)
struct vxlan_metadata {
__be32 vni;
u32 gbp;
/* Temporary until vxlan_rcv() API is gone */
struct metadata_dst *tun_dst;
};
struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
struct vxlan_metadata *md);
/* per UDP socket information */
struct vxlan_sock {
struct hlist_node hlist;
vxlan_rcv_t *rcv;
void *data;
struct work_struct del_work;
struct socket *sock;
struct rcu_head rcu;
......@@ -203,19 +193,13 @@ struct vxlan_dev {
VXLAN_F_COLLECT_METADATA | \
VXLAN_F_FLOW_BASED)
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
bool no_share, u32 flags);
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
bool xnet, u32 vxflags);
/*
 * Return the port (__be16) stored as inet_sport on the UDP socket that
 * backs @vxlan, i.e. the local port the VXLAN socket is bound to.
 * Dereferences vxlan->vn_sock without a NULL check.
 */
static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan)
{
return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport;
}
static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
netdev_features_t features)
......
......@@ -1960,16 +1960,30 @@ static int rtnl_group_dellink(const struct net *net, int group)
return 0;
}
int rtnl_delete_link(struct net_device *dev)
{
const struct rtnl_link_ops *ops;
LIST_HEAD(list_kill);
ops = dev->rtnl_link_ops;
if (!ops || !ops->dellink)
return -EOPNOTSUPP;
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
LIST_HEAD(list_kill);
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
......@@ -1991,13 +2005,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev)
return -ENODEV;
ops = dev->rtnl_link_ops;
if (!ops || !ops->dellink)
return -EOPNOTSUPP;
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
return 0;
return rtnl_delete_link(dev);
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
......
......@@ -44,18 +44,6 @@ config OPENVSWITCH_GRE
If unsure, say Y.
config OPENVSWITCH_VXLAN
tristate "Open vSwitch VXLAN tunneling support"
depends on OPENVSWITCH
depends on VXLAN
default OPENVSWITCH
---help---
If you say Y here, then Open vSwitch will be able to create VXLAN vports.
Say N to exclude this support and reduce the binary size.
If unsure, say Y.
config OPENVSWITCH_GENEVE
tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
......
......@@ -16,5 +16,4 @@ openvswitch-y := \
vport-netdev.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_VXLAN) += vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
......@@ -47,9 +47,9 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
#include "flow_netlink.h"
#include "vport-vxlan.h"
struct ovs_len_tbl {
int len;
......@@ -475,7 +475,7 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
{
struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
unsigned long opt_key_offset;
struct ovs_vxlan_opts opts;
struct vxlan_metadata opts;
int err;
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
......@@ -626,7 +626,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len)
{
const struct ovs_vxlan_opts *opts = tun_opts;
const struct vxlan_metadata *opts = tun_opts;
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
......
......@@ -27,9 +27,13 @@
#include <linux/skbuff.h>
#include <linux/openvswitch.h>
#include <net/llc.h>
#include <net/udp.h>
#include <net/ip_tunnels.h>
#include <net/rtnetlink.h>
#include <net/vxlan.h>
#include "datapath.h"
#include "vport.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
......@@ -147,7 +151,8 @@ static void free_port_rcu(struct rcu_head *rcu)
{
struct vport *vport = container_of(rcu, struct vport, rcu);
dev_put(vport->dev);
if (vport->dev)
dev_put(vport->dev);
ovs_vport_free(vport);
}
......@@ -221,12 +226,202 @@ static struct vport_ops ovs_netdev_vport_ops = {
.send = netdev_send,
};
/* Compat code for old userspace. */
#if IS_ENABLED(CONFIG_VXLAN)
static struct vport_ops ovs_vxlan_netdev_vport_ops;
/*
 * Dump the VXLAN vport options into @skb: always the destination port
 * (converted to host byte order), plus an OVS_TUNNEL_ATTR_EXTENSION nest
 * carrying the GBP flag when the device has VXLAN_F_GBP set.
 *
 * Returns 0 on success or -EMSGSIZE when an attribute does not fit.
 */
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
	__be16 port = vxlan->cfg.dst_port;
	struct nlattr *nest;

	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(port)))
		return -EMSGSIZE;

	/* No extensions enabled: nothing more to report. */
	if (!(vxlan->flags & VXLAN_F_GBP))
		return 0;

	nest = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
		return -EMSGSIZE;

	nla_nest_end(skb, nest);
	return 0;
}
/*
 * Netlink policy used by vxlan_configure_exts() to validate the attributes
 * nested inside the extension attribute; GBP is a presence-only flag.
 */
static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
/*
 * Parse the nested extension attribute @attr and translate the requested
 * extensions into VXLAN_F_* bits in @conf->flags. @vport is not used here.
 *
 * Returns 0 on success, -EINVAL when the nest is too short to hold a
 * single attribute header, or the error from nla_parse_nested().
 */
static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
				struct vxlan_config *conf)
{
	struct nlattr *tb[OVS_VXLAN_EXT_MAX + 1];
	int rc;

	if (nla_len(attr) < sizeof(struct nlattr))
		return -EINVAL;

	rc = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, attr, exts_policy);
	if (rc < 0)
		return rc;

	if (tb[OVS_VXLAN_EXT_GBP])
		conf->flags |= VXLAN_F_GBP;

	return 0;
}
/*
 * Allocate a vport and create the flow-based VXLAN net_device backing it.
 *
 * The destination port is mandatory and is taken from the
 * OVS_TUNNEL_ATTR_DST_PORT option; optional extensions are read from
 * OVS_TUNNEL_ATTR_EXTENSION. The device is created and brought up under
 * the RTNL lock.
 *
 * Returns the new vport, or an ERR_PTR() on failure. On every failure path
 * the vport is freed, and a device that was created but could not be
 * brought up is deleted again so it does not linger.
 */
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct net_device *dev;
	struct vport *vport;
	struct nlattr *a;
	int err;
	struct vxlan_config conf = {
		.no_share = true,
		.flags = VXLAN_F_FLOW_BASED | VXLAN_F_COLLECT_METADATA,
	};

	if (!options) {
		err = -EINVAL;
		goto error;
	}

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		conf.dst_port = htons(nla_get_u16(a));
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
	if (a) {
		err = vxlan_configure_exts(vport, a, &conf);
		if (err) {
			ovs_vport_free(vport);
			goto error;
		}
	}

	rtnl_lock();
	dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	/*
	 * Bringing the device up can fail; do not hand back a vport whose
	 * device is down, and do not leave the new device behind.
	 */
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	if (err < 0) {
		rtnl_delete_link(dev);
		rtnl_unlock();
		ovs_vport_free(vport);
		goto error;
	}
	rtnl_unlock();

	return vport;

error:
	return ERR_PTR(err);
}
/*
 * vport_ops->create handler: build the VXLAN device and vport, then attach
 * the vport to the device of the same name via netdev_link().
 * Errors from vxlan_tnl_create() are passed through unchanged.
 */
static struct vport *vxlan_create(const struct vport_parms *parms)
{
	struct vport *tnl_vport = vxlan_tnl_create(parms);

	return IS_ERR(tnl_vport) ? tnl_vport
				 : netdev_link(tnl_vport, parms->name);
}
/*
 * vport_ops->destroy handler for VXLAN vports.
 *
 * Under the RTNL lock: detach the device from the datapath if it is still
 * attached, drop the vport's device reference, delete the VXLAN device and
 * clear vport->dev. The vport itself is freed after an RCU grace period by
 * free_port_rcu().
 */
static void vxlan_destroy(struct vport *vport)
{
rtnl_lock();
/* Only detach if the device is still marked as an OVS datapath port. */
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
/* Early release so we can unregister the device */
dev_put(vport->dev);
rtnl_delete_link(vport->dev);
/* Reference already dropped above; NULL tells free_port_rcu() to skip its dev_put(). */
vport->dev = NULL;
rtnl_unlock();
call_rcu(&vport->rcu, free_port_rcu);
}
/*
 * Fill @egress_tun_info with the outer tunnel parameters that would be used
 * to transmit @skb through this VXLAN vport: UDP as protocol, the device's
 * socket port as destination port and a flow-derived source port.
 *
 * Returns the result of ovs_tunnel_get_egress_info().
 */
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
				     struct ip_tunnel_info *egress_tun_info)
{
	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
	struct net *net = ovs_dp_get_net(vport->dp);
	__be16 dst_port = vxlan_dev_dst_port(vxlan);
	__be16 src_port;

	/*
	 * A 0/0 range is passed, so no explicit local port range lookup is
	 * needed here; the previously fetched range was never used.
	 */
	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	return ovs_tunnel_get_egress_info(egress_tun_info, net,
					  OVS_CB(skb)->egress_tun_info,
					  IPPROTO_UDP, skb->mark,
					  src_port, dst_port);
}
/*
 * vport operations for OVS_VPORT_TYPE_VXLAN ports backed by a VXLAN
 * net_device. Transmission reuses the generic netdev_send(); the remaining
 * handlers are the VXLAN-specific helpers defined above.
 */
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = vxlan_destroy,
.get_options = vxlan_get_options,
.send = netdev_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,
};
/* Register the VXLAN compat vport ops; returns ovs_vport_ops_register()'s result. */
static int vxlan_compat_init(void)
{
return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
}
/* Unregister the VXLAN compat vport ops registered by vxlan_compat_init(). */
static void vxlan_compat_exit(void)
{
ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
}
#else
/* Stub used when CONFIG_VXLAN is disabled: nothing to register, always succeeds. */
static int vxlan_compat_init(void)
{
return 0;
}
/* Stub used when CONFIG_VXLAN is disabled: nothing to unregister. */
static void vxlan_compat_exit(void)
{
}
#endif
/*
 * Register the netdev vport ops followed by the VXLAN compat vport ops.
 *
 * Returns 0 on success or the first registration error. If the VXLAN
 * compat registration fails, the netdev ops registered just before are
 * unregistered again so a failed init leaves nothing behind.
 */
int __init ovs_netdev_init(void)
{
	int err;

	err = ovs_vport_ops_register(&ovs_netdev_vport_ops);
	if (err)
		return err;

	err = vxlan_compat_init();
	if (err) {
		/* Roll back the registration that did succeed. */
		ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
	}

	return err;
}
/* Unregister the netdev vport ops and then the VXLAN compat vport ops. */
void ovs_netdev_exit(void)
{
ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
vxlan_compat_exit();
}
/*
* Copyright (c) 2014 Nicira, Inc.
* Copyright (c) 2013 Cisco Systems, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <linux/module.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/ip_tunnels.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include "datapath.h"
#include "vport.h"
#include "vport-vxlan.h"
/**
 * struct vxlan_port - Keeps track of open UDP ports
 * @vs: vxlan_sock created for the port.
 * @name: vport name.
 * @exts: enabled VXLAN extensions, as a mask of VXLAN_F_* flags.
 */
struct vxlan_port {
struct vxlan_sock *vs;
char name[IFNAMSIZ];
u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
};
static struct vport_ops ovs_vxlan_vport_ops;
/* Return the struct vxlan_port stored in @vport's private area. */
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
return vport_priv(vport);
}
/* Called with rcu_read_lock and BH disabled. */
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md)
{
struct ip_tunnel_info tun_info;
struct vxlan_port *vxlan_port;
struct vport *vport = vs->data;
struct iphdr *iph;
struct ovs_vxlan_opts opts = {
.gbp = md->gbp,
};
__be64 key;
__be16 flags;
flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0);
vxlan_port = vxlan_vport(vport);
if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)
flags |= TUNNEL_VXLAN_OPT;
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(md->vni) >> 8);
ip_tunnel_info_init(&tun_info, iph,
udp_hdr(skb)->source, udp_hdr(skb)->dest,
key, flags, &opts, sizeof(opts));
ovs_vport_receive(vport, skb, &tun_info);
}
/*
 * Dump the vport options into @skb: the socket's port (converted to host
 * byte order) and, when any extension is enabled, an
 * OVS_TUNNEL_ATTR_EXTENSION nest that carries the GBP flag if set.
 *
 * Returns 0 on success or -EMSGSIZE when an attribute does not fit.
 */
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
	struct vxlan_port *port = vxlan_vport(vport);
	__be16 sport = inet_sk(port->vs->sock->sk)->inet_sport;
	struct nlattr *nest;

	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sport)))
		return -EMSGSIZE;

	if (!port->exts)
		return 0;

	nest = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
	if (!nest)
		return -EMSGSIZE;

	if ((port->exts & VXLAN_F_GBP) &&
	    nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
		return -EMSGSIZE;

	nla_nest_end(skb, nest);
	return 0;
}
/*
 * vport_ops->destroy handler: release the port's vxlan_sock, then hand the
 * vport to ovs_vport_deferred_free().
 */
static void vxlan_tnl_destroy(struct vport *vport)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
vxlan_sock_release(vxlan_port->vs);
ovs_vport_deferred_free(vport);
}
/*
 * Netlink policy used by vxlan_configure_exts() to validate the attributes
 * nested inside the extension attribute; GBP is a presence-only flag.
 */
static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
{
struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
struct vxlan_port *vxlan_port;
int err;
if (nla_len(attr) < sizeof(struct nlattr))
return -EINVAL;
err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
if (err < 0)
return err;
vxlan_port = vxlan_vport(vport);
if (exts[OVS_VXLAN_EXT_GBP])
vxlan_port->exts |= VXLAN_F_GBP;
return 0;
}
/*
 * vport_ops->create handler: allocate a vport and open the VXLAN UDP socket
 * that feeds it.
 *
 * The destination port is mandatory and is taken from the
 * OVS_TUNNEL_ATTR_DST_PORT option; optional extensions are read from
 * OVS_TUNNEL_ATTR_EXTENSION.
 *
 * Returns the new vport, or an ERR_PTR() on failure; the vport is freed on
 * every failure path after allocation.
 */
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct vxlan_port *vxlan_port;
	struct vxlan_sock *vs;
	struct vport *vport;
	struct nlattr *a;
	u16 dst_port;
	int err;

	if (!options) {
		err = -EINVAL;
		goto error;
	}

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		dst_port = nla_get_u16(a);
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	vport = ovs_vport_alloc(sizeof(struct vxlan_port),
				&ovs_vxlan_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	vxlan_port = vxlan_vport(vport);
	/*
	 * strncpy() does not terminate the destination when the source
	 * fills it, so copy one byte less and terminate explicitly.
	 */
	strncpy(vxlan_port->name, parms->name, IFNAMSIZ - 1);
	vxlan_port->name[IFNAMSIZ - 1] = '\0';

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
	if (a) {
		err = vxlan_configure_exts(vport, a);
		if (err) {
			ovs_vport_free(vport);
			goto error;
		}
	}

	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
			    vxlan_port->exts);
	if (IS_ERR(vs)) {
		ovs_vport_free(vport);
		return ERR_CAST(vs);
	}
	vxlan_port->vs = vs;

	return vport;

error:
	return ERR_PTR(err);
}
/*
 * Return the GBP value carried in the egress tunnel options of @skb, or 0
 * when the VXLAN option flag is not set or the options area is too small
 * to hold a struct ovs_vxlan_opts.
 */
static int vxlan_ext_gbp(struct sk_buff *skb)
{
	const struct ip_tunnel_info *info = OVS_CB(skb)->egress_tun_info;
	const struct ovs_vxlan_opts *gbp_opts = info->options;

	if (!(info->key.tun_flags & TUNNEL_VXLAN_OPT))
		return 0;

	if (info->options_len < sizeof(*gbp_opts))
		return 0;

	return gbp_opts->gbp;
}
/*
 * vport_ops->send handler: encapsulate @skb in VXLAN and transmit it using
 * the tunnel key attached to the packet.
 *
 * Returns the result of vxlan_xmit_skb(), or a negative errno when the
 * packet has no egress tunnel info or the route lookup fails; on those
 * early failures the skb is freed here.
 */
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
struct sock *sk = vxlan_port->vs->sock->sk;
__be16 dst_port = inet_sk(sk)->inet_sport;
const struct ip_tunnel_key *tun_key;
struct vxlan_metadata md = {0};
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
__be16 df;
int err;
u32 vxflags;
/* Without tunnel metadata there is no destination to send to. */
if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
goto error;
}
tun_key = &OVS_CB(skb)->egress_tun_info->key;
rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
}
/* Propagate the don't-fragment request from the tunnel key to the outer header. */
df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
htons(IP_DF) : 0;
skb->ignore_df = 1;
src_port = udp_flow_src_port(net, skb, 0, 0, true);
/* The tunnel id is shifted left by 8 bits to form the VNI field (vxlan_rcv() does the reverse). */
md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
md.gbp = vxlan_ext_gbp(skb);
/* Port extensions plus UDP checksumming when the tunnel key asks for it. */
vxflags = vxlan_port->exts |
(tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0);
err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port,
&md, false, vxflags);
/* NOTE(review): the route is released only on failure; presumably vxlan_xmit_skb() consumes it on success - confirm. */
if (err < 0)
ip_rt_put(rt);
return err;
error:
kfree_skb(skb);
return err;
}
/*
 * Fill @egress_tun_info with the outer tunnel parameters that would be used
 * to transmit @skb through this VXLAN vport: UDP as protocol, the socket's
 * port as destination port and a flow-derived source port.
 *
 * Returns the result of ovs_tunnel_get_egress_info().
 */
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
				     struct ip_tunnel_info *egress_tun_info)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
	__be16 src_port;

	/*
	 * A 0/0 range is passed, so no explicit local port range lookup is
	 * needed here; the previously fetched range was never used.
	 */
	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	return ovs_tunnel_get_egress_info(egress_tun_info, net,
					  OVS_CB(skb)->egress_tun_info,
					  IPPROTO_UDP, skb->mark,
					  src_port, dst_port);
}
static const char *vxlan_get_name(const struct vport *vport)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
return vxlan_port->name;
}
/*
 * vport operations for OVS_VPORT_TYPE_VXLAN ports implemented directly on
 * top of a vxlan_sock; owned by this module.
 */
static struct vport_ops ovs_vxlan_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
.get_name = vxlan_get_name,
.get_options = vxlan_get_options,
.send = vxlan_tnl_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,
.owner = THIS_MODULE,
};
/* Module init: register the VXLAN vport ops; returns the registration result. */
static int __init ovs_vxlan_tnl_init(void)
{
return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
}
/* Module exit: unregister the VXLAN vport ops. */
static void __exit ovs_vxlan_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
}
module_init(ovs_vxlan_tnl_init);
module_exit(ovs_vxlan_tnl_exit);
MODULE_DESCRIPTION("OVS: VXLAN switching port");
MODULE_LICENSE("GPL");
MODULE_ALIAS("vport-type-4");
#ifndef VPORT_VXLAN_H
#define VPORT_VXLAN_H 1
#include <linux/kernel.h>
#include <linux/types.h>
/*
 * VXLAN tunnel options as stored in the OVS tunnel options area.
 * gbp holds the GBP value copied from the VXLAN metadata on receive and
 * read back on transmit.
 */
struct ovs_vxlan_opts {
__u32 gbp;
};
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment