Commit c73a91b8 authored by David S. Miller

Merge branch 'ovs-gre'

Pravin B Shelar says:

====================
GRE: Use flow based tunneling for OVS GRE vport.

The following patches make use of the new GRE tunnel metadata
collection feature. This allows us to directly use the netdev-based
GRE tunnel implementation. While doing so, I have removed the
GRE demux API, which was targeted at OVS. Most of the GRE
protocol code is now consolidated in the ip_gre module.

v5-v4:
Fixed Kconfig dependency for vport-gre module.

v3-v4:
Added interface to ip-gre device to enable meta data collection.
While doing this I split second patch into two patches.

v2-v3:
Add API to create GRE flow based device.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fb811395 9f57c67c
...@@ -4,6 +4,12 @@ ...@@ -4,6 +4,12 @@
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <net/ip_tunnels.h> #include <net/ip_tunnels.h>
struct gre_base_hdr {
__be16 flags;
__be16 protocol;
};
#define GRE_HEADER_SECTION 4
#define GREPROTO_CISCO 0 #define GREPROTO_CISCO 0
#define GREPROTO_PPTP 1 #define GREPROTO_PPTP 1
#define GREPROTO_MAX 2 #define GREPROTO_MAX 2
...@@ -14,91 +20,9 @@ struct gre_protocol { ...@@ -14,91 +20,9 @@ struct gre_protocol {
void (*err_handler)(struct sk_buff *skb, u32 info); void (*err_handler)(struct sk_buff *skb, u32 info);
}; };
struct gre_base_hdr {
__be16 flags;
__be16 protocol;
};
#define GRE_HEADER_SECTION 4
int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_add_protocol(const struct gre_protocol *proto, u8 version);
int gre_del_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version);
struct gre_cisco_protocol { struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); u8 name_assign_type);
int (*err_handler)(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi);
u8 priority;
};
int gre_cisco_register(struct gre_cisco_protocol *proto);
int gre_cisco_unregister(struct gre_cisco_protocol *proto);
void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
int hdr_len);
static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
bool csum)
{
return iptunnel_handle_offloads(skb, csum,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
static inline int ip_gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
if (o_flags&TUNNEL_CSUM)
addend += 4;
if (o_flags&TUNNEL_KEY)
addend += 4;
if (o_flags&TUNNEL_SEQ)
addend += 4;
return addend;
}
static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
{
__be16 tflags = 0;
if (flags & GRE_CSUM)
tflags |= TUNNEL_CSUM;
if (flags & GRE_ROUTING)
tflags |= TUNNEL_ROUTING;
if (flags & GRE_KEY)
tflags |= TUNNEL_KEY;
if (flags & GRE_SEQ)
tflags |= TUNNEL_SEQ;
if (flags & GRE_STRICT)
tflags |= TUNNEL_STRICT;
if (flags & GRE_REC)
tflags |= TUNNEL_REC;
if (flags & GRE_VERSION)
tflags |= TUNNEL_VERSION;
return tflags;
}
static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
{
__be16 flags = 0;
if (tflags & TUNNEL_CSUM)
flags |= GRE_CSUM;
if (tflags & TUNNEL_ROUTING)
flags |= GRE_ROUTING;
if (tflags & TUNNEL_KEY)
flags |= GRE_KEY;
if (tflags & TUNNEL_SEQ)
flags |= GRE_SEQ;
if (tflags & TUNNEL_STRICT)
flags |= GRE_STRICT;
if (tflags & TUNNEL_REC)
flags |= GRE_REC;
if (tflags & TUNNEL_VERSION)
flags |= GRE_VERSION;
return flags;
}
#endif #endif
...@@ -82,6 +82,8 @@ struct ip_tunnel_dst { ...@@ -82,6 +82,8 @@ struct ip_tunnel_dst {
__be32 saddr; __be32 saddr;
}; };
struct metadata_dst;
struct ip_tunnel { struct ip_tunnel {
struct ip_tunnel __rcu *next; struct ip_tunnel __rcu *next;
struct hlist_node hash_node; struct hlist_node hash_node;
...@@ -115,6 +117,7 @@ struct ip_tunnel { ...@@ -115,6 +117,7 @@ struct ip_tunnel {
unsigned int prl_count; /* # of entries in PRL */ unsigned int prl_count; /* # of entries in PRL */
int ip_tnl_net_id; int ip_tnl_net_id;
struct gro_cells gro_cells; struct gro_cells gro_cells;
bool collect_md;
}; };
#define TUNNEL_CSUM __cpu_to_be16(0x01) #define TUNNEL_CSUM __cpu_to_be16(0x01)
...@@ -149,6 +152,7 @@ struct tnl_ptk_info { ...@@ -149,6 +152,7 @@ struct tnl_ptk_info {
struct ip_tunnel_net { struct ip_tunnel_net {
struct net_device *fb_tunnel_dev; struct net_device *fb_tunnel_dev;
struct hlist_head tunnels[IP_TNL_HASH_SIZE]; struct hlist_head tunnels[IP_TNL_HASH_SIZE];
struct ip_tunnel __rcu *collect_md_tun;
}; };
struct ip_tunnel_encap_ops { struct ip_tunnel_encap_ops {
...@@ -235,7 +239,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, ...@@ -235,7 +239,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 key); __be32 key);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, bool log_ecn_error); const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p); struct ip_tunnel_parm *p);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
......
...@@ -112,6 +112,7 @@ enum { ...@@ -112,6 +112,7 @@ enum {
IFLA_GRE_ENCAP_FLAGS, IFLA_GRE_ENCAP_FLAGS,
IFLA_GRE_ENCAP_SPORT, IFLA_GRE_ENCAP_SPORT,
IFLA_GRE_ENCAP_DPORT, IFLA_GRE_ENCAP_DPORT,
IFLA_GRE_COLLECT_METADATA,
__IFLA_GRE_MAX, __IFLA_GRE_MAX,
}; };
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include <net/xfrm.h> #include <net/xfrm.h>
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
int gre_add_protocol(const struct gre_protocol *proto, u8 version) int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{ {
...@@ -61,197 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) ...@@ -61,197 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
} }
EXPORT_SYMBOL_GPL(gre_del_protocol); EXPORT_SYMBOL_GPL(gre_del_protocol);
void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
int hdr_len)
{
struct gre_base_hdr *greh;
skb_push(skb, hdr_len);
skb_reset_transport_header(skb);
greh = (struct gre_base_hdr *)skb->data;
greh->flags = tnl_flags_to_gre_flags(tpi->flags);
greh->protocol = tpi->proto;
if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
if (tpi->flags&TUNNEL_SEQ) {
*ptr = tpi->seq;
ptr--;
}
if (tpi->flags&TUNNEL_KEY) {
*ptr = tpi->key;
ptr--;
}
if (tpi->flags&TUNNEL_CSUM &&
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
*ptr = 0;
*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
skb->len, 0));
}
}
}
EXPORT_SYMBOL_GPL(gre_build_header);
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
{
const struct gre_base_hdr *greh;
__be32 *options;
int hdr_len;
if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
return -EINVAL;
greh = (struct gre_base_hdr *)skb_transport_header(skb);
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
tpi->flags = gre_flags_to_tnl_flags(greh->flags);
hdr_len = ip_gre_calc_hlen(tpi->flags);
if (!pskb_may_pull(skb, hdr_len))
return -EINVAL;
greh = (struct gre_base_hdr *)skb_transport_header(skb);
tpi->proto = greh->protocol;
options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
if (skb_checksum_simple_validate(skb)) {
*csum_err = true;
return -EINVAL;
}
skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
null_compute_pseudo);
options++;
}
if (greh->flags & GRE_KEY) {
tpi->key = *options;
options++;
} else
tpi->key = 0;
if (unlikely(greh->flags & GRE_SEQ)) {
tpi->seq = *options;
options++;
} else
tpi->seq = 0;
/* WCCP version 1 and 2 protocol decoding.
* - Change protocol to IP
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
tpi->proto = htons(ETH_P_IP);
if ((*(u8 *)options & 0xF0) != 0x40) {
hdr_len += 4;
if (!pskb_may_pull(skb, hdr_len))
return -EINVAL;
}
}
return iptunnel_pull_header(skb, hdr_len, tpi->proto);
}
static int gre_cisco_rcv(struct sk_buff *skb)
{
struct tnl_ptk_info tpi;
int i;
bool csum_err = false;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
/* Looped back packet, drop it! */
if (rt_is_output_route(skb_rtable(skb)))
goto drop;
}
#endif
if (parse_gre_header(skb, &tpi, &csum_err) < 0)
goto drop;
rcu_read_lock();
for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
struct gre_cisco_protocol *proto;
int ret;
proto = rcu_dereference(gre_cisco_proto_list[i]);
if (!proto)
continue;
ret = proto->handler(skb, &tpi);
if (ret == PACKET_RCVD) {
rcu_read_unlock();
return 0;
}
}
rcu_read_unlock();
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
return 0;
}
static void gre_cisco_err(struct sk_buff *skb, u32 info)
{
/* All the routers (except for Linux) return only
* 8 bytes of packet payload. It means, that precise relaying of
* ICMP in the real Internet is absolutely infeasible.
*
* Moreover, Cisco "wise men" put GRE key to the third word
* in GRE header. It makes impossible maintaining even soft
* state for keyed
* GRE tunnels with enabled checksum. Tell them "thank you".
*
* Well, I wonder, rfc1812 was written by Cisco employee,
* what the hell these idiots break standards established
* by themselves???
*/
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct tnl_ptk_info tpi;
bool csum_err = false;
int i;
if (parse_gre_header(skb, &tpi, &csum_err)) {
if (!csum_err) /* ignore csum errors. */
return;
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
skb->dev->ifindex, 0, IPPROTO_GRE, 0);
return;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
IPPROTO_GRE, 0);
return;
}
rcu_read_lock();
for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
struct gre_cisco_protocol *proto;
proto = rcu_dereference(gre_cisco_proto_list[i]);
if (!proto)
continue;
if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
goto out;
}
out:
rcu_read_unlock();
}
static int gre_rcv(struct sk_buff *skb) static int gre_rcv(struct sk_buff *skb)
{ {
const struct gre_protocol *proto; const struct gre_protocol *proto;
...@@ -302,60 +110,19 @@ static const struct net_protocol net_gre_protocol = { ...@@ -302,60 +110,19 @@ static const struct net_protocol net_gre_protocol = {
.netns_ok = 1, .netns_ok = 1,
}; };
static const struct gre_protocol ipgre_protocol = {
.handler = gre_cisco_rcv,
.err_handler = gre_cisco_err,
};
int gre_cisco_register(struct gre_cisco_protocol *newp)
{
struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
&gre_cisco_proto_list[newp->priority];
return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(gre_cisco_register);
int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
{
struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
&gre_cisco_proto_list[del_proto->priority];
int ret;
ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
if (ret)
return ret;
synchronize_net();
return 0;
}
EXPORT_SYMBOL_GPL(gre_cisco_unregister);
static int __init gre_init(void) static int __init gre_init(void)
{ {
pr_info("GRE over IPv4 demultiplexor driver\n"); pr_info("GRE over IPv4 demultiplexor driver\n");
if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
pr_err("can't add protocol\n"); pr_err("can't add protocol\n");
goto err; return -EAGAIN;
}
if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
pr_info("%s: can't add ipgre handler\n", __func__);
goto err_gre;
} }
return 0; return 0;
err_gre:
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
err:
return -EAGAIN;
} }
static void __exit gre_exit(void) static void __exit gre_exit(void)
{ {
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
} }
......
This diff is collapsed.
...@@ -230,10 +230,13 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, ...@@ -230,10 +230,13 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (cand) if (cand)
return cand; return cand;
t = rcu_dereference(itn->collect_md_tun);
if (t)
return t;
if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
return netdev_priv(itn->fb_tunnel_dev); return netdev_priv(itn->fb_tunnel_dev);
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(ip_tunnel_lookup); EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
...@@ -261,11 +264,15 @@ static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) ...@@ -261,11 +264,15 @@ static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{ {
struct hlist_head *head = ip_bucket(itn, &t->parms); struct hlist_head *head = ip_bucket(itn, &t->parms);
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, t);
hlist_add_head_rcu(&t->hash_node, head); hlist_add_head_rcu(&t->hash_node, head);
} }
static void ip_tunnel_del(struct ip_tunnel *t) static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{ {
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, NULL);
hlist_del_init_rcu(&t->hash_node); hlist_del_init_rcu(&t->hash_node);
} }
...@@ -419,7 +426,8 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, ...@@ -419,7 +426,8 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
} }
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, bool log_ecn_error) const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error)
{ {
struct pcpu_sw_netstats *tstats; struct pcpu_sw_netstats *tstats;
const struct iphdr *iph = ip_hdr(skb); const struct iphdr *iph = ip_hdr(skb);
...@@ -478,6 +486,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, ...@@ -478,6 +486,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
skb->dev = tunnel->dev; skb->dev = tunnel->dev;
} }
if (tun_dst)
skb_dst_set(skb, (struct dst_entry *)tun_dst);
gro_cells_receive(&tunnel->gro_cells, skb); gro_cells_receive(&tunnel->gro_cells, skb);
return 0; return 0;
...@@ -806,7 +817,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, ...@@ -806,7 +817,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel_parm *p, struct ip_tunnel_parm *p,
bool set_mtu) bool set_mtu)
{ {
ip_tunnel_del(t); ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr; t->parms.iph.saddr = p->iph.saddr;
t->parms.iph.daddr = p->iph.daddr; t->parms.iph.daddr = p->iph.daddr;
t->parms.i_key = p->i_key; t->parms.i_key = p->i_key;
...@@ -967,7 +978,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) ...@@ -967,7 +978,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
if (itn->fb_tunnel_dev != dev) { if (itn->fb_tunnel_dev != dev) {
ip_tunnel_del(netdev_priv(dev)); ip_tunnel_del(itn, netdev_priv(dev));
unregister_netdevice_queue(dev, head); unregister_netdevice_queue(dev, head);
} }
} }
...@@ -1072,8 +1083,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -1072,8 +1083,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt = netdev_priv(dev); nt = netdev_priv(dev);
itn = net_generic(net, nt->ip_tnl_net_id); itn = net_generic(net, nt->ip_tnl_net_id);
if (ip_tunnel_find(itn, p, dev->type)) if (nt->collect_md) {
return -EEXIST; if (rtnl_dereference(itn->collect_md_tun))
return -EEXIST;
} else {
if (ip_tunnel_find(itn, p, dev->type))
return -EEXIST;
}
nt->net = net; nt->net = net;
nt->parms = *p; nt->parms = *p;
...@@ -1089,7 +1105,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -1089,7 +1105,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
dev->mtu = mtu; dev->mtu = mtu;
ip_tunnel_add(itn, nt); ip_tunnel_add(itn, nt);
out: out:
return err; return err;
} }
...@@ -1163,6 +1178,10 @@ int ip_tunnel_init(struct net_device *dev) ...@@ -1163,6 +1178,10 @@ int ip_tunnel_init(struct net_device *dev)
iph->version = 4; iph->version = 4;
iph->ihl = 5; iph->ihl = 5;
if (tunnel->collect_md) {
dev->features |= NETIF_F_NETNS_LOCAL;
netif_keep_dst(dev);
}
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(ip_tunnel_init); EXPORT_SYMBOL_GPL(ip_tunnel_init);
...@@ -1176,7 +1195,7 @@ void ip_tunnel_uninit(struct net_device *dev) ...@@ -1176,7 +1195,7 @@ void ip_tunnel_uninit(struct net_device *dev)
itn = net_generic(net, tunnel->ip_tnl_net_id); itn = net_generic(net, tunnel->ip_tnl_net_id);
/* fb_tunnel_dev will be unregisted in net-exit call. */ /* fb_tunnel_dev will be unregisted in net-exit call. */
if (itn->fb_tunnel_dev != dev) if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(netdev_priv(dev)); ip_tunnel_del(itn, netdev_priv(dev));
ip_tunnel_dst_reset_all(tunnel); ip_tunnel_dst_reset_all(tunnel);
} }
......
...@@ -198,7 +198,7 @@ static int ipip_rcv(struct sk_buff *skb) ...@@ -198,7 +198,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop; goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto)) if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop; goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
} }
return -1; return -1;
......
...@@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb) ...@@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop; goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto)) if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop; goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
} }
return 1; return 1;
......
...@@ -34,7 +34,7 @@ config OPENVSWITCH ...@@ -34,7 +34,7 @@ config OPENVSWITCH
config OPENVSWITCH_GRE config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support" tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH depends on OPENVSWITCH
depends on NET_IPGRE_DEMUX depends on NET_IPGRE
default OPENVSWITCH default OPENVSWITCH
---help--- ---help---
If you say Y here, then the Open vSwitch will be able create GRE If you say Y here, then the Open vSwitch will be able create GRE
......
...@@ -45,235 +45,43 @@ ...@@ -45,235 +45,43 @@
#include "datapath.h" #include "datapath.h"
#include "vport.h" #include "vport.h"
#include "vport-netdev.h"
static struct vport_ops ovs_gre_vport_ops; static struct vport_ops ovs_gre_vport_ops;
/* Returns the least-significant 32 bits of a __be64. */ static struct vport *gre_tnl_create(const struct vport_parms *parms)
static __be32 be64_get_low32(__be64 x)
{ {
#ifdef __BIG_ENDIAN struct net *net = ovs_dp_get_net(parms->dp);
return (__force __be32)x; struct net_device *dev;
#else
return (__force __be32)((__force u64)x >> 32);
#endif
}
static __be16 filter_tnl_flags(__be16 flags)
{
return flags & (TUNNEL_CSUM | TUNNEL_KEY);
}
static struct sk_buff *__build_header(struct sk_buff *skb,
int tunnel_hlen)
{
struct tnl_ptk_info tpi;
const struct ip_tunnel_key *tun_key;
tun_key = &OVS_CB(skb)->egress_tun_info->key;
skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
if (IS_ERR(skb))
return skb;
tpi.flags = filter_tnl_flags(tun_key->tun_flags);
tpi.proto = htons(ETH_P_TEB);
tpi.key = be64_get_low32(tun_key->tun_id);
tpi.seq = 0;
gre_build_header(skb, &tpi, tunnel_hlen);
return skb;
}
static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
{
#ifdef __BIG_ENDIAN
return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
#else
return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
#endif
}
/* Called with rcu_read_lock and BH disabled. */
static int gre_rcv(struct sk_buff *skb,
const struct tnl_ptk_info *tpi)
{
struct ip_tunnel_info tun_info;
struct ovs_net *ovs_net;
struct vport *vport;
__be64 key;
ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
vport = rcu_dereference(ovs_net->vport_net.gre_vport);
if (unlikely(!vport))
return PACKET_REJECT;
key = key_to_tunnel_id(tpi->key, tpi->seq);
ip_tunnel_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
return PACKET_RCVD;
}
/* Called with rcu_read_lock and BH disabled. */
static int gre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
{
struct ovs_net *ovs_net;
struct vport *vport; struct vport *vport;
ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
vport = rcu_dereference(ovs_net->vport_net.gre_vport); if (IS_ERR(vport))
return vport;
if (unlikely(!vport))
return PACKET_REJECT; rtnl_lock();
else dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
return PACKET_RCVD; if (IS_ERR(dev)) {
} rtnl_unlock();
ovs_vport_free(vport);
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) return ERR_CAST(dev);
{
struct net *net = ovs_dp_get_net(vport->dp);
const struct ip_tunnel_key *tun_key;
struct flowi4 fl;
struct rtable *rt;
int min_headroom;
int tunnel_hlen;
__be16 df;
int err;
if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
goto err_free_skb;
}
tun_key = &OVS_CB(skb)->egress_tun_info->key;
rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto err_free_skb;
}
tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
int head_delta = SKB_DATA_ALIGN(min_headroom -
skb_headroom(skb) +
16);
err = pskb_expand_head(skb, max_t(int, head_delta, 0),
0, GFP_ATOMIC);
if (unlikely(err))
goto err_free_rt;
}
skb = vlan_hwaccel_push_inside(skb);
if (unlikely(!skb)) {
err = -ENOMEM;
goto err_free_rt;
}
/* Push Tunnel header. */
skb = __build_header(skb, tunnel_hlen);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
skb = NULL;
goto err_free_rt;
} }
df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? dev_change_flags(dev, dev->flags | IFF_UP);
htons(IP_DF) : 0; rtnl_unlock();
skb->ignore_df = 1;
return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
tun_key->ipv4_dst, IPPROTO_GRE,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
err_free_rt:
ip_rt_put(rt);
err_free_skb:
kfree_skb(skb);
return err;
}
static struct gre_cisco_protocol gre_protocol = {
.handler = gre_rcv,
.err_handler = gre_err,
.priority = 1,
};
static int gre_ports;
static int gre_init(void)
{
int err;
gre_ports++;
if (gre_ports > 1)
return 0;
err = gre_cisco_register(&gre_protocol);
if (err)
pr_warn("cannot register gre protocol handler\n");
return err;
}
static void gre_exit(void)
{
gre_ports--;
if (gre_ports > 0)
return;
gre_cisco_unregister(&gre_protocol);
}
static const char *gre_get_name(const struct vport *vport) return vport;
{
return vport_priv(vport);
} }
static struct vport *gre_create(const struct vport_parms *parms) static struct vport *gre_create(const struct vport_parms *parms)
{ {
struct net *net = ovs_dp_get_net(parms->dp);
struct ovs_net *ovs_net;
struct vport *vport; struct vport *vport;
int err;
err = gre_init();
if (err)
return ERR_PTR(err);
ovs_net = net_generic(net, ovs_net_id);
if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
vport = ERR_PTR(-EEXIST);
goto error;
}
vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); vport = gre_tnl_create(parms);
if (IS_ERR(vport)) if (IS_ERR(vport))
goto error; return vport;
strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
return vport;
error:
gre_exit();
return vport;
}
static void gre_tnl_destroy(struct vport *vport)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct ovs_net *ovs_net;
ovs_net = net_generic(net, ovs_net_id);
RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL); return ovs_netdev_link(vport, parms->name);
ovs_vport_deferred_free(vport);
gre_exit();
} }
static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
...@@ -288,10 +96,9 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, ...@@ -288,10 +96,9 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
static struct vport_ops ovs_gre_vport_ops = { static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE, .type = OVS_VPORT_TYPE_GRE,
.create = gre_create, .create = gre_create,
.destroy = gre_tnl_destroy, .send = ovs_netdev_send,
.get_name = gre_get_name,
.send = gre_tnl_send,
.get_egress_tun_info = gre_get_egress_tun_info, .get_egress_tun_info = gre_get_egress_tun_info,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE, .owner = THIS_MODULE,
}; };
......
...@@ -147,7 +147,7 @@ static struct vport *netdev_create(const struct vport_parms *parms) ...@@ -147,7 +147,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name); return ovs_netdev_link(vport, parms->name);
} }
void ovs_vport_free_rcu(struct rcu_head *rcu) static void vport_netdev_free(struct rcu_head *rcu)
{ {
struct vport *vport = container_of(rcu, struct vport, rcu); struct vport *vport = container_of(rcu, struct vport, rcu);
...@@ -155,7 +155,6 @@ void ovs_vport_free_rcu(struct rcu_head *rcu) ...@@ -155,7 +155,6 @@ void ovs_vport_free_rcu(struct rcu_head *rcu)
dev_put(vport->dev); dev_put(vport->dev);
ovs_vport_free(vport); ovs_vport_free(vport);
} }
EXPORT_SYMBOL_GPL(ovs_vport_free_rcu);
void ovs_netdev_detach_dev(struct vport *vport) void ovs_netdev_detach_dev(struct vport *vport)
{ {
...@@ -175,9 +174,25 @@ static void netdev_destroy(struct vport *vport) ...@@ -175,9 +174,25 @@ static void netdev_destroy(struct vport *vport)
ovs_netdev_detach_dev(vport); ovs_netdev_detach_dev(vport);
rtnl_unlock(); rtnl_unlock();
call_rcu(&vport->rcu, ovs_vport_free_rcu); call_rcu(&vport->rcu, vport_netdev_free);
} }
void ovs_netdev_tunnel_destroy(struct vport *vport)
{
rtnl_lock();
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
/* Early release so we can unregister the device */
dev_put(vport->dev);
rtnl_delete_link(vport->dev);
vport->dev = NULL;
rtnl_unlock();
call_rcu(&vport->rcu, vport_netdev_free);
}
EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
static unsigned int packet_length(const struct sk_buff *skb) static unsigned int packet_length(const struct sk_buff *skb)
{ {
unsigned int length = skb->len - ETH_HLEN; unsigned int length = skb->len - ETH_HLEN;
......
...@@ -29,9 +29,9 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); ...@@ -29,9 +29,9 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev);
struct vport *ovs_netdev_link(struct vport *vport, const char *name); struct vport *ovs_netdev_link(struct vport *vport, const char *name);
int ovs_netdev_send(struct vport *vport, struct sk_buff *skb); int ovs_netdev_send(struct vport *vport, struct sk_buff *skb);
void ovs_netdev_detach_dev(struct vport *); void ovs_netdev_detach_dev(struct vport *);
void ovs_vport_free_rcu(struct rcu_head *);
int __init ovs_netdev_init(void); int __init ovs_netdev_init(void);
void ovs_netdev_exit(void); void ovs_netdev_exit(void);
void ovs_netdev_tunnel_destroy(struct vport *vport);
#endif /* vport_netdev.h */ #endif /* vport_netdev.h */
...@@ -146,21 +146,6 @@ static struct vport *vxlan_create(const struct vport_parms *parms) ...@@ -146,21 +146,6 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name); return ovs_netdev_link(vport, parms->name);
} }
static void vxlan_destroy(struct vport *vport)
{
rtnl_lock();
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
/* Early release so we can unregister the device */
dev_put(vport->dev);
rtnl_delete_link(vport->dev);
vport->dev = NULL;
rtnl_unlock();
call_rcu(&vport->rcu, ovs_vport_free_rcu);
}
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct ip_tunnel_info *egress_tun_info) struct ip_tunnel_info *egress_tun_info)
{ {
...@@ -183,7 +168,7 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, ...@@ -183,7 +168,7 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
static struct vport_ops ovs_vxlan_netdev_vport_ops = { static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN, .type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create, .create = vxlan_create,
.destroy = vxlan_destroy, .destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options, .get_options = vxlan_get_options,
.send = ovs_netdev_send, .send = ovs_netdev_send,
.get_egress_tun_info = vxlan_get_egress_tun_info, .get_egress_tun_info = vxlan_get_egress_tun_info,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment