Commit 4cba259f authored by David S. Miller's avatar David S. Miller

Merge branch 'unified-tunnel-dst-caching'

Paolo Abeni says:

====================
net: unify dst caching for tunnel devices

This patch series try to unify the dst cache implementations currently
present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, introducing a
new generic implementation, replacing the existing ones, and then using
the new implementation in other tunnel devices which currently lack it.

The new dst implementation is compiled, as built-in, only if any device using
it is enabled.

Caching the dst for the tunnel remote address gives small, but measurable,
performance improvement when tunneling over ipv4 (in the 2%-4% range) and
significant ones when tunneling over ipv6 (roughly 60% when no
fragmentation/segmentation take place and the tunnel local address
is not specified).

v2:
- move the vxlan dst_cache usage inside the device lookup functions
- fix usage after free for lwt tunnel moving the dst cache storage inside
  the dst_metadata,
- sparse codying style cleanup
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 64f63d59 3c1cb4d2
......@@ -72,6 +72,7 @@ struct geneve_dev {
bool collect_md;
struct gro_cells gro_cells;
u32 flags;
struct dst_cache dst_cache;
};
/* Geneve device flags */
......@@ -297,6 +298,13 @@ static int geneve_init(struct net_device *dev)
return err;
}
err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
if (err) {
free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
return err;
}
return 0;
}
......@@ -304,6 +312,7 @@ static void geneve_uninit(struct net_device *dev)
{
struct geneve_dev *geneve = netdev_priv(dev);
dst_cache_destroy(&geneve->dst_cache);
gro_cells_destroy(&geneve->gro_cells);
free_percpu(dev->tstats);
}
......@@ -753,7 +762,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct ip_tunnel_info *info)
{
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL;
bool use_cache = true;
__u8 tos;
memset(fl4, 0, sizeof(*fl4));
......@@ -764,16 +775,26 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
fl4->daddr = info->key.u.ipv4.dst;
fl4->saddr = info->key.u.ipv4.src;
fl4->flowi4_tos = RT_TOS(info->key.tos);
dst_cache = &info->dst_cache;
} else {
tos = geneve->tos;
if (tos == 1) {
const struct iphdr *iip = ip_hdr(skb);
tos = ip_tunnel_get_dsfield(iip, skb);
use_cache = false;
}
fl4->flowi4_tos = RT_TOS(tos);
fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
return rt;
}
rt = ip_route_output_key(geneve->net, fl4);
......@@ -786,6 +807,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
return rt;
}
......@@ -798,6 +821,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
bool use_cache = true;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
......@@ -808,16 +833,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->daddr = info->key.u.ipv6.dst;
fl6->saddr = info->key.u.ipv6.src;
fl6->flowi6_tos = RT_TOS(info->key.tos);
dst_cache = &info->dst_cache;
} else {
prio = geneve->tos;
if (prio == 1) {
const struct iphdr *iip = ip_hdr(skb);
prio = ip_tunnel_get_dsfield(iip, skb);
use_cache = false;
}
fl6->flowi6_tos = RT_TOS(prio);
fl6->daddr = geneve->remote.sin6.sin6_addr;
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst)
return dst;
}
if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
......@@ -830,6 +865,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
return ERR_PTR(-ELOOP);
}
if (use_cache)
dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
return dst;
}
#endif
......@@ -1272,6 +1309,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
return -EPERM;
}
dst_cache_reset(&geneve->dst_cache);
err = register_netdevice(dev);
if (err)
return err;
......
......@@ -480,6 +480,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
if (!rd)
return 0;
dst_cache_reset(&rd->dst_cache);
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
......@@ -501,6 +503,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL)
return -ENOBUFS;
if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
kfree(rd);
return -ENOBUFS;
}
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
......@@ -749,8 +757,10 @@ static void vxlan_fdb_free(struct rcu_head *head)
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
struct vxlan_rdst *rd, *nd;
list_for_each_entry_safe(rd, nd, &f->remotes, list)
list_for_each_entry_safe(rd, nd, &f->remotes, list) {
dst_cache_destroy(&rd->dst_cache);
kfree(rd);
}
kfree(f);
}
......@@ -1754,11 +1764,24 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr)
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
struct ip_tunnel_info *info)
{
struct rtable *rt = NULL;
bool use_cache = false;
struct flowi4 fl4;
/* when the ip_tunnel_info is availble, the tos used for lookup is
* packet independent, so we can use the cache
*/
if (!skb->mark && (!tos || info)) {
use_cache = true;
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos);
......@@ -1768,8 +1791,11 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt))
if (!IS_ERR(rt)) {
*saddr = fl4.saddr;
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
}
return rt;
}
......@@ -1777,12 +1803,19 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif,
const struct in6_addr *daddr,
struct in6_addr *saddr)
struct in6_addr *saddr,
struct dst_cache *dst_cache)
{
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
if (!skb->mark) {
ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst)
return ndst;
}
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
......@@ -1797,6 +1830,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
if (!skb->mark)
dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
#endif
......@@ -1849,6 +1884,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk;
......@@ -1873,6 +1909,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
dst_cache = &rdst->dst_cache;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
......@@ -1887,6 +1924,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip;
dst_cache = &info->dst_cache;
}
if (vxlan_addr_any(dst)) {
......@@ -1938,7 +1976,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr, &saddr);
dst->sin.sin_addr.s_addr, &saddr,
dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
......@@ -1990,7 +2029,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0,
&dst->sin6.sin6_addr, &saddr);
&dst->sin6.sin6_addr, &saddr,
dst_cache);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
......@@ -2331,7 +2371,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL;
rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
&info->key.u.ipv4.src);
&info->key.u.ipv4.src, NULL, info);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
......@@ -2343,7 +2383,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0,
&info->key.u.ipv6.dst,
&info->key.u.ipv6.src);
&info->key.u.ipv6.src, NULL);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
......
#ifndef _NET_DST_CACHE_H
#define _NET_DST_CACHE_H
#include <linux/jiffies.h>
#include <net/dst.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
struct dst_cache {
struct dst_cache_pcpu __percpu *cache;
unsigned long reset_ts;
};
/**
* dst_cache_get - perform cache lookup
* @dst_cache: the cache
*
* The caller should use dst_cache_get_ip4() if it need to retrieve the
* source address to be used when xmitting to the cached dst.
* local BH must be disabled.
*/
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
/**
* dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
* @dst_cache: the cache
* @saddr: return value for the retrieved source address
*
* local BH must be disabled.
*/
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
/**
* dst_cache_set_ip4 - store the ipv4 dst into the cache
* @dst_cache: the cache
* @dst: the entry to be cached
* @saddr: the source address to be stored inside the cache
*
* local BH must be disabled.
*/
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
__be32 saddr);
#if IS_ENABLED(CONFIG_IPV6)
/**
* dst_cache_set_ip6 - store the ipv6 dst into the cache
* @dst_cache: the cache
* @dst: the entry to be cached
* @saddr: the source address to be stored inside the cache
*
* local BH must be disabled.
*/
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
const struct in6_addr *addr);
/**
* dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
* @dst_cache: the cache
* @saddr: return value for the retrieved source address
*
* local BH must be disabled.
*/
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
struct in6_addr *saddr);
#endif
/**
* dst_cache_reset - invalidate the cache contents
* @dst_cache: the cache
*
* This do not free the cached dst to avoid races and contentions.
* the dst will be freed on later cache lookup.
*/
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
dst_cache->reset_ts = jiffies;
}
/**
* dst_cache_init - initialize the cache, allocating the required storage
* @dst_cache: the cache
* @gfp: allocation flags
*/
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
/**
* dst_cache_destroy - empty the cache and free the allocated storage
* @dst_cache: the cache
*
* No synchronization is enforced: it must be called only when the cache
* is unsed.
*/
void dst_cache_destroy(struct dst_cache *dst_cache);
#endif
......@@ -62,6 +62,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
sizeof(a->u.tun_info) + a->u.tun_info.options_len);
}
void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
......
......@@ -6,6 +6,7 @@
#include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h>
#include <net/ip_tunnels.h>
#include <net/dst_cache.h>
#define IP6TUNNEL_ERR_TIMEO (30*HZ)
......@@ -33,12 +34,6 @@ struct __ip6_tnl_parm {
__be32 o_key;
};
struct ip6_tnl_dst {
seqlock_t lock;
struct dst_entry __rcu *dst;
u32 cookie;
};
/* IPv6 tunnel */
struct ip6_tnl {
struct ip6_tnl __rcu *next; /* next tunnel in list */
......@@ -46,7 +41,7 @@ struct ip6_tnl {
struct net *net; /* netns for packet i/o */
struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
struct flowi fl; /* flowi template for xmit */
struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */
struct dst_cache dst_cache; /* cached dst */
int err_count;
unsigned long err_time;
......@@ -66,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim {
__u8 encap_limit; /* tunnel encapsulation limit */
} __packed;
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
int ip6_tnl_dst_init(struct ip6_tnl *t);
void ip6_tnl_dst_destroy(struct ip6_tnl *t);
void ip6_tnl_dst_reset(struct ip6_tnl *t);
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
const struct in6_addr *raddr);
int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
......
......@@ -13,6 +13,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/lwtunnel.h>
#include <net/dst_cache.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
......@@ -57,6 +58,9 @@ struct ip_tunnel_key {
struct ip_tunnel_info {
struct ip_tunnel_key key;
#ifdef CONFIG_DST_CACHE
struct dst_cache dst_cache;
#endif
u8 options_len;
u8 mode;
};
......@@ -85,11 +89,6 @@ struct ip_tunnel_prl_entry {
struct rcu_head rcu_head;
};
struct ip_tunnel_dst {
struct dst_entry __rcu *dst;
__be32 saddr;
};
struct metadata_dst;
struct ip_tunnel {
......@@ -108,7 +107,7 @@ struct ip_tunnel {
int tun_hlen; /* Precalculated header length */
int mlink;
struct ip_tunnel_dst __percpu *dst_cache;
struct dst_cache dst_cache;
struct ip_tunnel_parm parms;
......@@ -247,7 +246,6 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id);
void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
int ip_tunnel_encap_setup(struct ip_tunnel *t,
struct ip_tunnel_encap *ipencap);
......
......@@ -148,6 +148,7 @@ struct vxlan_rdst {
u32 remote_ifindex;
struct list_head list;
struct rcu_head rcu;
struct dst_cache dst_cache;
};
struct vxlan_config {
......
......@@ -392,6 +392,10 @@ config LWTUNNEL
weight tunnel endpoint. Tunnel encapsulation parameters are stored
with light weight tunnel state associated with fib routes.
config DST_CACHE
bool "dst cache"
default n
endif # if NET
# Used by archs to tell that they support BPF_JIT
......
......@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
......@@ -265,7 +265,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
lwtstate_put(dst->lwtstate);
if (dst->flags & DST_METADATA)
kfree(dst);
metadata_dst_free((struct metadata_dst *)dst);
else
kmem_cache_free(dst->ops->kmem_cachep, dst);
......@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
{
int cpu;
......
/*
* net/core/dst_cache.c - dst entry cache
*
* Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <net/dst_cache.h>
#include <net/route.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
#include <uapi/linux/in.h>
struct dst_cache_pcpu {
unsigned long refresh_ts;
struct dst_entry *dst;
u32 cookie;
union {
struct in_addr in_saddr;
struct in6_addr in6_saddr;
};
};
void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
struct dst_entry *dst, u32 cookie)
{
dst_release(dst_cache->dst);
if (dst)
dst_hold(dst);
dst_cache->cookie = cookie;
dst_cache->dst = dst;
}
struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
struct dst_cache_pcpu *idst)
{
struct dst_entry *dst;
dst = idst->dst;
if (!dst)
goto fail;
/* the cache already hold a dst reference; it can't go away */
dst_hold(dst);
if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
(dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
goto fail;
}
return dst;
fail:
idst->refresh_ts = jiffies;
return NULL;
}
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
{
if (!dst_cache->cache)
return NULL;
return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
}
EXPORT_SYMBOL_GPL(dst_cache_get);
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
{
struct dst_cache_pcpu *idst;
struct dst_entry *dst;
if (!dst_cache->cache)
return NULL;
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
if (!dst)
return NULL;
*saddr = idst->in_saddr.s_addr;
return container_of(dst, struct rtable, dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
__be32 saddr)
{
struct dst_cache_pcpu *idst;
if (!dst_cache->cache)
return;
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(idst, dst, 0);
idst->in_saddr.s_addr = saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
const struct in6_addr *addr)
{
struct dst_cache_pcpu *idst;
if (!dst_cache->cache)
return;
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
idst->in6_saddr = *addr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
struct dst_entry *dst;
if (!dst_cache->cache)
return NULL;
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
if (!dst)
return NULL;
*saddr = idst->in6_saddr;
return dst;
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
#endif
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
gfp | __GFP_ZERO);
if (!dst_cache->cache)
return -ENOMEM;
dst_cache_reset(dst_cache);
return 0;
}
EXPORT_SYMBOL_GPL(dst_cache_init);
void dst_cache_destroy(struct dst_cache *dst_cache)
{
int i;
if (!dst_cache->cache)
return;
for_each_possible_cpu(i)
dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
free_percpu(dst_cache->cache);
}
EXPORT_SYMBOL_GPL(dst_cache_destroy);
......@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
config NET_IP_TUNNEL
tristate
select DST_CACHE
default n
config NET_IPGRE
......
......@@ -540,9 +540,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
NULL;
if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
if (!skb->mark)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
}
tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
......
......@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
struct dst_entry *dst, __be32 saddr)
{
struct dst_entry *old_dst;
dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
idst->saddr = saddr;
}
static noinline void tunnel_dst_set(struct ip_tunnel *t,
struct dst_entry *dst, __be32 saddr)
{
__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
static void tunnel_dst_reset(struct ip_tunnel *t)
{
tunnel_dst_set(t, NULL, 0);
}
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
u32 cookie, __be32 *saddr)
{
struct ip_tunnel_dst *idst;
struct dst_entry *dst;
rcu_read_lock();
idst = raw_cpu_ptr(t->dst_cache);
dst = rcu_dereference(idst->dst);
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
dst = NULL;
if (dst) {
if (!dst->obsolete || dst->ops->check(dst, cookie)) {
*saddr = idst->saddr;
} else {
tunnel_dst_reset(t);
dst_release(dst);
dst = NULL;
}
}
rcu_read_unlock();
return (struct rtable *)dst;
}
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
......@@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
......@@ -729,7 +675,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
......@@ -739,7 +686,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
if (connected)
tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
fl4.saddr);
}
if (rt->dst.dev == dev) {
......@@ -836,7 +784,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
ip_tunnel_dst_reset_all(t);
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
......@@ -961,7 +909,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells);
free_percpu(tunnel->dst_cache);
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
......@@ -1155,15 +1103,15 @@ int ip_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (err) {
free_percpu(dev->tstats);
return -ENOMEM;
return err;
}
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
free_percpu(tunnel->dst_cache);
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
return err;
}
......@@ -1193,7 +1141,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(itn, netdev_priv(dev));
ip_tunnel_dst_reset_all(tunnel);
dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
......
......@@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE
config IPV6_TUNNEL
tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
select INET6_TUNNEL
select DST_CACHE
---help---
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473.
......
......@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
ip6_tnl_dst_reset(t);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
......@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_get(tunnel);
dst = dst_cache_get(&tunnel->dst_cache);
if (!dst) {
dst = ip6_route_output(net, NULL, fl6);
......@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark && ndst)
ip6_tnl_dst_set(tunnel, ndst);
dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
......@@ -1009,7 +1009,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
ip6_tnl_dst_reset(t);
dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
}
......@@ -1219,7 +1219,7 @@ static void ip6gre_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
ip6_tnl_dst_destroy(t);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
......@@ -1257,7 +1257,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
ret = ip6_tnl_dst_init(tunnel);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
......
......@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
return &dev->stats;
}
/*
* Locking : hash tables are protected by RCU and RTNL
*/
static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
struct dst_entry *dst)
{
write_seqlock_bh(&idst->lock);
dst_release(rcu_dereference_protected(
idst->dst,
lockdep_is_held(&idst->lock.lock)));
if (dst) {
dst_hold(dst);
idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
} else {
idst->cookie = 0;
}
rcu_assign_pointer(idst->dst, dst);
write_sequnlock_bh(&idst->lock);
}
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
{
struct ip6_tnl_dst *idst;
struct dst_entry *dst;
unsigned int seq;
u32 cookie;
idst = raw_cpu_ptr(t->dst_cache);
rcu_read_lock();
do {
seq = read_seqbegin(&idst->lock);
dst = rcu_dereference(idst->dst);
cookie = idst->cookie;
} while (read_seqretry(&idst->lock, seq));
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
dst = NULL;
rcu_read_unlock();
if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
ip6_tnl_per_cpu_dst_set(idst, NULL);
dst_release(dst);
dst = NULL;
}
return dst;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
int i;
for_each_possible_cpu(i)
ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
{
ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
void ip6_tnl_dst_destroy(struct ip6_tnl *t)
{
if (!t->dst_cache)
return;
ip6_tnl_dst_reset(t);
free_percpu(t->dst_cache);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
int ip6_tnl_dst_init(struct ip6_tnl *t)
{
int i;
t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
if (!t->dst_cache)
return -ENOMEM;
for_each_possible_cpu(i)
seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
return 0;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
......@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
ip6_tnl_dst_destroy(t);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
......@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
ip6_tnl_unlink(ip6n, t);
ip6_tnl_dst_reset(t);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
......@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
} else if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_get(t);
dst = dst_cache_get(&t->dst_cache);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
goto tx_err_link_failure;
......@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark && ndst)
ip6_tnl_dst_set(t, ndst);
dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
skb->transport_header = skb->network_header;
......@@ -1366,7 +1275,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
t->parms.proto = p->proto;
ip6_tnl_dst_reset(t);
dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t);
return 0;
}
......@@ -1637,7 +1546,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
ret = ip6_tnl_dst_init(t);
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
......
......@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
t->parms.proto = p->proto;
ip6_tnl_dst_reset(t);
dst_cache_reset(&t->dst_cache);
vti6_link_config(t);
return 0;
}
......
......@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
ip_tunnel_dst_reset_all(tunnel);
dst_cache_reset(&tunnel->dst_cache);
dev_put(dev);
}
......@@ -1093,7 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
ip_tunnel_dst_reset_all(t);
dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev);
}
......@@ -1124,7 +1124,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
ip_tunnel_dst_reset_all(t);
dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev);
return 0;
}
......@@ -1278,7 +1278,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
ip_tunnel_dst_reset_all(t);
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
break;
......@@ -1339,7 +1339,7 @@ static void ipip6_dev_free(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
free_percpu(tunnel->dst_cache);
dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
......@@ -1372,6 +1372,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int err;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
......@@ -1382,10 +1383,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (err) {
free_percpu(dev->tstats);
return -ENOMEM;
return err;
}
return 0;
......
......@@ -10,6 +10,7 @@ config OPENVSWITCH
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
select DST_CACHE
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
......
......@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (!tun_dst)
return -ENOMEM;
err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
if (err) {
dst_release((struct dst_entry *)tun_dst);
return err;
}
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
sizeof(*ovs_tun), log);
if (IS_ERR(a)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment