Commit 4cba259f authored by David S. Miller

Merge branch 'unified-tunnel-dst-caching'

Paolo Abeni says:

====================
net: unify dst caching for tunnel devices

This patch series tries to unify the dst cache implementations currently
present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, by introducing
a new generic implementation, replacing the existing ones, and then using
the new implementation in other tunnel devices which currently lack it.

The new dst implementation is compiled, as built-in, only if any device using
it is enabled.

Caching the dst for the tunnel remote address gives a small but measurable
performance improvement when tunneling over ipv4 (in the 2%-4% range) and
a significant one when tunneling over ipv6 (roughly 60% when no
fragmentation/segmentation takes place and the tunnel local address
is not specified).

v2:
- move the vxlan dst_cache usage inside the device lookup functions
- fix use-after-free for lwt tunnels by moving the dst cache storage inside
  the dst_metadata
- sparse coding style cleanup
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 64f63d59 3c1cb4d2
...@@ -72,6 +72,7 @@ struct geneve_dev { ...@@ -72,6 +72,7 @@ struct geneve_dev {
bool collect_md; bool collect_md;
struct gro_cells gro_cells; struct gro_cells gro_cells;
u32 flags; u32 flags;
struct dst_cache dst_cache;
}; };
/* Geneve device flags */ /* Geneve device flags */
...@@ -297,6 +298,13 @@ static int geneve_init(struct net_device *dev) ...@@ -297,6 +298,13 @@ static int geneve_init(struct net_device *dev)
return err; return err;
} }
err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
if (err) {
free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
return err;
}
return 0; return 0;
} }
...@@ -304,6 +312,7 @@ static void geneve_uninit(struct net_device *dev) ...@@ -304,6 +312,7 @@ static void geneve_uninit(struct net_device *dev)
{ {
struct geneve_dev *geneve = netdev_priv(dev); struct geneve_dev *geneve = netdev_priv(dev);
dst_cache_destroy(&geneve->dst_cache);
gro_cells_destroy(&geneve->gro_cells); gro_cells_destroy(&geneve->gro_cells);
free_percpu(dev->tstats); free_percpu(dev->tstats);
} }
...@@ -753,7 +762,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ...@@ -753,7 +762,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct ip_tunnel_info *info) struct ip_tunnel_info *info)
{ {
struct geneve_dev *geneve = netdev_priv(dev); struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL; struct rtable *rt = NULL;
bool use_cache = true;
__u8 tos; __u8 tos;
memset(fl4, 0, sizeof(*fl4)); memset(fl4, 0, sizeof(*fl4));
...@@ -764,16 +775,26 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ...@@ -764,16 +775,26 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
fl4->daddr = info->key.u.ipv4.dst; fl4->daddr = info->key.u.ipv4.dst;
fl4->saddr = info->key.u.ipv4.src; fl4->saddr = info->key.u.ipv4.src;
fl4->flowi4_tos = RT_TOS(info->key.tos); fl4->flowi4_tos = RT_TOS(info->key.tos);
dst_cache = &info->dst_cache;
} else { } else {
tos = geneve->tos; tos = geneve->tos;
if (tos == 1) { if (tos == 1) {
const struct iphdr *iip = ip_hdr(skb); const struct iphdr *iip = ip_hdr(skb);
tos = ip_tunnel_get_dsfield(iip, skb); tos = ip_tunnel_get_dsfield(iip, skb);
use_cache = false;
} }
fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_tos = RT_TOS(tos);
fl4->daddr = geneve->remote.sin.sin_addr.s_addr; fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
return rt;
} }
rt = ip_route_output_key(geneve->net, fl4); rt = ip_route_output_key(geneve->net, fl4);
...@@ -786,6 +807,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, ...@@ -786,6 +807,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
ip_rt_put(rt); ip_rt_put(rt);
return ERR_PTR(-ELOOP); return ERR_PTR(-ELOOP);
} }
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
return rt; return rt;
} }
...@@ -798,6 +821,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, ...@@ -798,6 +821,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct geneve_dev *geneve = netdev_priv(dev); struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6; struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
bool use_cache = true;
__u8 prio; __u8 prio;
memset(fl6, 0, sizeof(*fl6)); memset(fl6, 0, sizeof(*fl6));
...@@ -808,16 +833,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, ...@@ -808,16 +833,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->daddr = info->key.u.ipv6.dst; fl6->daddr = info->key.u.ipv6.dst;
fl6->saddr = info->key.u.ipv6.src; fl6->saddr = info->key.u.ipv6.src;
fl6->flowi6_tos = RT_TOS(info->key.tos); fl6->flowi6_tos = RT_TOS(info->key.tos);
dst_cache = &info->dst_cache;
} else { } else {
prio = geneve->tos; prio = geneve->tos;
if (prio == 1) { if (prio == 1) {
const struct iphdr *iip = ip_hdr(skb); const struct iphdr *iip = ip_hdr(skb);
prio = ip_tunnel_get_dsfield(iip, skb); prio = ip_tunnel_get_dsfield(iip, skb);
use_cache = false;
} }
fl6->flowi6_tos = RT_TOS(prio); fl6->flowi6_tos = RT_TOS(prio);
fl6->daddr = geneve->remote.sin6.sin6_addr; fl6->daddr = geneve->remote.sin6.sin6_addr;
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst)
return dst;
} }
if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
...@@ -830,6 +865,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, ...@@ -830,6 +865,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
return ERR_PTR(-ELOOP); return ERR_PTR(-ELOOP);
} }
if (use_cache)
dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
return dst; return dst;
} }
#endif #endif
...@@ -1272,6 +1309,8 @@ static int geneve_configure(struct net *net, struct net_device *dev, ...@@ -1272,6 +1309,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
return -EPERM; return -EPERM;
} }
dst_cache_reset(&geneve->dst_cache);
err = register_netdevice(dev); err = register_netdevice(dev);
if (err) if (err)
return err; return err;
......
...@@ -480,6 +480,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, ...@@ -480,6 +480,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list); rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
if (!rd) if (!rd)
return 0; return 0;
dst_cache_reset(&rd->dst_cache);
rd->remote_ip = *ip; rd->remote_ip = *ip;
rd->remote_port = port; rd->remote_port = port;
rd->remote_vni = vni; rd->remote_vni = vni;
...@@ -501,6 +503,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, ...@@ -501,6 +503,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd = kmalloc(sizeof(*rd), GFP_ATOMIC); rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL) if (rd == NULL)
return -ENOBUFS; return -ENOBUFS;
if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
kfree(rd);
return -ENOBUFS;
}
rd->remote_ip = *ip; rd->remote_ip = *ip;
rd->remote_port = port; rd->remote_port = port;
rd->remote_vni = vni; rd->remote_vni = vni;
...@@ -749,8 +757,10 @@ static void vxlan_fdb_free(struct rcu_head *head) ...@@ -749,8 +757,10 @@ static void vxlan_fdb_free(struct rcu_head *head)
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
struct vxlan_rdst *rd, *nd; struct vxlan_rdst *rd, *nd;
list_for_each_entry_safe(rd, nd, &f->remotes, list) list_for_each_entry_safe(rd, nd, &f->remotes, list) {
dst_cache_destroy(&rd->dst_cache);
kfree(rd); kfree(rd);
}
kfree(f); kfree(f);
} }
...@@ -1754,11 +1764,24 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, ...@@ -1754,11 +1764,24 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos, struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr) __be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
struct ip_tunnel_info *info)
{ {
struct rtable *rt = NULL; struct rtable *rt = NULL;
bool use_cache = false;
struct flowi4 fl4; struct flowi4 fl4;
/* when the ip_tunnel_info is available, the tos used for lookup is
* packet independent, so we can use the cache
*/
if (!skb->mark && (!tos || info)) {
use_cache = true;
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
memset(&fl4, 0, sizeof(fl4)); memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif; fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos); fl4.flowi4_tos = RT_TOS(tos);
...@@ -1768,8 +1791,11 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, ...@@ -1768,8 +1791,11 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
rt = ip_route_output_key(vxlan->net, &fl4); rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt)) if (!IS_ERR(rt)) {
*saddr = fl4.saddr; *saddr = fl4.saddr;
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
}
return rt; return rt;
} }
...@@ -1777,12 +1803,19 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, ...@@ -1777,12 +1803,19 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, struct sk_buff *skb, int oif,
const struct in6_addr *daddr, const struct in6_addr *daddr,
struct in6_addr *saddr) struct in6_addr *saddr,
struct dst_cache *dst_cache)
{ {
struct dst_entry *ndst; struct dst_entry *ndst;
struct flowi6 fl6; struct flowi6 fl6;
int err; int err;
if (!skb->mark) {
ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst)
return ndst;
}
memset(&fl6, 0, sizeof(fl6)); memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif; fl6.flowi6_oif = oif;
fl6.daddr = *daddr; fl6.daddr = *daddr;
...@@ -1797,6 +1830,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, ...@@ -1797,6 +1830,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err); return ERR_PTR(err);
*saddr = fl6.saddr; *saddr = fl6.saddr;
if (!skb->mark)
dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst; return ndst;
} }
#endif #endif
...@@ -1849,6 +1884,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, ...@@ -1849,6 +1884,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc) struct vxlan_rdst *rdst, bool did_rsc)
{ {
struct dst_cache *dst_cache;
struct ip_tunnel_info *info; struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk; struct sock *sk;
...@@ -1873,6 +1909,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1873,6 +1909,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni; vni = rdst->remote_vni;
dst = &rdst->remote_ip; dst = &rdst->remote_ip;
dst_cache = &rdst->dst_cache;
} else { } else {
if (!info) { if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n", WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
...@@ -1887,6 +1924,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1887,6 +1924,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
else else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip; dst = &remote_ip;
dst_cache = &info->dst_cache;
} }
if (vxlan_addr_any(dst)) { if (vxlan_addr_any(dst)) {
...@@ -1938,7 +1976,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1938,7 +1976,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
rt = vxlan_get_route(vxlan, skb, rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos, rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr, &saddr); dst->sin.sin_addr.s_addr, &saddr,
dst_cache, info);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr); &dst->sin.sin_addr.s_addr);
...@@ -1990,7 +2029,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1990,7 +2029,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = vxlan6_get_route(vxlan, skb, ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, rdst ? rdst->remote_ifindex : 0,
&dst->sin6.sin6_addr, &saddr); &dst->sin6.sin6_addr, &saddr,
dst_cache);
if (IS_ERR(ndst)) { if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n", netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr); &dst->sin6.sin6_addr);
...@@ -2331,7 +2371,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ...@@ -2331,7 +2371,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL; return -EINVAL;
rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
info->key.u.ipv4.dst, info->key.u.ipv4.dst,
&info->key.u.ipv4.src); &info->key.u.ipv4.src, NULL, info);
if (IS_ERR(rt)) if (IS_ERR(rt))
return PTR_ERR(rt); return PTR_ERR(rt);
ip_rt_put(rt); ip_rt_put(rt);
...@@ -2343,7 +2383,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ...@@ -2343,7 +2383,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL; return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0, ndst = vxlan6_get_route(vxlan, skb, 0,
&info->key.u.ipv6.dst, &info->key.u.ipv6.dst,
&info->key.u.ipv6.src); &info->key.u.ipv6.src, NULL);
if (IS_ERR(ndst)) if (IS_ERR(ndst))
return PTR_ERR(ndst); return PTR_ERR(ndst);
dst_release(ndst); dst_release(ndst);
......
#ifndef _NET_DST_CACHE_H
#define _NET_DST_CACHE_H
#include <linux/jiffies.h>
#include <net/dst.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
/* Handle for a per-CPU dst cache; embedded by the tunnel code that uses it. */
struct dst_cache {
/* per-CPU slots, allocated by dst_cache_init(); accessors bail out when NULL */
struct dst_cache_pcpu __percpu *cache;
/* jiffies recorded by dst_cache_reset(); slots not refreshed after it are dropped on lookup */
unsigned long reset_ts;
};
/**
 * dst_cache_get - perform cache lookup
 * @dst_cache: the cache
 *
 * The caller should use dst_cache_get_ip4() if it needs to retrieve the
 * source address to be used when xmitting to the cached dst.
 * local BH must be disabled.
 *
 * Return: the dst cached for the current CPU with an extra reference held,
 * or NULL when the cache has no storage or no valid entry.
 */
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
/**
 * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
 * @dst_cache: the cache
 * @saddr: return value for the retrieved source address; written only when
 *         a cached entry is returned
 *
 * local BH must be disabled.
 *
 * Return: the cached route with an extra reference held, or NULL when the
 * cache has no storage or no valid entry for the current CPU.
 */
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
/**
 * dst_cache_set_ip4 - store the ipv4 dst into the cache
 * @dst_cache: the cache
 * @dst: the entry to be cached
 * @saddr: the source address to be stored inside the cache
 *
 * The cache takes its own reference on @dst and releases the one held on
 * the entry it replaces; the caller keeps its reference.
 * local BH must be disabled.
 */
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
__be32 saddr);
#if IS_ENABLED(CONFIG_IPV6)
/**
 * dst_cache_set_ip6 - store the ipv6 dst into the cache
 * @dst_cache: the cache
 * @dst: the entry to be cached
 * @addr: the source address to be stored inside the cache
 *
 * local BH must be disabled.
 */
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
const struct in6_addr *addr);
/**
 * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
 * @dst_cache: the cache
 * @saddr: return value for the retrieved source address; written only when
 *         a cached entry is returned
 *
 * local BH must be disabled.
 *
 * Return: the cached dst with an extra reference held, or NULL when the
 * cache has no storage or no valid entry for the current CPU.
 */
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
struct in6_addr *saddr);
#endif
/**
 * dst_cache_reset - invalidate the cache contents
 * @dst_cache: the cache
 *
 * This does not free the cached dst, to avoid races and contention.
 * The dst will be freed on a later cache lookup.
 */
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
/* only the timestamp moves; lookups compare it against each slot's refresh time */
dst_cache->reset_ts = jiffies;
}
/**
 * dst_cache_init - initialize the cache, allocating the required storage
 * @dst_cache: the cache
 * @gfp: allocation flags
 *
 * Return: 0 on success, -ENOMEM when the per-CPU storage cannot be allocated.
 */
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
/**
 * dst_cache_destroy - empty the cache and free the allocated storage
 * @dst_cache: the cache
 *
 * No synchronization is enforced: it must be called only when the cache
 * is unused.
 */
void dst_cache_destroy(struct dst_cache *dst_cache);
#endif
...@@ -62,6 +62,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, ...@@ -62,6 +62,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
sizeof(a->u.tun_info) + a->u.tun_info.options_len); sizeof(a->u.tun_info) + a->u.tun_info.options_len);
} }
void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/if_tunnel.h> #include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h> #include <linux/ip6_tunnel.h>
#include <net/ip_tunnels.h> #include <net/ip_tunnels.h>
#include <net/dst_cache.h>
#define IP6TUNNEL_ERR_TIMEO (30*HZ) #define IP6TUNNEL_ERR_TIMEO (30*HZ)
...@@ -33,12 +34,6 @@ struct __ip6_tnl_parm { ...@@ -33,12 +34,6 @@ struct __ip6_tnl_parm {
__be32 o_key; __be32 o_key;
}; };
struct ip6_tnl_dst {
seqlock_t lock;
struct dst_entry __rcu *dst;
u32 cookie;
};
/* IPv6 tunnel */ /* IPv6 tunnel */
struct ip6_tnl { struct ip6_tnl {
struct ip6_tnl __rcu *next; /* next tunnel in list */ struct ip6_tnl __rcu *next; /* next tunnel in list */
...@@ -46,7 +41,7 @@ struct ip6_tnl { ...@@ -46,7 +41,7 @@ struct ip6_tnl {
struct net *net; /* netns for packet i/o */ struct net *net; /* netns for packet i/o */
struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
struct flowi fl; /* flowi template for xmit */ struct flowi fl; /* flowi template for xmit */
struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ struct dst_cache dst_cache; /* cached dst */
int err_count; int err_count;
unsigned long err_time; unsigned long err_time;
...@@ -66,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim { ...@@ -66,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim {
__u8 encap_limit; /* tunnel encapsulation limit */ __u8 encap_limit; /* tunnel encapsulation limit */
} __packed; } __packed;
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
int ip6_tnl_dst_init(struct ip6_tnl *t);
void ip6_tnl_dst_destroy(struct ip6_tnl *t);
void ip6_tnl_dst_reset(struct ip6_tnl *t);
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
const struct in6_addr *raddr); const struct in6_addr *raddr);
int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/lwtunnel.h> #include <net/lwtunnel.h>
#include <net/dst_cache.h>
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h> #include <net/ipv6.h>
...@@ -57,6 +58,9 @@ struct ip_tunnel_key { ...@@ -57,6 +58,9 @@ struct ip_tunnel_key {
struct ip_tunnel_info { struct ip_tunnel_info {
struct ip_tunnel_key key; struct ip_tunnel_key key;
#ifdef CONFIG_DST_CACHE
struct dst_cache dst_cache;
#endif
u8 options_len; u8 options_len;
u8 mode; u8 mode;
}; };
...@@ -85,11 +89,6 @@ struct ip_tunnel_prl_entry { ...@@ -85,11 +89,6 @@ struct ip_tunnel_prl_entry {
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };
struct ip_tunnel_dst {
struct dst_entry __rcu *dst;
__be32 saddr;
};
struct metadata_dst; struct metadata_dst;
struct ip_tunnel { struct ip_tunnel {
...@@ -108,7 +107,7 @@ struct ip_tunnel { ...@@ -108,7 +107,7 @@ struct ip_tunnel {
int tun_hlen; /* Precalculated header length */ int tun_hlen; /* Precalculated header length */
int mlink; int mlink;
struct ip_tunnel_dst __percpu *dst_cache; struct dst_cache dst_cache;
struct ip_tunnel_parm parms; struct ip_tunnel_parm parms;
...@@ -247,7 +246,6 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -247,7 +246,6 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p); struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id); void ip_tunnel_setup(struct net_device *dev, int net_id);
void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
int ip_tunnel_encap_setup(struct ip_tunnel *t, int ip_tunnel_encap_setup(struct ip_tunnel *t,
struct ip_tunnel_encap *ipencap); struct ip_tunnel_encap *ipencap);
......
...@@ -148,6 +148,7 @@ struct vxlan_rdst { ...@@ -148,6 +148,7 @@ struct vxlan_rdst {
u32 remote_ifindex; u32 remote_ifindex;
struct list_head list; struct list_head list;
struct rcu_head rcu; struct rcu_head rcu;
struct dst_cache dst_cache;
}; };
struct vxlan_config { struct vxlan_config {
......
...@@ -392,6 +392,10 @@ config LWTUNNEL ...@@ -392,6 +392,10 @@ config LWTUNNEL
weight tunnel endpoint. Tunnel encapsulation parameters are stored weight tunnel endpoint. Tunnel encapsulation parameters are stored
with light weight tunnel state associated with fib routes. with light weight tunnel state associated with fib routes.
config DST_CACHE
bool "dst cache"
default n
endif # if NET endif # if NET
# Used by archs to tell that they support BPF_JIT # Used by archs to tell that they support BPF_JIT
......
...@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o ...@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
...@@ -265,7 +265,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) ...@@ -265,7 +265,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
lwtstate_put(dst->lwtstate); lwtstate_put(dst->lwtstate);
if (dst->flags & DST_METADATA) if (dst->flags & DST_METADATA)
kfree(dst); metadata_dst_free((struct metadata_dst *)dst);
else else
kmem_cache_free(dst->ops->kmem_cachep, dst); kmem_cache_free(dst->ops->kmem_cachep, dst);
...@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags) ...@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
} }
EXPORT_SYMBOL_GPL(metadata_dst_alloc); EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
{ {
int cpu; int cpu;
......
/*
* net/core/dst_cache.c - dst entry cache
*
* Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <net/dst_cache.h>
#include <net/route.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
#include <uapi/linux/in.h>
/* One per-CPU slot of a dst cache. */
struct dst_cache_pcpu {
/* jiffies of the last failed lookup on this slot; compared with dst_cache->reset_ts */
unsigned long refresh_ts;
/* cached entry, NULL when the slot is empty; the slot holds a reference on it */
struct dst_entry *dst;
/* value handed to dst->ops->check(); 0 for ipv4, rt6_get_cookie() for ipv6 */
u32 cookie;
/* source address stored alongside the dst by the _ip4/_ip6 setters */
union {
struct in_addr in_saddr;
struct in6_addr in6_saddr;
};
};
/*
 * Replace the entry stored in one per-CPU slot.
 *
 * @dst_cache: the per-CPU slot to update
 * @dst: new entry, or NULL to empty the slot
 * @cookie: validation cookie stored next to the entry
 *
 * The slot releases its reference on the entry it held and takes a new
 * reference on @dst when it is non-NULL. File-local helper: it is not
 * declared in the public header, so it gets internal linkage.
 */
static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
				      struct dst_entry *dst, u32 cookie)
{
	/* drop the slot's reference on the entry being replaced */
	dst_release(dst_cache->dst);
	if (dst)
		dst_hold(dst);

	dst_cache->cookie = cookie;
	dst_cache->dst = dst;
}
/*
 * Look up the entry stored in one per-CPU slot.
 *
 * @dst_cache: the cache owning the slot (provides reset_ts)
 * @idst: the per-CPU slot to read
 *
 * An entry is handed out only if the slot was refreshed after the last
 * dst_cache_reset() and the dst is either not obsolete or passes its
 * ops->check() with the stored cookie. A stale entry is dropped from the
 * slot. On any miss the slot's refresh timestamp is set to the current
 * jiffies.
 *
 * Returns the dst with an extra reference held, or NULL on a miss.
 * File-local helper: it is not declared in the public header, so it gets
 * internal linkage.
 */
static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
					       struct dst_cache_pcpu *idst)
{
	struct dst_entry *dst;

	dst = idst->dst;
	if (!dst)
		goto fail;

	/* the cache already holds a dst reference; it can't go away */
	dst_hold(dst);

	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
		/* stale: empty the slot and give back the reference just taken */
		dst_cache_per_cpu_dst_set(idst, NULL, 0);
		dst_release(dst);
		goto fail;
	}
	return dst;

fail:
	idst->refresh_ts = jiffies;
	return NULL;
}
/*
 * Return the dst cached for the current CPU, or NULL when the cache has no
 * per-CPU storage or the slot holds no valid entry.
 */
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
{
	struct dst_cache_pcpu *slot;

	if (!dst_cache->cache)
		return NULL;

	slot = this_cpu_ptr(dst_cache->cache);
	return dst_cache_per_cpu_get(dst_cache, slot);
}
EXPORT_SYMBOL_GPL(dst_cache_get);
/*
 * Look up the ipv4 route cached for the current CPU.
 *
 * @dst_cache: the cache; may be NULL, which is treated as a miss
 * @saddr: filled with the cached source address on a hit, untouched on a miss
 *
 * Some callers pass a NULL cache to say "do not cache" (for example
 * vxlan_fill_metadata_dst() via vxlan_get_route()), so a NULL @dst_cache
 * must not be dereferenced.
 *
 * Returns the cached rtable with an extra reference held, or NULL.
 */
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
{
	struct dst_cache_pcpu *idst;
	struct dst_entry *dst;

	if (!dst_cache || !dst_cache->cache)
		return NULL;

	idst = this_cpu_ptr(dst_cache->cache);
	dst = dst_cache_per_cpu_get(dst_cache, idst);
	if (!dst)
		return NULL;

	*saddr = idst->in_saddr.s_addr;
	return container_of(dst, struct rtable, dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
/*
 * Store an ipv4 dst and its source address in the current CPU's slot.
 *
 * @dst_cache: the cache; may be NULL, in which case nothing is stored
 * @dst: the entry to cache
 * @saddr: the source address to remember with it
 *
 * A NULL @dst_cache is accepted because some callers pass NULL to mean
 * "do not cache" (for example vxlan_fill_metadata_dst() via
 * vxlan_get_route()). ipv4 entries are stored with a cookie of 0.
 */
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
		       __be32 saddr)
{
	struct dst_cache_pcpu *idst;

	if (!dst_cache || !dst_cache->cache)
		return;

	idst = this_cpu_ptr(dst_cache->cache);
	dst_cache_per_cpu_dst_set(idst, dst, 0);
	idst->in_saddr.s_addr = saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
/*
 * Store an ipv6 dst and its source address in the current CPU's slot.
 *
 * @dst_cache: the cache; may be NULL, in which case nothing is stored
 * @dst: the entry to cache; its rt6 cookie is stored for later validation
 * @addr: the source address to remember with it
 *
 * A NULL @dst_cache is accepted because some callers pass NULL to mean
 * "do not cache" (for example vxlan_fill_metadata_dst() via
 * vxlan6_get_route()).
 */
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
		       const struct in6_addr *addr)
{
	struct dst_cache_pcpu *idst;

	if (!dst_cache || !dst_cache->cache)
		return;

	/* look the per-CPU slot up once and reuse it for both stores */
	idst = this_cpu_ptr(dst_cache->cache);
	dst_cache_per_cpu_dst_set(idst, dst,
				  rt6_get_cookie((struct rt6_info *)dst));
	idst->in6_saddr = *addr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
/*
 * Look up the ipv6 dst cached for the current CPU.
 *
 * @dst_cache: the cache; may be NULL, which is treated as a miss
 * @saddr: filled with the cached source address on a hit, untouched on a miss
 *
 * A NULL @dst_cache is accepted because some callers pass NULL to mean
 * "do not cache" (for example vxlan_fill_metadata_dst() via
 * vxlan6_get_route()).
 *
 * Returns the cached dst with an extra reference held, or NULL.
 */
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
				    struct in6_addr *saddr)
{
	struct dst_cache_pcpu *idst;
	struct dst_entry *dst;

	if (!dst_cache || !dst_cache->cache)
		return NULL;

	idst = this_cpu_ptr(dst_cache->cache);
	dst = dst_cache_per_cpu_get(dst_cache, idst);
	if (!dst)
		return NULL;

	*saddr = idst->in6_saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
#endif
/*
 * Allocate zeroed per-CPU storage for the cache and stamp its reset time.
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
					    gfp | __GFP_ZERO);
	if (dst_cache->cache) {
		dst_cache_reset(dst_cache);
		return 0;
	}

	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(dst_cache_init);
/*
 * Release the dst held by every possible CPU's slot, then free the per-CPU
 * storage. Does nothing when the cache was never allocated.
 */
void dst_cache_destroy(struct dst_cache *dst_cache)
{
	int cpu;

	if (dst_cache->cache) {
		for_each_possible_cpu(cpu)
			dst_release(per_cpu_ptr(dst_cache->cache, cpu)->dst);

		free_percpu(dst_cache->cache);
	}
}
EXPORT_SYMBOL_GPL(dst_cache_destroy);
...@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX ...@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
config NET_IP_TUNNEL config NET_IP_TUNNEL
tristate tristate
select DST_CACHE
default n default n
config NET_IPGRE config NET_IPGRE
......
...@@ -540,9 +540,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -540,9 +540,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb; goto err_free_skb;
key = &tun_info->key; key = &tun_info->key;
rt = gre_get_rt(skb, dev, &fl, key); rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
if (IS_ERR(rt)) NULL;
goto err_free_skb; if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
if (!skb->mark)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
}
tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
......
...@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) ...@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS); IP_TNL_HASH_BITS);
} }
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
struct dst_entry *dst, __be32 saddr)
{
struct dst_entry *old_dst;
dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
idst->saddr = saddr;
}
static noinline void tunnel_dst_set(struct ip_tunnel *t,
struct dst_entry *dst, __be32 saddr)
{
__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
static void tunnel_dst_reset(struct ip_tunnel *t)
{
tunnel_dst_set(t, NULL, 0);
}
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
u32 cookie, __be32 *saddr)
{
struct ip_tunnel_dst *idst;
struct dst_entry *dst;
rcu_read_lock();
idst = raw_cpu_ptr(t->dst_cache);
dst = rcu_dereference(idst->dst);
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
dst = NULL;
if (dst) {
if (!dst->obsolete || dst->ops->check(dst, cookie)) {
*saddr = idst->saddr;
} else {
tunnel_dst_reset(t);
dst_release(dst);
dst = NULL;
}
}
rcu_read_unlock();
return (struct rtable *)dst;
}
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key) __be16 flags, __be32 key)
{ {
...@@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ...@@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) { if (!IS_ERR(rt)) {
tdev = rt->dst.dev; tdev = rt->dst.dev;
tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
fl4.saddr);
ip_rt_put(rt); ip_rt_put(rt);
} }
if (dev->type != ARPHRD_ETHER) if (dev->type != ARPHRD_ETHER)
...@@ -729,7 +675,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -729,7 +675,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error; goto tx_error;
rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
NULL;
if (!rt) { if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4); rt = ip_route_output_key(tunnel->net, &fl4);
...@@ -739,7 +686,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -739,7 +686,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error; goto tx_error;
} }
if (connected) if (connected)
tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
fl4.saddr);
} }
if (rt->dst.dev == dev) { if (rt->dst.dev == dev) {
...@@ -836,7 +784,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, ...@@ -836,7 +784,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu) if (set_mtu)
dev->mtu = mtu; dev->mtu = mtu;
} }
ip_tunnel_dst_reset_all(t); dst_cache_reset(&t->dst_cache);
netdev_state_change(dev); netdev_state_change(dev);
} }
...@@ -961,7 +909,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) ...@@ -961,7 +909,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells); gro_cells_destroy(&tunnel->gro_cells);
free_percpu(tunnel->dst_cache); dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
free_netdev(dev); free_netdev(dev);
} }
...@@ -1155,15 +1103,15 @@ int ip_tunnel_init(struct net_device *dev) ...@@ -1155,15 +1103,15 @@ int ip_tunnel_init(struct net_device *dev)
if (!dev->tstats) if (!dev->tstats)
return -ENOMEM; return -ENOMEM;
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (!tunnel->dst_cache) { if (err) {
free_percpu(dev->tstats); free_percpu(dev->tstats);
return -ENOMEM; return err;
} }
err = gro_cells_init(&tunnel->gro_cells, dev); err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) { if (err) {
free_percpu(tunnel->dst_cache); dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
return err; return err;
} }
...@@ -1193,7 +1141,7 @@ void ip_tunnel_uninit(struct net_device *dev) ...@@ -1193,7 +1141,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev) if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(itn, netdev_priv(dev)); ip_tunnel_del(itn, netdev_priv(dev));
ip_tunnel_dst_reset_all(tunnel); dst_cache_reset(&tunnel->dst_cache);
} }
EXPORT_SYMBOL_GPL(ip_tunnel_uninit); EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
......
...@@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE ...@@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE
config IPV6_TUNNEL config IPV6_TUNNEL
tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
select INET6_TUNNEL select INET6_TUNNEL
select DST_CACHE
---help--- ---help---
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473. RFC 2473.
......
...@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) ...@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t); ip6gre_tunnel_unlink(ign, t);
ip6_tnl_dst_reset(t); dst_cache_reset(&t->dst_cache);
dev_put(dev); dev_put(dev);
} }
...@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
} }
if (!fl6->flowi6_mark) if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_get(tunnel); dst = dst_cache_get(&tunnel->dst_cache);
if (!dst) { if (!dst) {
dst = ip6_route_output(net, NULL, fl6); dst = ip6_route_output(net, NULL, fl6);
...@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
} }
if (!fl6->flowi6_mark && ndst) if (!fl6->flowi6_mark && ndst)
ip6_tnl_dst_set(tunnel, ndst); dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
proto = NEXTHDR_GRE; proto = NEXTHDR_GRE;
...@@ -1009,7 +1009,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, ...@@ -1009,7 +1009,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_key = p->o_key; t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags; t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags; t->parms.o_flags = p->o_flags;
ip6_tnl_dst_reset(t); dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu); ip6gre_tnl_link_config(t, set_mtu);
return 0; return 0;
} }
...@@ -1219,7 +1219,7 @@ static void ip6gre_dev_free(struct net_device *dev) ...@@ -1219,7 +1219,7 @@ static void ip6gre_dev_free(struct net_device *dev)
{ {
struct ip6_tnl *t = netdev_priv(dev); struct ip6_tnl *t = netdev_priv(dev);
ip6_tnl_dst_destroy(t); dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
free_netdev(dev); free_netdev(dev);
} }
...@@ -1257,7 +1257,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) ...@@ -1257,7 +1257,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!dev->tstats) if (!dev->tstats)
return -ENOMEM; return -ENOMEM;
ret = ip6_tnl_dst_init(tunnel); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret) { if (ret) {
free_percpu(dev->tstats); free_percpu(dev->tstats);
dev->tstats = NULL; dev->tstats = NULL;
......
...@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) ...@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
return &dev->stats; return &dev->stats;
} }
/*
* Locking : hash tables are protected by RCU and RTNL
*/
/*
 * Replace the dst entry stored in a single per-cpu cache slot.
 *
 * @idst: the slot to update
 * @dst:  the new entry to store, or NULL to empty the slot
 *
 * The whole update runs under the slot's seqlock taken for writing, so a
 * reader using the seqlock retry loop sees a matching (dst, cookie) pair.
 * The reference held on the previously stored entry is released; when @dst
 * is non-NULL a new reference is taken on it and its rt6 cookie is recorded
 * alongside it, otherwise the cookie is cleared to 0.
 */
static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
struct dst_entry *dst)
{
write_seqlock_bh(&idst->lock);
/* Drop the reference on whatever entry is currently in the slot. */
dst_release(rcu_dereference_protected(
idst->dst,
lockdep_is_held(&idst->lock.lock)));
if (dst) {
/* The slot holds its own reference on the entry it stores. */
dst_hold(dst);
idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
} else {
idst->cookie = 0;
}
/* Publish the new pointer for RCU readers. */
rcu_assign_pointer(idst->dst, dst);
write_sequnlock_bh(&idst->lock);
}
/*
 * Fetch the dst entry cached for tunnel @t in the current CPU's slot.
 *
 * Returns the cached entry with an extra reference taken on it, or NULL
 * when the slot is empty, when the entry's refcount had already reached
 * zero, or when the entry is obsolete and fails its ->check() validation.
 * In the last case the slot is also emptied.
 */
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
{
struct ip6_tnl_dst *idst;
struct dst_entry *dst;
unsigned int seq;
u32 cookie;
idst = raw_cpu_ptr(t->dst_cache);
rcu_read_lock();
/* Retry until dst and cookie are read without a concurrent writer. */
do {
seq = read_seqbegin(&idst->lock);
dst = rcu_dereference(idst->dst);
cookie = idst->cookie;
} while (read_seqretry(&idst->lock, seq));
/* Only hand out the entry if a reference can still be taken on it. */
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
dst = NULL;
rcu_read_unlock();
/* Stale entry: empty the slot and drop the reference taken above. */
if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
ip6_tnl_per_cpu_dst_set(idst, NULL);
dst_release(dst);
dst = NULL;
}
return dst;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
/*
 * Empty the dst cache of tunnel @t on every possible CPU by storing NULL
 * into each per-cpu slot.
 */
void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
int i;
for_each_possible_cpu(i)
ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
/*
 * Store @dst in the current CPU's cache slot of tunnel @t, replacing any
 * entry already stored there. Slots belonging to other CPUs are untouched.
 */
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
{
ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
/*
 * Tear down the dst cache of tunnel @t: empty every per-cpu slot, then free
 * the per-cpu storage. Does nothing when t->dst_cache is NULL.
 */
void ip6_tnl_dst_destroy(struct ip6_tnl *t)
{
/* Nothing to release if the cache was never allocated. */
if (!t->dst_cache)
return;
ip6_tnl_dst_reset(t);
free_percpu(t->dst_cache);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
/*
 * Allocate the per-cpu dst cache of tunnel @t and initialise the seqlock of
 * every slot.
 *
 * Returns 0 on success, or -ENOMEM when the per-cpu allocation fails.
 */
int ip6_tnl_dst_init(struct ip6_tnl *t)
{
int i;
t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
if (!t->dst_cache)
return -ENOMEM;
for_each_possible_cpu(i)
seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
return 0;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
/** /**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point * @remote: the address of the tunnel exit-point
...@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev) ...@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{ {
struct ip6_tnl *t = netdev_priv(dev); struct ip6_tnl *t = netdev_priv(dev);
ip6_tnl_dst_destroy(t); dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
free_netdev(dev); free_netdev(dev);
} }
...@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) ...@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else else
ip6_tnl_unlink(ip6n, t); ip6_tnl_unlink(ip6n, t);
ip6_tnl_dst_reset(t); dst_cache_reset(&t->dst_cache);
dev_put(dev); dev_put(dev);
} }
...@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ...@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh); neigh_release(neigh);
} else if (!fl6->flowi6_mark) } else if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_get(t); dst = dst_cache_get(&t->dst_cache);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
goto tx_err_link_failure; goto tx_err_link_failure;
...@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ...@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
} }
if (!fl6->flowi6_mark && ndst) if (!fl6->flowi6_mark && ndst)
ip6_tnl_dst_set(t, ndst); dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
skb->transport_header = skb->network_header; skb->transport_header = skb->network_header;
...@@ -1366,7 +1275,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) ...@@ -1366,7 +1275,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.flowinfo = p->flowinfo; t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link; t->parms.link = p->link;
t->parms.proto = p->proto; t->parms.proto = p->proto;
ip6_tnl_dst_reset(t); dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t); ip6_tnl_link_config(t);
return 0; return 0;
} }
...@@ -1637,7 +1546,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) ...@@ -1637,7 +1546,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
if (!dev->tstats) if (!dev->tstats)
return -ENOMEM; return -ENOMEM;
ret = ip6_tnl_dst_init(t); ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret) { if (ret) {
free_percpu(dev->tstats); free_percpu(dev->tstats);
dev->tstats = NULL; dev->tstats = NULL;
......
...@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) ...@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.i_key = p->i_key; t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key; t->parms.o_key = p->o_key;
t->parms.proto = p->proto; t->parms.proto = p->proto;
ip6_tnl_dst_reset(t); dst_cache_reset(&t->dst_cache);
vti6_link_config(t); vti6_link_config(t);
return 0; return 0;
} }
......
...@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ...@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL); ipip6_tunnel_del_prl(tunnel, NULL);
} }
ip_tunnel_dst_reset_all(tunnel); dst_cache_reset(&tunnel->dst_cache);
dev_put(dev); dev_put(dev);
} }
...@@ -1093,7 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) ...@@ -1093,7 +1093,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link; t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev); ipip6_tunnel_bind_dev(t->dev);
} }
ip_tunnel_dst_reset_all(t); dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev); netdev_state_change(t->dev);
} }
...@@ -1124,7 +1124,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, ...@@ -1124,7 +1124,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
ip_tunnel_dst_reset_all(t); dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev); netdev_state_change(t->dev);
return 0; return 0;
} }
...@@ -1278,7 +1278,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ...@@ -1278,7 +1278,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break; break;
} }
ip_tunnel_dst_reset_all(t); dst_cache_reset(&t->dst_cache);
netdev_state_change(dev); netdev_state_change(dev);
break; break;
...@@ -1339,7 +1339,7 @@ static void ipip6_dev_free(struct net_device *dev) ...@@ -1339,7 +1339,7 @@ static void ipip6_dev_free(struct net_device *dev)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
free_percpu(tunnel->dst_cache); dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats); free_percpu(dev->tstats);
free_netdev(dev); free_netdev(dev);
} }
...@@ -1372,6 +1372,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) ...@@ -1372,6 +1372,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
int err;
tunnel->dev = dev; tunnel->dev = dev;
tunnel->net = dev_net(dev); tunnel->net = dev_net(dev);
...@@ -1382,10 +1383,10 @@ static int ipip6_tunnel_init(struct net_device *dev) ...@@ -1382,10 +1383,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
if (!dev->tstats) if (!dev->tstats)
return -ENOMEM; return -ENOMEM;
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (!tunnel->dst_cache) { if (err) {
free_percpu(dev->tstats); free_percpu(dev->tstats);
return -ENOMEM; return err;
} }
return 0; return 0;
......
...@@ -10,6 +10,7 @@ config OPENVSWITCH ...@@ -10,6 +10,7 @@ config OPENVSWITCH
select LIBCRC32C select LIBCRC32C
select MPLS select MPLS
select NET_MPLS_GSO select NET_MPLS_GSO
select DST_CACHE
---help--- ---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features environments. In addition to supporting a variety of features
......
...@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, ...@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (!tun_dst) if (!tun_dst)
return -ENOMEM; return -ENOMEM;
err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
if (err) {
dst_release((struct dst_entry *)tun_dst);
return err;
}
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
sizeof(*ovs_tun), log); sizeof(*ovs_tun), log);
if (IS_ERR(a)) { if (IS_ERR(a)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment