Commit 7f81ff04 authored by David S. Miller's avatar David S. Miller

Merge branch 'xfrm-remove-flow-cache'

Florian Westphal says:

====================
xfrm: remove flow cache

After RCU-ification of ipsec packet path there are no major scalability
issues anymore without flow cache.

We still incur a performance hit, which comes mostly from the extra xfrm
dst allocation/freeing.
The last patch in the series adds a simple percpu cache to avoid the
extra allocation if a packet matched the same policies as last one.

The main concern with this is that we will see performance drops,
especially with large numbers of policies/SAs.

However, during hallway discussions at nfws 2017 it seemed the issues
with flow caching outweight the removal downsides, and that it
might be best to just 'remove it' and see where the practical issues
(if any) will appear.

It should now be possible to also remove the genid member in the policies
as we don't hold bundles for prolonged time anymore, but I think
this change is controversial (and intrusive) enough as-is, so defer
that to a later point in time.

Changes since last rfc:

- fix build failures due to implicit interrupt.h includes
- rework last patch (pcpu cache):
 * avoid xchg()
 * check policies for walk.dead = 1 instead of more costly bundle_ok().
 * flush pcpu bundles when sa/policies get removed, to allow module
   references to go away (suggested by Ilan Tayari)
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6ddb4fdf ec30d78c
......@@ -1291,8 +1291,7 @@ tag - INTEGER
xfrm4_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv4
destination cache entries. At twice this value the system will
refuse new allocations. The value must be set below the flowcache
limit (4096 * number of online cpus) to take effect.
refuse new allocations.
igmp_link_local_mcast_reports - BOOLEAN
Enable IGMP reports for link local multicast groups in the
......@@ -1778,8 +1777,7 @@ ratelimit - INTEGER
xfrm6_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv6
destination cache entries. At twice this value the system will
refuse new allocations. The value must be set below the flowcache
limit (4096 * number of online cpus) to take effect.
refuse new allocations.
IPv6 Update by:
......
......@@ -20,7 +20,7 @@
#include <linux/if_arcnet.h>
#ifdef __KERNEL__
#include <linux/irqreturn.h>
#include <linux/interrupt.h>
/*
* RECON_THRESHOLD is the maximum number of RECON messages to receive
......
......@@ -129,6 +129,7 @@
#include <net/dcbnl.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#define XGBE_DRV_NAME "amd-xgbe"
#define XGBE_DRV_VERSION "1.0.3"
......
......@@ -17,6 +17,7 @@
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/interrupt.h>
#include "dwc-xlgmac.h"
#include "dwc-xlgmac-reg.h"
......
......@@ -66,6 +66,7 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
......
......@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
return 0;
}
#define FLOW_DIR_IN 0
#define FLOW_DIR_OUT 1
#define FLOW_DIR_FWD 2
struct net;
struct sock;
struct flow_cache_ops;
struct flow_cache_object {
const struct flow_cache_ops *ops;
};
struct flow_cache_ops {
struct flow_cache_object *(*get)(struct flow_cache_object *);
int (*check)(struct flow_cache_object *);
void (*delete)(struct flow_cache_object *);
};
typedef struct flow_cache_object *(*flow_resolve_t)(
struct net *net, const struct flowi *key, u16 family,
u8 dir, struct flow_cache_object *oldobj, void *ctx);
struct flow_cache_object *flow_cache_lookup(struct net *net,
const struct flowi *key, u16 family,
u8 dir, flow_resolve_t resolver,
void *ctx);
int flow_cache_init(struct net *net);
void flow_cache_fini(struct net *net);
void flow_cache_hp_init(void);
void flow_cache_flush(struct net *net);
void flow_cache_flush_deferred(struct net *net);
extern atomic_t flow_cache_genid;
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
......
#ifndef _NET_FLOWCACHE_H
#define _NET_FLOWCACHE_H
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/notifier.h>
struct flow_cache_percpu {
struct hlist_head *hash_table;
unsigned int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
struct tasklet_struct flush_tasklet;
};
struct flow_cache {
u32 hash_shift;
struct flow_cache_percpu __percpu *percpu;
struct hlist_node node;
unsigned int low_watermark;
unsigned int high_watermark;
struct timer_list rnd_timer;
};
#endif /* _NET_FLOWCACHE_H */
......@@ -6,7 +6,6 @@
#include <linux/workqueue.h>
#include <linux/xfrm.h>
#include <net/dst_ops.h>
#include <net/flowcache.h>
struct ctl_table_header;
......@@ -73,16 +72,6 @@ struct netns_xfrm {
spinlock_t xfrm_state_lock;
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
/* flow cache part */
struct flow_cache flow_cache_global;
atomic_t flow_cache_genid;
struct list_head flow_cache_gc_list;
atomic_t flow_cache_gc_count;
spinlock_t flow_cache_gc_lock;
struct work_struct flow_cache_gc_work;
struct work_struct flow_cache_flush_work;
struct mutex flow_flush_sem;
};
#endif
......@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir,
const struct km_event *c);
void xfrm_policy_cache_flush(void);
void km_state_notify(struct xfrm_state *x, const struct km_event *c);
struct xfrm_tmpl;
......@@ -563,7 +564,6 @@ struct xfrm_policy {
refcount_t refcnt;
struct timer_list timer;
struct flow_cache_object flo;
atomic_t genid;
u32 priority;
u32 index;
......@@ -978,7 +978,6 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
struct flow_cache_object flo;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
......@@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
}
}
void xfrm_garbage_collect(struct net *net);
void xfrm_garbage_collect_deferred(struct net *net);
#else
static inline void xfrm_sk_free_policy(struct sock *sk) {}
......@@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
{
return 1;
}
static inline void xfrm_garbage_collect(struct net *net)
{
}
#endif
static __inline__
......
......@@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
......
This diff is collapsed.
......@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
static bool is_vti_tunnel(const struct net_device *dev)
{
return dev->netdev_ops == &vti_netdev_ops;
}
static int vti_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ip_tunnel *tunnel = netdev_priv(dev);
if (!is_vti_tunnel(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_DOWN:
if (!net_eq(tunnel->net, dev_net(dev)))
xfrm_garbage_collect(tunnel->net);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block vti_notifier_block __read_mostly = {
.notifier_call = vti_device_event,
};
static int __init vti_init(void)
{
const char *msg;
......@@ -618,8 +591,6 @@ static int __init vti_init(void)
pr_info("IPv4 over IPsec tunneling driver\n");
register_netdevice_notifier(&vti_notifier_block);
msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
......@@ -652,7 +623,6 @@ static int __init vti_init(void)
xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
pernet_dev_failed:
unregister_netdevice_notifier(&vti_notifier_block);
pr_err("vti init: failed to register %s\n", msg);
return err;
}
......@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
unregister_netdevice_notifier(&vti_notifier_block);
}
module_init(vti_init);
......
......@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_tos = iph->tos;
}
static inline int xfrm4_garbage_collect(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
xfrm_garbage_collect_deferred(net);
return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
......@@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = INT_MAX,
.gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
......
......@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100,
};
static bool is_vti6_tunnel(const struct net_device *dev)
{
return dev->netdev_ops == &vti6_netdev_ops;
}
static int vti6_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ip6_tnl *t = netdev_priv(dev);
if (!is_vti6_tunnel(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_DOWN:
if (!net_eq(t->net, dev_net(dev)))
xfrm_garbage_collect(t->net);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block vti6_notifier_block __read_mostly = {
.notifier_call = vti6_device_event,
};
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
......@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
const char *msg;
int err;
register_netdevice_notifier(&vti6_notifier_block);
msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
......@@ -1216,7 +1187,6 @@ static int __init vti6_tunnel_init(void)
xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
pernet_dev_failed:
unregister_netdevice_notifier(&vti6_notifier_block);
pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
......@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
unregister_netdevice_notifier(&vti6_notifier_block);
}
module_init(vti6_tunnel_init);
......
......@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
}
}
static inline int xfrm6_garbage_collect(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
xfrm_garbage_collect_deferred(net);
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
......@@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
.redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
.gc_thresh = INT_MAX,
.gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
......
......@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
if (err == 0)
xfrm_garbage_collect(net);
return err;
}
......@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err;
}
......@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
if (!err)
xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
......
......@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
static int xfrm_dev_unregister(struct net_device *dev)
{
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
......@@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true);
xfrm_garbage_collect(dev_net(dev));
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
......
This diff is collapsed.
......@@ -724,9 +724,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
}
}
}
if (cnt)
if (cnt) {
err = 0;
xfrm_policy_cache_flush();
}
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return err;
......
......@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err;
}
......@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
return err;
}
xfrm_garbage_collect(net);
c.data.type = type;
c.event = nlh->nlmsg_type;
......
......@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
struct net *net;
rtnl_lock();
for_each_net(net) {
atomic_inc(&net->xfrm.flow_cache_genid);
for_each_net(net)
rt_genid_bump_all(net);
}
rtnl_unlock();
}
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment