Commit 5eb902b8, authored by Kui-Feng Lee, committed by David S. Miller

net/ipv6: Remove expired routes with a separated list of routes.

FIB6 GC walks the trees of fib6_tables to remove expired routes. Walking a
tree can be expensive if the table holds a large number of routes, even if
most of them are permanent. Keeping the routes that have an expiration time
on a separate list, and checking only that list, avoids this potential issue.

Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 60df43d3
...@@ -173,6 +173,9 @@ struct fib6_info { ...@@ -173,6 +173,9 @@ struct fib6_info {
refcount_t fib6_ref; refcount_t fib6_ref;
unsigned long expires; unsigned long expires;
struct hlist_node gc_link;
struct dst_metrics *fib6_metrics; struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
...@@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) ...@@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0; return rt->fib6_src.plen > 0;
} }
/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
* been added to a table before.
*/
static inline void fib6_clean_expires(struct fib6_info *f6i) static inline void fib6_clean_expires(struct fib6_info *f6i)
{ {
f6i->fib6_flags &= ~RTF_EXPIRES; f6i->fib6_flags &= ~RTF_EXPIRES;
f6i->expires = 0; f6i->expires = 0;
} }
/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
* been added to a table before.
*/
static inline void fib6_set_expires(struct fib6_info *f6i, static inline void fib6_set_expires(struct fib6_info *f6i,
unsigned long expires) unsigned long expires)
{ {
...@@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) ...@@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
static inline void fib6_info_release(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i)
{ {
if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
call_rcu(&f6i->rcu, fib6_info_destroy_rcu); call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
} }
enum fib6_walk_state { enum fib6_walk_state {
...@@ -382,6 +393,7 @@ struct fib6_table { ...@@ -382,6 +393,7 @@ struct fib6_table {
struct inet_peer_base tb6_peers; struct inet_peer_base tb6_peers;
unsigned int flags; unsigned int flags;
unsigned int fib_seq; unsigned int fib_seq;
struct hlist_head tb6_gc_hlist; /* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
}; };
...@@ -498,6 +510,38 @@ void fib6_gc_cleanup(void); ...@@ -498,6 +510,38 @@ void fib6_gc_cleanup(void);
int fib6_init(void); int fib6_init(void);
/* Add the route to the gc list if it is not already there
*
* The callers should hold f6i->fib6_table->tb6_lock.
*/
static inline void fib6_add_gc_list(struct fib6_info *f6i)
{
/* If fib6_node is null, the f6i is not in (or removed from) the
* table.
*
* There is a gap between finding the f6i from the table and
* calling this function without the protection of the tb6_lock.
* This check makes sure the f6i is not added to the gc list when
* it is not on the table.
*/
if (!rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock)))
return;
if (hlist_unhashed(&f6i->gc_link))
hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
}
/* Unlink a route from the list of GC candidates.
 *
 * A route that is not currently linked is left untouched. After the
 * unlink, gc_link is reinitialized so hlist_unhashed() is true again.
 *
 * The callers should hold f6i->fib6_table->tb6_lock.
 */
static inline void fib6_remove_gc_list(struct fib6_info *f6i)
{
	/* Not on any gc list: nothing to undo. */
	if (hlist_unhashed(&f6i->gc_link))
		return;

	hlist_del_init(&f6i->gc_link);
}
struct ipv6_route_iter { struct ipv6_route_iter {
struct seq_net_private p; struct seq_net_private p;
struct fib6_walker w; struct fib6_walker w;
......
...@@ -1255,6 +1255,7 @@ static void ...@@ -1255,6 +1255,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
bool del_rt, bool del_peer) bool del_rt, bool del_peer)
{ {
struct fib6_table *table;
struct fib6_info *f6i; struct fib6_info *f6i;
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
...@@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, ...@@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
if (del_rt) if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else { else {
if (!(f6i->fib6_flags & RTF_EXPIRES)) if (!(f6i->fib6_flags & RTF_EXPIRES)) {
table = f6i->fib6_table;
spin_lock_bh(&table->tb6_lock);
fib6_set_expires(f6i, expires); fib6_set_expires(f6i, expires);
fib6_add_gc_list(f6i);
spin_unlock_bh(&table->tb6_lock);
}
fib6_info_release(f6i); fib6_info_release(f6i);
} }
} }
...@@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); ...@@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{ {
struct prefix_info *pinfo; struct prefix_info *pinfo;
struct fib6_table *table;
__u32 valid_lft; __u32 valid_lft;
__u32 prefered_lft; __u32 prefered_lft;
int addr_type, err; int addr_type, err;
...@@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) ...@@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (valid_lft == 0) { if (valid_lft == 0) {
ip6_del_rt(net, rt, false); ip6_del_rt(net, rt, false);
rt = NULL; rt = NULL;
} else if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
fib6_set_expires(rt, jiffies + rt_expires);
} else { } else {
fib6_clean_expires(rt); table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
fib6_set_expires(rt, jiffies + rt_expires);
fib6_add_gc_list(rt);
} else {
fib6_clean_expires(rt);
fib6_remove_gc_list(rt);
}
spin_unlock_bh(&table->tb6_lock);
} }
} else if (valid_lft) { } else if (valid_lft) {
clock_t expires = 0; clock_t expires = 0;
...@@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ...@@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags, unsigned long expires, u32 flags,
bool modify_peer) bool modify_peer)
{ {
struct fib6_table *table;
struct fib6_info *f6i; struct fib6_info *f6i;
u32 prio; u32 prio;
...@@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ...@@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->rt_priority, ifp->idev->dev, ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL); expires, flags, GFP_KERNEL);
} else { } else {
if (!expires) table = f6i->fib6_table;
spin_lock_bh(&table->tb6_lock);
if (!expires) {
fib6_clean_expires(f6i); fib6_clean_expires(f6i);
else fib6_remove_gc_list(f6i);
} else {
fib6_set_expires(f6i, expires); fib6_set_expires(f6i, expires);
fib6_add_gc_list(f6i);
}
spin_unlock_bh(&table->tb6_lock);
fib6_info_release(f6i); fib6_info_release(f6i);
} }
......
...@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) ...@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings); INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1); refcount_set(&f6i->fib6_ref, 1);
INIT_HLIST_NODE(&f6i->gc_link);
return f6i; return f6i;
} }
...@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) ...@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry); net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers); inet_peer_base_init(&table->tb6_peers);
INIT_HLIST_HEAD(&table->tb6_gc_hlist);
} }
return table; return table;
...@@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, ...@@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock)); lockdep_is_held(&table->tb6_lock));
} }
} }
fib6_clean_expires(rt);
fib6_remove_gc_list(rt);
} }
/* /*
...@@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
rt->fib6_nsiblings = 0; rt->fib6_nsiblings = 0;
if (!(iter->fib6_flags & RTF_EXPIRES)) if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST; return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter); fib6_clean_expires(iter);
else fib6_remove_gc_list(iter);
} else {
fib6_set_expires(iter, rt->expires); fib6_set_expires(iter, rt->expires);
fib6_add_gc_list(iter);
}
if (rt->fib6_pmtu) if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU, fib6_metric_set(iter, RTAX_MTU,
...@@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, ...@@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh) if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list); list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
if (rt->fib6_flags & RTF_EXPIRES)
fib6_add_gc_list(rt);
fib6_start_gc(info->nl_net, rt); fib6_start_gc(info->nl_net, rt);
} }
...@@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net) ...@@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection * Garbage collection
*/ */
static int fib6_age(struct fib6_info *rt, void *arg) static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{ {
struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies; unsigned long now = jiffies;
/* /*
...@@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg) ...@@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0; return 0;
} }
static void fib6_gc_table(struct net *net,
struct fib6_table *tb6,
struct fib6_gc_args *gc_args)
{
struct fib6_info *rt;
struct hlist_node *n;
struct nl_info info = {
.nl_net = net,
.skip_notify = false,
};
hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
if (fib6_age(rt, gc_args) == -1)
fib6_del(rt, &info);
}
/* Run garbage collection over the GC candidates of every table in @net.
 *
 * The table hash is walked inside an RCU read-side critical section.
 * Each table's tb6_lock is taken (with bottom halves disabled) around
 * the call to fib6_gc_table() for that table.
 */
static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
{
	struct fib6_table *tb6;
	unsigned int bucket;

	rcu_read_lock();
	for (bucket = 0; bucket < FIB6_TABLE_HASHSZ; bucket++) {
		hlist_for_each_entry_rcu(tb6,
					 &net->ipv6.fib_table_hash[bucket],
					 tb6_hlist) {
			spin_lock_bh(&tb6->tb6_lock);
			fib6_gc_table(net, tb6, gc_args);
			spin_unlock_bh(&tb6->tb6_lock);
		}
	}
	rcu_read_unlock();
}
void fib6_run_gc(unsigned long expires, struct net *net, bool force) void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{ {
struct fib6_gc_args gc_args; struct fib6_gc_args gc_args;
...@@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) ...@@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval; net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0; gc_args.more = 0;
fib6_clean_all(net, fib6_age, &gc_args); fib6_gc_all(net, &gc_args);
now = jiffies; now = jiffies;
net->ipv6.ip6_rt_last_gc = now; net->ipv6.ip6_rt_last_gc = now;
...@@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net) ...@@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags = net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES #ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
...@@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net) ...@@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags = net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif #endif
fib6_tables_init(net); fib6_tables_init(net);
......
...@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) ...@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts; struct ndisc_options ndopts;
struct fib6_info *rt = NULL; struct fib6_info *rt = NULL;
struct inet6_dev *in6_dev; struct inet6_dev *in6_dev;
struct fib6_table *table;
u32 defrtr_usr_metric; u32 defrtr_usr_metric;
unsigned int pref = 0; unsigned int pref = 0;
__u32 old_if_flags; __u32 old_if_flags;
...@@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) ...@@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
} }
if (rt) if (rt) {
table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
fib6_set_expires(rt, jiffies + (HZ * lifetime)); fib6_set_expires(rt, jiffies + (HZ * lifetime));
fib6_add_gc_list(rt);
spin_unlock_bh(&table->tb6_lock);
}
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
ra_msg->icmph.icmp6_hop_limit) { ra_msg->icmph.icmp6_hop_limit) {
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
......
...@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, ...@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt; struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix; struct in6_addr prefix_buf, *prefix;
struct fib6_table *table;
unsigned int pref; unsigned int pref;
unsigned long lifetime; unsigned long lifetime;
struct fib6_info *rt; struct fib6_info *rt;
...@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, ...@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) { if (rt) {
if (!addrconf_finite_timeout(lifetime)) table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt); fib6_clean_expires(rt);
else fib6_remove_gc_list(rt);
} else {
fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_set_expires(rt, jiffies + HZ * lifetime);
fib6_add_gc_list(rt);
}
spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt); fib6_info_release(rt);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment