Commit 98376387, authored by Eric Dumazet, committed by David S. Miller

net: fix route cache rebuilds

We added automatic route cache rebuilding in commit 1080d709
but had to correct a few bugs. One of the assumptions of the original
patch was that entries were kept sorted in a given way.

This assumption is known to be wrong (commit 1ddbcb00 gave an
explanation of this and corrected a leak) and expensive to respect.

Paweł Staszewski reported to me that one of his machines got its routing
cache disabled after a few messages like:

[ 2677.850065] Route hash chain too long!
[ 2677.850080] Adjust your secret_interval!
[82839.662993] Route hash chain too long!
[82839.662996] Adjust your secret_interval!
[155843.731650] Route hash chain too long!
[155843.731664] Adjust your secret_interval!
[155843.811881] Route hash chain too long!
[155843.811891] Adjust your secret_interval!
[155843.858209] vlan0811: 5 rebuilds is over limit, route caching
disabled
[155843.858212] Route hash chain too long!
[155843.858213] Adjust your secret_interval!

This is because rt_intern_hash() might be fooled when computing a chain
length, since multiple entries with the same keys can differ because of
TOS (or mark/oif) bits.

In the rare case where the fast algorithm sees a too-long chain, and
before taking the expensive path, we call a helper function in order to
not count duplicates of the same route that differ only in tos/mark/oif
bits. This helper works with data already in the CPU cache and is not
very expensive, despite its O(N^2) implementation.

Paweł Staszewski successfully tested this patch on his loaded router.
Reported-and-tested-by: Paweł Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1515faf2
...@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); ...@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb); static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(struct dst_ops *ops); static int rt_garbage_collect(struct dst_ops *ops);
static void rt_emergency_hash_rebuild(struct net *net);
static struct dst_ops ipv4_dst_ops = { static struct dst_ops ipv4_dst_ops = {
...@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context) ...@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
#define FRACT_BITS 3 #define FRACT_BITS 3
#define ONE (1UL << FRACT_BITS) #define ONE (1UL << FRACT_BITS)
/*
 * Decide whether @rth is the first entry of its hash chain carrying its
 * particular hash inputs.
 *
 * The chain is walked from @head up to, but not including, @rth. Any
 * earlier entry whose hash inputs compare equal to those of @rth is an
 * alias of it (same hash inputs, but a different tos, mark or oif).
 *
 * The walk stops only when it reaches @rth and never tests for the end
 * of the chain, so @rth must be reachable from @head.
 *
 * Returns 0 if such an alias precedes @rth, or ONE (the fixed-point
 * unit used for chain length accounting) if there is none.
 */
static int has_noalias(const struct rtable *head, const struct rtable *rth)
{
	const struct rtable *cur;

	for (cur = head; cur != rth; cur = cur->u.dst.rt_next) {
		/* An earlier entry already accounts for these hash inputs. */
		if (compare_hash_inputs(&cur->fl, &rth->fl))
			return 0;
	}

	return ONE;
}
static void rt_check_expire(void) static void rt_check_expire(void)
{ {
static unsigned int rover; static unsigned int rover;
unsigned int i = rover, goal; unsigned int i = rover, goal;
struct rtable *rth, *aux, **rthp; struct rtable *rth, **rthp;
unsigned long samples = 0; unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0; unsigned long sum = 0, sum2 = 0;
unsigned long delta; unsigned long delta;
...@@ -835,15 +853,7 @@ static void rt_check_expire(void) ...@@ -835,15 +853,7 @@ static void rt_check_expire(void)
* attributes don't unfairly skew * attributes don't unfairly skew
* the length computation * the length computation
*/ */
for (aux = rt_hash_table[i].chain;;) { length += has_noalias(rt_hash_table[i].chain, rth);
if (aux == rth) {
length += ONE;
break;
}
if (compare_hash_inputs(&aux->fl, &rth->fl))
break;
aux = aux->u.dst.rt_next;
}
continue; continue;
} }
} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
...@@ -1073,6 +1083,21 @@ static int rt_garbage_collect(struct dst_ops *ops) ...@@ -1073,6 +1083,21 @@ static int rt_garbage_collect(struct dst_ops *ops)
out: return 0; out: return 0;
} }
/*
 * Count the entries of a hash chain that have distinct hash inputs.
 *
 * Each entry is passed to has_noalias(), which contributes ONE when no
 * earlier entry of the chain shares its hash inputs and 0 otherwise.
 * The total is therefore held in fixed point and is shifted right by
 * FRACT_BITS before being returned.
 *
 * Every entry triggers a walk from @head, so the cost is quadratic in
 * the chain length.
 */
static int slow_chain_length(const struct rtable *head)
{
	const struct rtable *entry;
	int scaled = 0;

	for (entry = head; entry; entry = entry->u.dst.rt_next) {
		scaled += has_noalias(head, entry);
	}

	return scaled >> FRACT_BITS;
}
static int rt_intern_hash(unsigned hash, struct rtable *rt, static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb) struct rtable **rp, struct sk_buff *skb)
{ {
...@@ -1185,7 +1210,8 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, ...@@ -1185,7 +1210,8 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt,
rt_free(cand); rt_free(cand);
} }
} else { } else {
if (chain_length > rt_chain_length_max) { if (chain_length > rt_chain_length_max &&
slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
struct net *net = dev_net(rt->u.dst.dev); struct net *net = dev_net(rt->u.dst.dev);
int num = ++net->ipv4.current_rt_cache_rebuild_count; int num = ++net->ipv4.current_rt_cache_rebuild_count;
if (!rt_caching(dev_net(rt->u.dst.dev))) { if (!rt_caching(dev_net(rt->u.dst.dev))) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment