Commit 460ead97 authored by David S. Miller

[IPV4]: Fix fib_hash performance problems with huge route tables.

parent 094694d0
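
The change replaces the compile-time CONFIG_IP_ROUTE_LARGE_TABLES option with unconditional, on-demand growth of each zone's hash table. Because every divisor is a power of two, the hand-coded per-size mask constants can be replaced by deriving the mask as (divisor - 1). A minimal standalone C sketch of that masking idea (the names and the userspace setting are illustrative, not the kernel code):

    #include <assert.h>

    /* For a power-of-two divisor, (hash & (divisor - 1)) selects the
     * same bucket as (hash % divisor), without a division. */
    static unsigned int bucket(unsigned int hash, unsigned int divisor)
    {
            return hash & (divisor - 1);
    }

    int main(void)
    {
            /* divisor 16 -> mask 0xF, the constant fn_new_zone() used to hard-code */
            assert(bucket(0x12345678u, 16) == 0x12345678u % 16);
            return 0;
    }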
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -125,13 +125,6 @@ config IP_ROUTE_VERBOSE
 	  handled by the klogd daemon which is responsible for kernel messages
 	  ("man klogd").
 
-config IP_ROUTE_LARGE_TABLES
-	bool "IP: large routing tables"
-	depends on IP_ADVANCED_ROUTER
-	help
-	  If you have routing zones that grow to more than about 64 entries,
-	  you may want to say Y here to speed up the routing process.
-
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	depends on INET
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -89,7 +89,7 @@ struct fn_zone
 	int		fz_nent;	/* Number of entries	*/
 	int		fz_divisor;	/* Hash divisor		*/
-	u32		fz_hashmask;	/* (1<<fz_divisor) - 1	*/
+	u32		fz_hashmask;	/* (fz_divisor - 1)	*/
 #define FZ_HASHMASK(fz)		((fz)->fz_hashmask)
 	int		fz_order;	/* Zone order		*/
@@ -149,9 +149,30 @@ static __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
 static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
 
-#define FZ_MAX_DIVISOR 1024
+#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *))
 
-#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
+static unsigned long size_to_order(unsigned long size)
+{
+	unsigned long order;
+
+	for (order = 0; order < MAX_ORDER; order++) {
+		if ((PAGE_SIZE << order) >= size)
+			break;
+	}
+	return order;
+}
+
+static struct fib_node **fz_hash_alloc(int divisor)
+{
+	unsigned long size = divisor * sizeof(struct fib_node *);
+
+	if (divisor <= 1024) {
+		return kmalloc(size, GFP_KERNEL);
+	} else {
+		return (struct fib_node **)
+			__get_free_pages(GFP_KERNEL, size_to_order(size));
+	}
+}
 
 /* The fib hash lock must be held when this is called. */
 static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
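
size_to_order() returns the smallest page-allocation order whose block covers the requested byte count, and fz_hash_alloc() keeps using kmalloc() for tables of up to 1024 buckets (the old FZ_MAX_DIVISOR) while larger tables come straight from the page allocator. A standalone sketch of the order computation, assuming a 4096-byte page and an illustrative MAX_ORDER of 10:

    #include <stdio.h>

    #define SKETCH_PAGE_SIZE 4096UL
    #define SKETCH_MAX_ORDER 10    /* illustrative; the real value is arch-specific */

    static unsigned long size_to_order(unsigned long size)
    {
            unsigned long order;

            /* Smallest order such that (PAGE_SIZE << order) covers size. */
            for (order = 0; order < SKETCH_MAX_ORDER; order++) {
                    if ((SKETCH_PAGE_SIZE << order) >= size)
                            break;
            }
            return order;
    }

    int main(void)
    {
            /* 2048 eight-byte pointers = 16384 bytes -> order 2 (four pages) */
            printf("%lu\n", size_to_order(2048 * 8));
            return 0;
    }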
@@ -174,6 +195,15 @@ static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
 	}
 }
 
+static void fz_hash_free(struct fib_node **hash, int divisor)
+{
+	if (divisor <= 1024)
+		kfree(hash);
+	else
+		free_pages((unsigned long) hash,
+			   size_to_order(divisor * sizeof(struct fib_node *)));
+}
+
 static void fn_rehash_zone(struct fn_zone *fz)
 {
 	struct fib_node **ht, **old_ht;
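
fz_hash_free() mirrors fz_hash_alloc(): the divisor, not the pointer, records which allocator produced the table, so the caller must pass the divisor the table was sized for. That is why fn_rehash_zone() below frees old_ht with old_divisor rather than the newly installed new_divisor.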
@@ -185,24 +215,30 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	switch (old_divisor) {
 	case 16:
 		new_divisor = 256;
-		new_hashmask = 0xFF;
 		break;
 	case 256:
 		new_divisor = 1024;
-		new_hashmask = 0x3FF;
 		break;
 	default:
-		printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
-		return;
+		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
+			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
+			return;
+		}
+		new_divisor = (old_divisor << 1);
+		break;
 	}
 
+	new_hashmask = (new_divisor - 1);
+
 #if RT_CACHE_DEBUG >= 2
 	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
 #endif
 
-	ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL);
+	ht = fz_hash_alloc(new_divisor);
 	if (ht)	{
 		memset(ht, 0, new_divisor*sizeof(struct fib_node*));
 		write_lock_bh(&fib_hash_lock);
 		old_ht = fz->fz_hash;
 		fz->fz_hash = ht;
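
With the mask always derived as (new_divisor - 1), the switch no longer needs a hand-written mask per size, and the default case lets a busy zone keep doubling past 1024 buckets up to FZ_MAX_DIVISOR. As a rough worked example (the real figures are arch-dependent): with 4096-byte pages, a MAX_ORDER of 10, and 4-byte pointers, FZ_MAX_DIVISOR works out to (4096 << 10) / 4 = 1,048,576 buckets.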
@@ -210,10 +246,10 @@ static void fn_rehash_zone(struct fn_zone *fz)
 		fz->fz_divisor = new_divisor;
 		fn_rebuild_zone(fz, old_ht, old_divisor);
 		write_unlock_bh(&fib_hash_lock);
-		kfree(old_ht);
+
+		fz_hash_free(old_ht, old_divisor);
 	}
 }
-#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */
 
 static void fn_free_node(struct fib_node * f)
 {
@@ -233,12 +269,11 @@ fn_new_zone(struct fn_hash *table, int z)
 	memset(fz, 0, sizeof(struct fn_zone));
 	if (z) {
 		fz->fz_divisor = 16;
-		fz->fz_hashmask = 0xF;
 	} else {
 		fz->fz_divisor = 1;
-		fz->fz_hashmask = 0;
 	}
-	fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL);
+	fz->fz_hashmask = (fz->fz_divisor - 1);
+	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
 	if (!fz->fz_hash) {
 		kfree(fz);
 		return NULL;
@@ -467,12 +502,10 @@ rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
 	if ((fi = fib_create_info(r, rta, n, &err)) == NULL)
 		return err;
 
-#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
-	if (fz->fz_nent > (fz->fz_divisor<<2) &&
+	if (fz->fz_nent > (fz->fz_divisor<<1) &&
 	    fz->fz_divisor < FZ_MAX_DIVISOR &&
 	    (z==32 || (1<<z) > fz->fz_divisor))
 		fn_rehash_zone(fz);
-#endif
 
 	fp = fz_chain_p(key, fz);
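
Two guards survive on the rehash trigger, which now fires once a zone averages more than two routes per bucket (nent > divisor << 1, previously four): the divisor must stay below FZ_MAX_DIVISOR, and a zone of prefix length z never grows beyond its 1 << z possible keys. A small standalone sketch of the predicate with illustrative numbers:

    #include <stdio.h>

    /* Stand-in for the rehash test on the insert path; fz_max_divisor
     * is a placeholder for the kernel's FZ_MAX_DIVISOR. */
    static int should_rehash(int nent, int divisor, int z, long fz_max_divisor)
    {
            return nent > (divisor << 1) &&
                   divisor < fz_max_divisor &&
                   (z == 32 || (1 << z) > divisor);
    }

    int main(void)
    {
            /* a /24 zone with 16 buckets grows once it holds 33 routes */
            printf("%d\n", should_rehash(33, 16, 24, 1048576));  /* 1 */
            printf("%d\n", should_rehash(32, 16, 24, 1048576));  /* 0 */
            return 0;
    }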