Commit eed75f19 authored by Harald Welte, committed by Arnaldo Carvalho de Melo

[NETFILTER] ip_conntrack: Make "hashsize" conntrack parameter writable

It's fairly simple to resize the hash table, but currently you need to
remove and reinsert the module.  That's bad (we lose connection
state).  Harald has even offered to write a daemon which sets this
based on load.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
parent d50a6b56
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h> #include <linux/netfilter_ipv4/listhelp.h>
#define IP_CONNTRACK_VERSION "2.3" #define IP_CONNTRACK_VERSION "2.4"
#if 0 #if 0
#define DEBUGP printk #define DEBUGP printk
...@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); ...@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
static int ip_conntrack_hash_rnd_initted; static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd; static unsigned int ip_conntrack_hash_rnd;
static u_int32_t static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
hash_conntrack(const struct ip_conntrack_tuple *tuple) unsigned int size, unsigned int rnd)
{ {
#if 0
dump_tuple(tuple);
#endif
return (jhash_3words(tuple->src.ip, return (jhash_3words(tuple->src.ip,
(tuple->dst.ip ^ tuple->dst.protonum), (tuple->dst.ip ^ tuple->dst.protonum),
(tuple->src.u.all | (tuple->dst.u.all << 16)), (tuple->src.u.all | (tuple->dst.u.all << 16)),
ip_conntrack_hash_rnd) % ip_conntrack_htable_size); rnd) % size);
}
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
return __hash_conntrack(tuple, ip_conntrack_htable_size,
ip_conntrack_hash_rnd);
} }
int int
...@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data) ...@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
return 1; return 1;
} }
static void free_conntrack_hash(void) static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
{ {
if (ip_conntrack_vmalloc) if (vmalloced)
vfree(ip_conntrack_hash); vfree(hash);
else else
free_pages((unsigned long)ip_conntrack_hash, free_pages((unsigned long)hash,
get_order(sizeof(struct list_head) get_order(sizeof(struct list_head) * size));
* ip_conntrack_htable_size));
} }
void ip_conntrack_flush() void ip_conntrack_flush()
...@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void) ...@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void)
ip_conntrack_flush(); ip_conntrack_flush();
kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep);
free_conntrack_hash(); free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
ip_conntrack_htable_size);
nf_unregister_sockopt(&so_getorigdst); nf_unregister_sockopt(&so_getorigdst);
} }
/* Allocate and initialize a conntrack hash table with 'size' buckets.
 * Tries physically-contiguous pages first, falling back to vmalloc for
 * large tables; *vmalloced records which method succeeded so
 * free_conntrack_hash() can release it the same way.  Returns NULL if
 * both allocation attempts fail. */
static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void *)__get_free_pages(GFP_KERNEL,
					get_order(sizeof(struct list_head)
						  * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		/* every bucket starts out as an empty list */
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
/* Handler for runtime writes to the "hashsize" module parameter:
 * resize the conntrack hash table in place, preserving all existing
 * connection state.  Returns 0 on success, -EINVAL for a zero/unparsable
 * size, -ENOMEM if the new table cannot be allocated. */
int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct ip_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!ip_conntrack_htable_size)
		return param_set_int(val, kp);

	/* NOTE(review): no endptr check here, so trailing garbage after
	 * the number is silently ignored; only an outright zero result
	 * is rejected. */
	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&ip_conntrack_lock);
	/* Move every entry from the old table into the new one,
	 * rehashing with the new size and seed. */
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		while (!list_empty(&ip_conntrack_hash[i])) {
			h = list_entry(ip_conntrack_hash[i].next,
				       struct ip_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = ip_conntrack_htable_size;
	old_vmalloced = ip_conntrack_vmalloc;
	old_hash = ip_conntrack_hash;

	/* Publish the new table, size and seed atomically w.r.t. the
	 * conntrack lock before anyone can hash against them. */
	ip_conntrack_htable_size = hashsize;
	ip_conntrack_vmalloc = vmalloced;
	ip_conntrack_hash = hash;
	ip_conntrack_hash_rnd = rnd;
	write_unlock_bh(&ip_conntrack_lock);

	/* Old table is empty now; free it outside the lock. */
	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}

/* Writable (0600) parameter: writes go through set_hashsize() so the
 * table is resized live; reads report ip_conntrack_htable_size. */
module_param_call(hashsize, set_hashsize, param_get_uint,
		  &ip_conntrack_htable_size, 0600);
int __init ip_conntrack_init(void) int __init ip_conntrack_init(void)
{ {
...@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void) ...@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void)
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 256 buckets. >= 1GB machines have 8192 buckets. */ * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
if (hashsize) { if (!ip_conntrack_htable_size) {
ip_conntrack_htable_size = hashsize;
} else {
ip_conntrack_htable_size ip_conntrack_htable_size
= (((num_physpages << PAGE_SHIFT) / 16384) = (((num_physpages << PAGE_SHIFT) / 16384)
/ sizeof(struct list_head)); / sizeof(struct list_head));
...@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void) ...@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void)
return ret; return ret;
} }
/* AK: the hash table is twice as big than needed because it ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
uses list_head. it would be much nicer to caches to use a &ip_conntrack_vmalloc);
single pointer list head here. */
ip_conntrack_vmalloc = 0;
ip_conntrack_hash
=(void*)__get_free_pages(GFP_KERNEL,
get_order(sizeof(struct list_head)
*ip_conntrack_htable_size));
if (!ip_conntrack_hash) {
ip_conntrack_vmalloc = 1;
printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
ip_conntrack_hash = vmalloc(sizeof(struct list_head)
* ip_conntrack_htable_size);
}
if (!ip_conntrack_hash) { if (!ip_conntrack_hash) {
printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
goto err_unreg_sockopt; goto err_unreg_sockopt;
...@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void) ...@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void)
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
write_unlock_bh(&ip_conntrack_lock); write_unlock_bh(&ip_conntrack_lock);
for (i = 0; i < ip_conntrack_htable_size; i++)
INIT_LIST_HEAD(&ip_conntrack_hash[i]);
/* For use by ipt_REJECT */ /* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach; ip_ct_attach = ip_conntrack_attach;
...@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void) ...@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void)
err_free_conntrack_slab: err_free_conntrack_slab:
kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash: err_free_hash:
free_conntrack_hash(); free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
ip_conntrack_htable_size);
err_unreg_sockopt: err_unreg_sockopt:
nf_unregister_sockopt(&so_getorigdst); nf_unregister_sockopt(&so_getorigdst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment