Commit 7453596a authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Limit hashtable sizes

From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>

The issue of exceedingly large hash tables was discussed on the
mailing list a while back, but seems to have slipped through the cracks.

What we found is that it's not a problem for x86 (and most other
architectures) because __get_free_pages won't be able to get anything
beyond order MAX_ORDER-1 (10), which means those hash tables are at most
4MB each (assuming a 4K page size).  However, on ia64, in order to support
larger hugeTLB page sizes, MAX_ORDER is bumped up to 18, which now
means a 2GB upper limit is enforced by the page allocator (assuming a 16K
page size).  PPC64 is another example that bumps up MAX_ORDER.

Last time I checked, the tcp ehash table was taking a whopping (insane!)
2GB on one of our large machines.  The dentry and inode hash tables also
take a considerable amount of memory.

Setting the size of these tables is difficult: they need to be constrained on
many-zone ia64 machines, but this could cause significant performance
problems when there are (for example) 100 million dentries in cache.
Large-memory machines which do not slice that memory up into huge numbers of
zones do not need to run the risk of this slowdown.

So the sizing algorithms remain essentially unchanged, and boot-time options
are provided which permit the tables to be scaled down.
parent 86c1b9ae
...@@ -293,6 +293,9 @@ running once the system is up. ...@@ -293,6 +293,9 @@ running once the system is up.
devfs= [DEVFS] devfs= [DEVFS]
See Documentation/filesystems/devfs/boot-options. See Documentation/filesystems/devfs/boot-options.
dhash_entries= [KNL]
Set number of hash buckets for dentry cache.
digi= [HW,SERIAL] digi= [HW,SERIAL]
IO parameters + enable/disable command. IO parameters + enable/disable command.
...@@ -441,6 +444,9 @@ running once the system is up. ...@@ -441,6 +444,9 @@ running once the system is up.
idle= [HW] idle= [HW]
Format: idle=poll or idle=halt Format: idle=poll or idle=halt
ihash_entries= [KNL]
Set number of hash buckets for inode cache.
in2000= [HW,SCSI] in2000= [HW,SCSI]
See header of drivers/scsi/in2000.c. See header of drivers/scsi/in2000.c.
...@@ -890,6 +896,9 @@ running once the system is up. ...@@ -890,6 +896,9 @@ running once the system is up.
resume= [SWSUSP] Specify the partition device for software suspension resume= [SWSUSP] Specify the partition device for software suspension
rhash_entries= [KNL,NET]
Set number of hash buckets for the route cache.
riscom8= [HW,SERIAL] riscom8= [HW,SERIAL]
Format: <io_board1>[,<io_board2>[,...<io_boardN>]] Format: <io_board1>[,<io_board2>[,...<io_boardN>]]
...@@ -1152,6 +1161,9 @@ running once the system is up. ...@@ -1152,6 +1161,9 @@ running once the system is up.
tgfx_2= See Documentation/input/joystick-parport.txt. tgfx_2= See Documentation/input/joystick-parport.txt.
tgfx_3= tgfx_3=
thash_entries= [KNL,NET]
Set number of hash buckets for TCP connections.
tipar= [HW] tipar= [HW]
See header of drivers/char/tipar.c. See header of drivers/char/tipar.c.
......
...@@ -1531,6 +1531,16 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name) ...@@ -1531,6 +1531,16 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
return ino; return ino;
} }
/*
 * Value supplied with the "dhash_entries=" boot option.  Has static
 * storage, so it stays zero unless the handler below assigns it.
 */
static __initdata unsigned long dhash_entries;

/*
 * Handler for the "dhash_entries=" boot option.
 *
 * @str: text following "dhash_entries=", or NULL.
 *
 * Parses @str with simple_strtoul() (base argument 0) and stores the
 * result in dhash_entries.  Returns 1 once a value has been stored and
 * 0 when @str is NULL, in which case dhash_entries is left untouched.
 */
static int __init set_dhash_entries(char *str)
{
	char *rest = str;	/* receives the end-of-number pointer; not used */

	if (str) {
		dhash_entries = simple_strtoul(str, &rest, 0);
		return 1;
	}
	return 0;
}
__setup("dhash_entries=", set_dhash_entries);
static void __init dcache_init(unsigned long mempages) static void __init dcache_init(unsigned long mempages)
{ {
struct hlist_head *d; struct hlist_head *d;
...@@ -1556,11 +1566,13 @@ static void __init dcache_init(unsigned long mempages) ...@@ -1556,11 +1566,13 @@ static void __init dcache_init(unsigned long mempages)
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
#if PAGE_SHIFT < 13 if (!dhash_entries)
mempages >>= (13 - PAGE_SHIFT); dhash_entries = PAGE_SHIFT < 13 ?
#endif mempages >> (13 - PAGE_SHIFT) :
mempages *= sizeof(struct hlist_head); mempages << (PAGE_SHIFT - 13);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
dhash_entries *= sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
; ;
do { do {
......
...@@ -1327,6 +1327,16 @@ void wake_up_inode(struct inode *inode) ...@@ -1327,6 +1327,16 @@ void wake_up_inode(struct inode *inode)
wake_up_all(wq); wake_up_all(wq);
} }
/*
 * Value supplied with the "ihash_entries=" boot option.  Has static
 * storage, so it stays zero unless the handler below assigns it.
 */
static __initdata unsigned long ihash_entries;

/*
 * Handler for the "ihash_entries=" boot option.
 *
 * @str: text following "ihash_entries=", or NULL.
 *
 * Parses @str with simple_strtoul() (base argument 0) and stores the
 * result in ihash_entries.  Returns 1 once a value has been stored and
 * 0 when @str is NULL, in which case ihash_entries is left untouched.
 */
static int __init set_ihash_entries(char *str)
{
	char *rest = str;	/* receives the end-of-number pointer; not used */

	if (str) {
		ihash_entries = simple_strtoul(str, &rest, 0);
		return 1;
	}
	return 0;
}
__setup("ihash_entries=", set_ihash_entries);
/* /*
* Initialize the waitqueues and inode hash table. * Initialize the waitqueues and inode hash table.
*/ */
...@@ -1340,9 +1350,13 @@ void __init inode_init(unsigned long mempages) ...@@ -1340,9 +1350,13 @@ void __init inode_init(unsigned long mempages)
for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++) for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
init_waitqueue_head(&i_wait_queue_heads[i].wqh); init_waitqueue_head(&i_wait_queue_heads[i].wqh);
mempages >>= (14 - PAGE_SHIFT); if (!ihash_entries)
mempages *= sizeof(struct hlist_head); ihash_entries = PAGE_SHIFT < 14 ?
for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++) mempages >> (14 - PAGE_SHIFT) :
mempages << (PAGE_SHIFT - 14);
ihash_entries *= sizeof(struct hlist_head);
for (order = 0; ((1UL << order) << PAGE_SHIFT) < ihash_entries; order++)
; ;
do { do {
......
...@@ -2717,6 +2717,16 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, ...@@ -2717,6 +2717,16 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
#endif /* CONFIG_NET_CLS_ROUTE */ #endif /* CONFIG_NET_CLS_ROUTE */
/*
 * Value supplied with the "rhash_entries=" boot option.  Has static
 * storage, so it stays zero unless the handler below assigns it.
 */
static __initdata unsigned long rhash_entries;

/*
 * Handler for the "rhash_entries=" boot option.
 *
 * @str: text following "rhash_entries=", or NULL.
 *
 * Parses @str with simple_strtoul() (base argument 0) and stores the
 * result in rhash_entries.  Returns 1 once a value has been stored and
 * 0 when @str is NULL, in which case rhash_entries is left untouched.
 */
static int __init set_rhash_entries(char *str)
{
	char *rest = str;	/* receives the end-of-number pointer; not used */

	if (str) {
		rhash_entries = simple_strtoul(str, &rest, 0);
		return 1;
	}
	return 0;
}
__setup("rhash_entries=", set_rhash_entries);
int __init ip_rt_init(void) int __init ip_rt_init(void)
{ {
int i, order, goal, rc = 0; int i, order, goal, rc = 0;
...@@ -2743,7 +2753,10 @@ int __init ip_rt_init(void) ...@@ -2743,7 +2753,10 @@ int __init ip_rt_init(void)
panic("IP: failed to allocate ip_dst_cache\n"); panic("IP: failed to allocate ip_dst_cache\n");
goal = num_physpages >> (26 - PAGE_SHIFT); goal = num_physpages >> (26 - PAGE_SHIFT);
if (!rhash_entries)
goal = min(10, goal);
else
goal = (rhash_entries * sizeof(struct rt_hash_bucket)) >> PAGE_SHIFT;
for (order = 0; (1UL << order) < goal; order++) for (order = 0; (1UL << order) < goal; order++)
/* NOTHING */; /* NOTHING */;
......
...@@ -2570,6 +2570,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, ...@@ -2570,6 +2570,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
extern void __skb_cb_too_small_for_tcp(int, int); extern void __skb_cb_too_small_for_tcp(int, int);
extern void tcpdiag_init(void); extern void tcpdiag_init(void);
/*
 * Value supplied with the "thash_entries=" boot option.  Has static
 * storage, so it stays zero unless the handler below assigns it.
 */
static __initdata unsigned long thash_entries;

/*
 * Handler for the "thash_entries=" boot option.
 *
 * @str: text following "thash_entries=", or NULL.
 *
 * Parses @str with simple_strtoul() (base argument 0) and stores the
 * result in thash_entries.  Returns 1 once a value has been stored and
 * 0 when @str is NULL, in which case thash_entries is left untouched.
 */
static int __init set_thash_entries(char *str)
{
	char *rest = str;	/* receives the end-of-number pointer; not used */

	if (str) {
		thash_entries = simple_strtoul(str, &rest, 0);
		return 1;
	}
	return 0;
}
__setup("thash_entries=", set_thash_entries);
void __init tcp_init(void) void __init tcp_init(void)
{ {
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
...@@ -2611,6 +2621,10 @@ void __init tcp_init(void) ...@@ -2611,6 +2621,10 @@ void __init tcp_init(void)
else else
goal = num_physpages >> (23 - PAGE_SHIFT); goal = num_physpages >> (23 - PAGE_SHIFT);
if (!thash_entries)
goal = min(10UL, goal);
else
goal = (thash_entries * sizeof(struct tcp_ehash_bucket)) >> PAGE_SHIFT;
for (order = 0; (1UL << order) < goal; order++) for (order = 0; (1UL << order) < goal; order++)
; ;
do { do {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment