Commit e330572f authored by Brent Casavant, committed by Linus Torvalds

[PATCH] filesystem hashes: NUMA interleaving

The following patch modifies the dentry cache and inode cache to enable the
use of vmalloc to alleviate boottime memory allocation imbalances on NUMA
systems, utilizing flags to the alloc_large_system_hash routine in order to
centralize the enabling of this behavior.

In general, for each hash, we check at the early allocation point whether
hash distribution is enabled, and if so we defer allocation.  At the late
allocation point we perform the allocation only if it was deferred earlier.
These late allocation points are the same points utilized prior to the
addition of alloc_large_system_hash to the kernel.
Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent dcee73c4
...@@ -1574,6 +1574,12 @@ static void __init dcache_init_early(void) ...@@ -1574,6 +1574,12 @@ static void __init dcache_init_early(void)
{ {
int loop; int loop;
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
if (hashdist)
return;
dentry_hashtable = dentry_hashtable =
alloc_large_system_hash("Dentry cache", alloc_large_system_hash("Dentry cache",
sizeof(struct hlist_head), sizeof(struct hlist_head),
...@@ -1590,6 +1596,8 @@ static void __init dcache_init_early(void) ...@@ -1590,6 +1596,8 @@ static void __init dcache_init_early(void)
static void __init dcache_init(unsigned long mempages) static void __init dcache_init(unsigned long mempages)
{ {
int loop;
/* /*
* A constructor could be added for stable state like the lists, * A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature * but it is probably not worth it because of the cache nature
...@@ -1602,6 +1610,23 @@ static void __init dcache_init(unsigned long mempages) ...@@ -1602,6 +1610,23 @@ static void __init dcache_init(unsigned long mempages)
NULL, NULL); NULL, NULL);
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
return;
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
sizeof(struct hlist_head),
dhash_entries,
13,
0,
&d_hash_shift,
&d_hash_mask,
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
} }
/* SLAB cache for __getname() consumers */ /* SLAB cache for __getname() consumers */
......
...@@ -1328,6 +1328,12 @@ void __init inode_init_early(void) ...@@ -1328,6 +1328,12 @@ void __init inode_init_early(void)
{ {
int loop; int loop;
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
if (hashdist)
return;
inode_hashtable = inode_hashtable =
alloc_large_system_hash("Inode-cache", alloc_large_system_hash("Inode-cache",
sizeof(struct hlist_head), sizeof(struct hlist_head),
...@@ -1344,10 +1350,29 @@ void __init inode_init_early(void) ...@@ -1344,10 +1350,29 @@ void __init inode_init_early(void)
void __init inode_init(unsigned long mempages) void __init inode_init(unsigned long mempages)
{ {
int loop;
/* inode slab cache */ /* inode slab cache */
inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
0, SLAB_PANIC, init_once, NULL); 0, SLAB_PANIC, init_once, NULL);
set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
return;
inode_hashtable =
alloc_large_system_hash("Inode-cache",
sizeof(struct hlist_head),
ihash_entries,
14,
0,
&i_hash_shift,
&i_hash_mask,
0);
for (loop = 0; loop < (1 << i_hash_shift); loop++)
INIT_HLIST_HEAD(&inode_hashtable[loop]);
} }
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment