Commit fb3f7887 authored by Christoph Hellwig, committed by Christoph Hellwig

[XFS] Under heavy load, there are not enough hash buckets to deal with
the number of metadata buffers.  Use the same techniques as the
regular Linux buffer cache here:

Use more hash buckets for holding XFS metadata, and use the same
hash algorithm as the regular buffer cache.

SGI Modid: 2.5.x-xfs:slinx:139997a
parent 8af79dc1
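
For context, the sketch below (not part of the patch) mirrors the fs/buffer.c-style hash that this change adopts: the device number and the 512-byte block number are mixed by shifts derived from log2 of the bucket count, then masked down to a bucket index. It is a standalone userspace program; NR_HASH, bucket() and main() are illustrative choices, not names from the XFS code — only the hash expression and the >> 9 byte-to-block conversion follow the patch.

/*
 * Standalone userspace sketch, not part of the patch: it mirrors the
 * fs/buffer.c-style _hashfn() adopted below to show how a (dev, block)
 * pair is spread across hash buckets.
 */
#include <stdio.h>

#define NR_HASH		8192			/* assumed bucket count (power of 2) */
#define HASH_MASK	(NR_HASH - 1)
#define HASH_SHIFT	13			/* log2(NR_HASH); >= 12 keeps all shift amounts non-negative */

#define _hashfn(dev,block) \
	((((dev)<<(HASH_SHIFT - 6)) ^ ((dev)<<(HASH_SHIFT - 9))) ^ \
	(((block)<<(HASH_SHIFT - 6)) ^ ((block) >> 13) ^ \
	 ((block) << (HASH_SHIFT - 12))))

static unsigned int bucket(unsigned int dev, unsigned long long base)
{
	unsigned long long block = base >> 9;	/* byte offset -> 512-byte block, as in _bhash() */

	return _hashfn(dev, block) & HASH_MASK;
}

int main(void)
{
	unsigned long long off;

	/* show the bucket spread of a few page-aligned offsets on one device */
	for (off = 0; off < 8 * 4096; off += 4096)
		printf("offset %8llu -> bucket %4u\n", off, bucket(3, off));
	return 0;
}

Compared with the old _bhash() loop, which folded the block number NBITS bits at a time into one of a fixed set of buckets, this spreads keys over a table whose size scales with available memory.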
@@ -1009,13 +1009,13 @@ init_xfs_fs( void )
 	if (error < 0)
 		return error;
 
+	si_meminfo(&si);
+	xfs_physmem = si.totalram;
+
 	error = pagebuf_init();
 	if (error < 0)
 		goto out;
 
-	si_meminfo(&si);
-	xfs_physmem = si.totalram;
-
 	vn_init();
 	xfs_init();
 	dmapi_init();
@@ -155,35 +155,37 @@ struct pbstats pbstats;
  * Pagebuf hashing
  */
 
-#define NBITS	5
-#define NHASH	(1<<NBITS)
-
+/* This structure must be a power of 2 long for the hash to work */
 typedef struct {
 	struct list_head	pb_hash;
 	int			pb_count;
 	spinlock_t		pb_hash_lock;
 } pb_hash_t;
 
-STATIC pb_hash_t	pbhash[NHASH];
+static pb_hash_t	*pbhash;
+static unsigned int	pb_hash_mask;
+static unsigned int	pb_hash_shift;
+static unsigned int	pb_order;
+
 #define pb_hash(pb)	&pbhash[pb->pb_hash_index]
 
-STATIC int
+/*
+ * This hash is the same one as used on the Linux buffer cache,
+ * see fs/buffer.c
+ */
+#define _hashfn(dev,block)	\
+	((((dev)<<(pb_hash_shift - 6)) ^ ((dev)<<(pb_hash_shift - 9))) ^ \
+	(((block)<<(pb_hash_shift - 6)) ^ ((block) >> 13) ^ \
+	 ((block) << (pb_hash_shift - 12))))
+
+static inline int
 _bhash(
 	dev_t		dev,
 	loff_t		base)
 {
-	int		bit, hval;
 
 	base >>= 9;
-	/*
-	 * dev_t is 16 bits, loff_t is always 64 bits
-	 */
-	base ^= dev;
-	for (bit = hval = 0; base != 0 && bit < sizeof(base) * 8; bit += NBITS) {
-		hval ^= (int)base & (NHASH-1);
-		base >>= NBITS;
-	}
-	return hval;
+	return (_hashfn(dev, base) & pb_hash_mask);
 }
 
 /*
@@ -1862,7 +1864,39 @@ pagebuf_shaker(void)
 int __init
 pagebuf_init(void)
 {
-	int		i;
+	int		order, mempages, i;
+	unsigned int	nr_hash;
+	extern int	xfs_physmem;
 
+	mempages = xfs_physmem >>= 16;
+	mempages *= sizeof(pb_hash_t);
+	for (order = 0; (1 << order) < mempages; order++)
+		;
+
+	if (order > 3) order = 3;	/* cap us at 2K buckets */
+
+	do {
+		unsigned long tmp;
+
+		nr_hash = (PAGE_SIZE << order) / sizeof(pb_hash_t);
+		nr_hash = 1 << (ffs(nr_hash) - 1);
+		pb_hash_mask = (nr_hash - 1);
+		tmp = nr_hash;
+		pb_hash_shift = 0;
+		while((tmp >>= 1UL) != 0UL)
+			pb_hash_shift++;
+
+		pbhash = (pb_hash_t *)
+			__get_free_pages(GFP_KERNEL, order);
+		pb_order = order;
+	} while (pbhash == NULL && --order > 0);
+
+	printk("pagebuf cache hash table entries: %d (order: %d, %ld bytes)\n",
+		nr_hash, order, (PAGE_SIZE << order));
+
+	for(i = 0; i < nr_hash; i++) {
+		spin_lock_init(&pbhash[i].pb_hash_lock);
+		INIT_LIST_HEAD(&pbhash[i].pb_hash);
+	}
+
 	pagebuf_table_header = register_sysctl_table(pagebuf_root_table, 1);
@@ -1880,11 +1914,6 @@ pagebuf_init(void)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < NHASH; i++) {
-		spin_lock_init(&pbhash[i].pb_hash_lock);
-		INIT_LIST_HEAD(&pbhash[i].pb_hash);
-	}
-
 #ifdef PAGEBUF_TRACE
 	pb_trace.buf = (pagebuf_trace_t *)kmalloc(
 		PB_TRACE_BUFSIZE * sizeof(pagebuf_trace_t), GFP_KERNEL);
@@ -1911,6 +1940,7 @@ pagebuf_terminate(void)
 	kmem_cache_destroy(pagebuf_cache);
 	kmem_shake_deregister(pagebuf_shaker);
+	free_pages((unsigned long)pbhash, pb_order);
 
 	unregister_sysctl_table(pagebuf_table_header);
 
 #ifdef CONFIG_PROC_FS
@@ -215,8 +215,8 @@ typedef struct page_buf_s {
 	unsigned short		pb_error;	/* error code on I/O */
 	unsigned short		pb_page_count;	/* size of page array */
 	unsigned short		pb_offset;	/* page offset in first page */
+	unsigned short		pb_hash_index;	/* hash table index */
 	unsigned char		pb_locked;	/* page array is locked */
-	unsigned char		pb_hash_index;	/* hash table index */
 	struct page		**pb_pages;	/* array of page pointers */
 	struct page		*pb_page_array[PB_PAGES]; /* inline pages */
 #ifdef PAGEBUF_LOCK_TRACKING
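
A back-of-the-envelope check of the new sizing, under the assumption of 4 KiB pages and a 16-byte pb_hash_t (two list pointers, an int and a spinlock on a 32-bit SMP build): the order-3 cap allocates PAGE_SIZE << 3 = 32768 bytes, i.e. 32768 / 16 = 2048 buckets, matching the "cap us at 2K buckets" comment, versus the fixed NHASH = 1 << 5 = 32 buckets the old code used regardless of memory size.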