Commit fce1456a authored by Greg Banks, committed by Linus Torvalds

[PATCH] knfsd: make nfsd readahead params cache SMP-friendly

Make the nfsd read-ahead params cache more SMP-friendly by changing the single
global list and lock into a fixed 16-bucket hashtable with per-bucket locks.
This reduces spinlock contention in nfsd_read() on read-heavy workloads on
multiprocessor servers.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K
streaming reads at full line rate.  The server had 128 nfsd threads, which
sizes the RA cache at 256 entries, of which only a handful were used.  Flat
profiling shows nfsd_read(), including the inlined nfsd_get_raparms(), taking
10.4% of each CPU.  This patch drops the contribution from nfsd_read() to 1.71% for
each CPU.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 596bbe53
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include <linux/nfsd_idmap.h> #include <linux/nfsd_idmap.h>
#include <linux/security.h> #include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */ #endif /* CONFIG_NFSD_V4 */
#include <linux/jhash.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -81,10 +82,19 @@ struct raparms { ...@@ -81,10 +82,19 @@ struct raparms {
dev_t p_dev; dev_t p_dev;
int p_set; int p_set;
struct file_ra_state p_ra; struct file_ra_state p_ra;
unsigned int p_hindex;
}; };
/*
 * One bucket of the readahead-parameter hash table (raparm_hash[]).
 *
 * Each bucket owns its own chain of raparms entries and its own lock,
 * replacing the former single global list and global ra_lock.
 * The alignment attribute is presumably there to keep different buckets'
 * locks on separate cache lines on SMP builds.
 */
struct raparm_hbucket {
/* First entry of this bucket's chain; entries are linked via p_next. */
struct raparms *pb_head;
/*
 * Held while the chain is searched or reordered in nfsd_get_raparms()
 * and while an entry's p_ra/p_set/p_count are written back after a read.
 */
spinlock_t pb_lock;
} ____cacheline_aligned_in_smp;
static struct raparms * raparml; static struct raparms * raparml;
static struct raparms * raparm_cache; #define RAPARM_HASH_BITS 4
#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/* /*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed * Called from nfsd_lookup and encode_dirent. Check if we have crossed
...@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp) ...@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp)
* Obtain the readahead parameters for the file * Obtain the readahead parameters for the file
* specified by (dev, ino). * specified by (dev, ino).
*/ */
static DEFINE_SPINLOCK(ra_lock);
static inline struct raparms * static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino) nfsd_get_raparms(dev_t dev, ino_t ino)
{ {
struct raparms *ra, **rap, **frap = NULL; struct raparms *ra, **rap, **frap = NULL;
int depth = 0; int depth = 0;
unsigned int hash;
struct raparm_hbucket *rab;
hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
rab = &raparm_hash[hash];
spin_lock(&ra_lock); spin_lock(&rab->pb_lock);
for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev) if (ra->p_ino == ino && ra->p_dev == dev)
goto found; goto found;
depth++; depth++;
...@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) ...@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
} }
depth = nfsdstats.ra_size*11/10; depth = nfsdstats.ra_size*11/10;
if (!frap) { if (!frap) {
spin_unlock(&ra_lock); spin_unlock(&rab->pb_lock);
return NULL; return NULL;
} }
rap = frap; rap = frap;
...@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino) ...@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
ra->p_dev = dev; ra->p_dev = dev;
ra->p_ino = ino; ra->p_ino = ino;
ra->p_set = 0; ra->p_set = 0;
ra->p_hindex = hash;
found: found:
if (rap != &raparm_cache) { if (rap != &rab->pb_head) {
*rap = ra->p_next; *rap = ra->p_next;
ra->p_next = raparm_cache; ra->p_next = rab->pb_head;
raparm_cache = ra; rab->pb_head = ra;
} }
ra->p_count++; ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
spin_unlock(&ra_lock); spin_unlock(&rab->pb_lock);
return ra; return ra;
} }
...@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, ...@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write back readahead params */ /* Write back readahead params */
if (ra) { if (ra) {
spin_lock(&ra_lock); struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra; ra->p_ra = file->f_ra;
ra->p_set = 1; ra->p_set = 1;
ra->p_count--; ra->p_count--;
spin_unlock(&ra_lock); spin_unlock(&rab->pb_lock);
} }
if (err >= 0) { if (err >= 0) {
...@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) ...@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
void void
nfsd_racache_shutdown(void) nfsd_racache_shutdown(void)
{ {
if (!raparm_cache) if (!raparml)
return; return;
dprintk("nfsd: freeing readahead buffers.\n"); dprintk("nfsd: freeing readahead buffers.\n");
kfree(raparml); kfree(raparml);
raparm_cache = raparml = NULL; raparml = NULL;
} }
/* /*
* Initialize readahead param cache * Initialize readahead param cache
...@@ -1846,19 +1862,31 @@ int ...@@ -1846,19 +1862,31 @@ int
nfsd_racache_init(int cache_size) nfsd_racache_init(int cache_size)
{ {
int i; int i;
int j = 0;
int nperbucket;
if (raparm_cache)
if (raparml)
return 0; return 0;
if (cache_size < 2*RAPARM_HASH_SIZE)
cache_size = 2*RAPARM_HASH_SIZE;
raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
if (raparml != NULL) { if (raparml != NULL) {
dprintk("nfsd: allocating %d readahead buffers.\n", dprintk("nfsd: allocating %d readahead buffers.\n",
cache_size); cache_size);
for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
raparm_hash[i].pb_head = NULL;
spin_lock_init(&raparm_hash[i].pb_lock);
}
nperbucket = cache_size >> RAPARM_HASH_BITS;
memset(raparml, 0, sizeof(struct raparms) * cache_size); memset(raparml, 0, sizeof(struct raparms) * cache_size);
for (i = 0; i < cache_size - 1; i++) { for (i = 0; i < cache_size - 1; i++) {
raparml[i].p_next = raparml + i + 1; if (i % nperbucket == 0)
raparm_hash[j++].pb_head = raparml + i;
if (i % nperbucket < nperbucket-1)
raparml[i].p_next = raparml + i + 1;
} }
raparm_cache = raparml;
} else { } else {
printk(KERN_WARNING printk(KERN_WARNING
"nfsd: Could not allocate memory read-ahead cache.\n"); "nfsd: Could not allocate memory read-ahead cache.\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment