Commit d47b295e authored by Chuck Lever

NFSD: Use rhashtable for managing nfs4_file objects

fh_match() is costly, especially when filehandles are large (as is
the case for NFSv4). It needs to be used sparingly when searching
data structures. Unfortunately, with common workloads, I see
multiple thousands of objects stored in file_hashtbl[], which has
just 256 buckets, making its bucket hash chains quite lengthy.

Walking long hash chains with the state_lock held blocks other
activity that needs that lock. Sizable hash chains are a common
occurrence once the server has handed out some delegations, for
example -- IIUC, each delegated file is held open on the server by
an nfs4_file object.

To help mitigate the cost of searching with fh_match(), replace the
nfs4_file hash table with an rhashtable, which can dynamically
resize its bucket array to minimize hash chain length.

The result of this modification is an improvement in the latency of
NFSv4 operations, and the reduction of nfsd CPU utilization due to
eliminating the cost of multiple calls to fh_match() and reducing
the CPU cache misses incurred while walking long hash chains in the
nfs4_file hash table.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
parent 15424748
...@@ -44,7 +44,9 @@ ...@@ -44,7 +44,9 @@
#include <linux/jhash.h> #include <linux/jhash.h>
#include <linux/string_helpers.h> #include <linux/string_helpers.h>
#include <linux/fsnotify.h> #include <linux/fsnotify.h>
#include <linux/rhashtable.h>
#include <linux/nfs_ssc.h> #include <linux/nfs_ssc.h>
#include "xdr4.h" #include "xdr4.h"
#include "xdr4cb.h" #include "xdr4cb.h"
#include "vfs.h" #include "vfs.h"
...@@ -589,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) ...@@ -589,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
void void
put_nfs4_file(struct nfs4_file *fi) put_nfs4_file(struct nfs4_file *fi)
{ {
might_lock(&state_lock); if (refcount_dec_and_test(&fi->fi_ref)) {
if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
nfsd4_file_hash_remove(fi); nfsd4_file_hash_remove(fi);
spin_unlock(&state_lock);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
...@@ -718,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) ...@@ -718,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
return ret & OWNER_HASH_MASK; return ret & OWNER_HASH_MASK;
} }
/* hash table for nfs4_file */ static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
#define FILE_HASH_BITS 8
#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
static unsigned int file_hashval(const struct svc_fh *fh) static const struct rhashtable_params nfs4_file_rhash_params = {
{ .key_len = sizeof_field(struct nfs4_file, fi_inode),
struct inode *inode = d_inode(fh->fh_dentry); .key_offset = offsetof(struct nfs4_file, fi_inode),
.head_offset = offsetof(struct nfs4_file, fi_rlist),
/* XXX: why not (here & in file cache) use inode? */ /*
return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); * Start with a single page hash table to reduce resizing churn
} * on light workloads.
*/
static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; .min_size = 256,
.automatic_shrinking = true,
};
/* /*
* Check if courtesy clients have conflicting access and resolve it if possible * Check if courtesy clients have conflicting access and resolve it if possible
...@@ -4685,12 +4685,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) ...@@ -4685,12 +4685,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
static noinline_for_stack struct nfs4_file * static noinline_for_stack struct nfs4_file *
nfsd4_file_hash_lookup(const struct svc_fh *fhp) nfsd4_file_hash_lookup(const struct svc_fh *fhp)
{ {
unsigned int hashval = file_hashval(fhp); struct inode *inode = d_inode(fhp->fh_dentry);
struct rhlist_head *tmp, *list;
struct nfs4_file *fi; struct nfs4_file *fi;
rcu_read_lock(); rcu_read_lock();
hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, list = rhltable_lookup(&nfs4_file_rhltable, &inode,
lockdep_is_held(&state_lock)) { nfs4_file_rhash_params);
rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
if (refcount_inc_not_zero(&fi->fi_ref)) { if (refcount_inc_not_zero(&fi->fi_ref)) {
rcu_read_unlock(); rcu_read_unlock();
...@@ -4704,40 +4706,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp) ...@@ -4704,40 +4706,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp)
/* /*
* On hash insertion, identify entries with the same inode but * On hash insertion, identify entries with the same inode but
* distinct filehandles. They will all be in the same hash bucket * distinct filehandles. They will all be on the list returned
* because nfs4_file's are hashed by the address in the fi_inode * by rhltable_lookup().
* field. *
* inode->i_lock prevents racing insertions from adding an entry
* for the same inode/fhp pair twice.
*/ */
static noinline_for_stack struct nfs4_file * static noinline_for_stack struct nfs4_file *
nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
{ {
unsigned int hashval = file_hashval(fhp); struct inode *inode = d_inode(fhp->fh_dentry);
struct rhlist_head *tmp, *list;
struct nfs4_file *ret = NULL; struct nfs4_file *ret = NULL;
bool alias_found = false; bool alias_found = false;
struct nfs4_file *fi; struct nfs4_file *fi;
int err;
spin_lock(&state_lock); rcu_read_lock();
hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, spin_lock(&inode->i_lock);
lockdep_is_held(&state_lock)) {
list = rhltable_lookup(&nfs4_file_rhltable, &inode,
nfs4_file_rhash_params);
rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
if (refcount_inc_not_zero(&fi->fi_ref)) if (refcount_inc_not_zero(&fi->fi_ref))
ret = fi; ret = fi;
} else if (d_inode(fhp->fh_dentry) == fi->fi_inode) } else
fi->fi_aliased = alias_found = true; fi->fi_aliased = alias_found = true;
} }
if (likely(ret == NULL)) { if (ret)
goto out_unlock;
nfsd4_file_init(fhp, new); nfsd4_file_init(fhp, new);
hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
nfs4_file_rhash_params);
if (err)
goto out_unlock;
new->fi_aliased = alias_found; new->fi_aliased = alias_found;
ret = new; ret = new;
}
spin_unlock(&state_lock); out_unlock:
spin_unlock(&inode->i_lock);
rcu_read_unlock();
return ret; return ret;
} }
static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
{ {
hlist_del_rcu(&fi->fi_hash); rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
nfs4_file_rhash_params);
} }
/* /*
...@@ -5628,6 +5646,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf ...@@ -5628,6 +5646,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* If not found, create the nfs4_file struct * If not found, create the nfs4_file struct
*/ */
fp = nfsd4_file_hash_insert(open->op_file, current_fh); fp = nfsd4_file_hash_insert(open->op_file, current_fh);
if (unlikely(!fp))
return nfserr_jukebox;
if (fp != open->op_file) { if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp); status = nfs4_check_deleg(cl, open, &dp);
if (status) if (status)
...@@ -8042,10 +8062,16 @@ nfs4_state_start(void) ...@@ -8042,10 +8062,16 @@ nfs4_state_start(void)
{ {
int ret; int ret;
ret = nfsd4_create_callback_queue(); ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
if (ret) if (ret)
return ret; return ret;
ret = nfsd4_create_callback_queue();
if (ret) {
rhltable_destroy(&nfs4_file_rhltable);
return ret;
}
set_max_delegations(); set_max_delegations();
return 0; return 0;
} }
...@@ -8076,6 +8102,7 @@ nfs4_state_shutdown_net(struct net *net) ...@@ -8076,6 +8102,7 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net); nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net); nfs4_state_destroy_net(net);
rhltable_destroy(&nfs4_file_rhltable);
#ifdef CONFIG_NFSD_V4_2_INTER_SSC #ifdef CONFIG_NFSD_V4_2_INTER_SSC
nfsd4_ssc_shutdown_umount(nn); nfsd4_ssc_shutdown_umount(nn);
#endif #endif
......
...@@ -536,16 +536,13 @@ struct nfs4_clnt_odstate { ...@@ -536,16 +536,13 @@ struct nfs4_clnt_odstate {
* inode can have multiple filehandles associated with it, so there is * inode can have multiple filehandles associated with it, so there is
* (potentially) a many to one relationship between this struct and struct * (potentially) a many to one relationship between this struct and struct
* inode. * inode.
*
* These are hashed by filehandle in the file_hashtbl, which is protected by
* the global state_lock spinlock.
*/ */
struct nfs4_file { struct nfs4_file {
refcount_t fi_ref; refcount_t fi_ref;
struct inode * fi_inode; struct inode * fi_inode;
bool fi_aliased; bool fi_aliased;
spinlock_t fi_lock; spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash on fi_fhandle */ struct rhlist_head fi_rlist;
struct list_head fi_stateids; struct list_head fi_stateids;
union { union {
struct list_head fi_delegations; struct list_head fi_delegations;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment