Commit 0aaaf5c4, authored by Chuck Lever, committed by Trond Myklebust

NFS: Cache state owners after files are closed

Servers have a finite amount of memory to store NFSv4 open and lock
owners.  Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification.  Thus clients should be careful to reuse
state owners when possible.

Currently Linux is not too careful.  When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released.  The next OPEN allocates a new one.  A
workload that serially opens and closes files can run through a large
number of open owners this way.

When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time.  Garbage collect before
looking for a state owner.  This makes state owners for active users
available for re-use.

Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed.  Also be
sure not to reclaim unused state owners during state recovery.

This change has benefits for the client as well.  For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one.  This reduces wire traffic
and thus open(2) latency.  Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test.  With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.

As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.

[ At some point we should rationalize the use of the nfs_server
->destroy method. ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
parent 414adf14
...@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server) ...@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server)
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
} }
/**
 * nfs4_destroy_server - release the state owners cached on an nfs_server
 * @server: nfs_server whose cached state owners are to be released
 *
 * Thin wrapper that hands @server to nfs4_purge_state_owners(). Its
 * signature matches the nfs_server ->destroy callback.
 */
static void nfs4_destroy_server(struct nfs_server *server)
{
nfs4_purge_state_owners(server);
}
#else #else
static void nfs4_shutdown_client(struct nfs_client *clp) static void nfs4_shutdown_client(struct nfs_client *clp)
{ {
...@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void) ...@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations); INIT_LIST_HEAD(&server->delegations);
INIT_LIST_HEAD(&server->layouts); INIT_LIST_HEAD(&server->layouts);
INIT_LIST_HEAD(&server->state_owners_lru);
atomic_set(&server->active, 0); atomic_set(&server->active, 0);
...@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, ...@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
nfs_server_insert_lists(server); nfs_server_insert_lists(server);
server->mount_time = jiffies; server->mount_time = jiffies;
server->destroy = nfs4_destroy_server;
out: out:
nfs_free_fattr(fattr); nfs_free_fattr(fattr);
return error; return error;
...@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, ...@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
/* Copy data from the source */ /* Copy data from the source */
server->nfs_client = source->nfs_client; server->nfs_client = source->nfs_client;
server->destroy = source->destroy;
atomic_inc(&server->nfs_client->cl_count); atomic_inc(&server->nfs_client->cl_count);
nfs_server_copy_userdata(server, source); nfs_server_copy_userdata(server, source);
......
...@@ -94,6 +94,8 @@ struct nfs_unique_id { ...@@ -94,6 +94,8 @@ struct nfs_unique_id {
struct nfs4_state_owner { struct nfs4_state_owner {
struct nfs_unique_id so_owner_id; struct nfs_unique_id so_owner_id;
struct nfs_server *so_server; struct nfs_server *so_server;
struct list_head so_lru;
unsigned long so_expires;
struct rb_node so_server_node; struct rb_node so_server_node;
struct rpc_cred *so_cred; /* Associated cred */ struct rpc_cred *so_cred; /* Associated cred */
...@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) ...@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern void nfs4_purge_state_owners(struct nfs_server *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct nfs4_state *, fmode_t); extern void nfs4_close_state(struct nfs4_state *, fmode_t);
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/jiffies.h>
#include "nfs4_fs.h" #include "nfs4_fs.h"
#include "callback.h" #include "callback.h"
...@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) ...@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
else if (cred > sp->so_cred) else if (cred > sp->so_cred)
p = &parent->rb_right; p = &parent->rb_right;
else { else {
if (!list_empty(&sp->so_lru))
list_del_init(&sp->so_lru);
atomic_inc(&sp->so_count); atomic_inc(&sp->so_count);
return sp; return sp;
} }
...@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) ...@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
else if (new->so_cred > sp->so_cred) else if (new->so_cred > sp->so_cred)
p = &parent->rb_right; p = &parent->rb_right;
else { else {
if (!list_empty(&sp->so_lru))
list_del_init(&sp->so_lru);
atomic_inc(&sp->so_count); atomic_inc(&sp->so_count);
return sp; return sp;
} }
...@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void) ...@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void)
spin_lock_init(&sp->so_sequence.lock); spin_lock_init(&sp->so_sequence.lock);
INIT_LIST_HEAD(&sp->so_sequence.list); INIT_LIST_HEAD(&sp->so_sequence.list);
atomic_set(&sp->so_count, 1); atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
return sp; return sp;
} }
...@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) ...@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
} }
} }
/**
 * nfs4_free_state_owner - tear down and free a state owner
 * @sp: state owner to dispose of
 *
 * Destroys the so_sequence wait queue, passes the owner's credential to
 * put_rpccred(), and then kfree()s the structure itself. @sp must not be
 * touched after this returns.
 */
static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
	struct rpc_cred *cred = sp->so_cred;

	rpc_destroy_wait_queue(&sp->so_sequence.wait);
	put_rpccred(cred);
	kfree(sp);
}
static void nfs4_gc_state_owners(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *tmp;
unsigned long time_min, time_max;
LIST_HEAD(doomed);
spin_lock(&clp->cl_lock);
time_max = jiffies;
time_min = (long)time_max - (long)clp->cl_lease_time;
list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
/* NB: LRU is sorted so that oldest is at the head */
if (time_in_range(sp->so_expires, time_min, time_max))
break;
list_move(&sp->so_lru, &doomed);
nfs4_remove_state_owner_locked(sp);
}
spin_unlock(&clp->cl_lock);
list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
list_del(&sp->so_lru);
nfs4_free_state_owner(sp);
}
}
/** /**
* nfs4_get_state_owner - Look up a state owner given a credential * nfs4_get_state_owner - Look up a state owner given a credential
* @server: nfs_server to search * @server: nfs_server to search
...@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, ...@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
sp = nfs4_find_state_owner_locked(server, cred); sp = nfs4_find_state_owner_locked(server, cred);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
if (sp != NULL) if (sp != NULL)
return sp; goto out;
new = nfs4_alloc_state_owner(); new = nfs4_alloc_state_owner();
if (new == NULL) if (new == NULL)
return NULL; goto out;
new->so_server = server; new->so_server = server;
new->so_cred = cred; new->so_cred = cred;
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
...@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, ...@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
rpc_destroy_wait_queue(&new->so_sequence.wait); rpc_destroy_wait_queue(&new->so_sequence.wait);
kfree(new); kfree(new);
} }
out:
nfs4_gc_state_owners(server);
return sp; return sp;
} }
/** /**
* nfs4_put_state_owner - Release a nfs4_state_owner * nfs4_put_state_owner - Release a nfs4_state_owner
* @sp: state owner data to release * @sp: state owner data to release
*
*/ */
void nfs4_put_state_owner(struct nfs4_state_owner *sp) void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{ {
struct nfs_client *clp = sp->so_server->nfs_client; struct nfs_server *server = sp->so_server;
struct rpc_cred *cred = sp->so_cred; struct nfs_client *clp = server->nfs_client;
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return; return;
nfs4_remove_state_owner_locked(sp);
if (!RB_EMPTY_NODE(&sp->so_server_node)) {
sp->so_expires = jiffies;
list_add_tail(&sp->so_lru, &server->state_owners_lru);
spin_unlock(&clp->cl_lock);
} else {
nfs4_remove_state_owner_locked(sp);
spin_unlock(&clp->cl_lock);
nfs4_free_state_owner(sp);
}
}
/**
* nfs4_purge_state_owners - Release all cached state owners
* @server: nfs_server with cached state owners to release
*
* Called at umount time. Remaining state owners will be on
* the LRU with ref count of zero.
*/
void nfs4_purge_state_owners(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *tmp;
LIST_HEAD(doomed);
spin_lock(&clp->cl_lock);
list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
list_move(&sp->so_lru, &doomed);
nfs4_remove_state_owner_locked(sp);
}
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
rpc_destroy_wait_queue(&sp->so_sequence.wait);
put_rpccred(cred); list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
kfree(sp); list_del(&sp->so_lru);
nfs4_free_state_owner(sp);
}
} }
static struct nfs4_state * static struct nfs4_state *
...@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov ...@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
restart: restart:
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
nfs4_purge_state_owners(server);
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
for (pos = rb_first(&server->state_owners); for (pos = rb_first(&server->state_owners);
pos != NULL; pos != NULL;
......
...@@ -153,6 +153,7 @@ struct nfs_server { ...@@ -153,6 +153,7 @@ struct nfs_server {
struct rb_root openowner_id; struct rb_root openowner_id;
struct rb_root lockowner_id; struct rb_root lockowner_id;
#endif #endif
struct list_head state_owners_lru;
struct list_head layouts; struct list_head layouts;
struct list_head delegations; struct list_head delegations;
void (*destroy)(struct nfs_server *); void (*destroy)(struct nfs_server *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment