Commit 4a0e73e6 authored by Chuck Lever's avatar Chuck Lever

NFSD: Leave open files out of the filecache LRU

There have been reports of problems when running fstests generic/531
against Linux NFS servers with NFSv4. The NFS server that hosts the
test's SCRATCH_DEV suffers from CPU soft lock-ups during the test.
Analysis shows that:

fs/nfsd/filecache.c
 482                 ret = list_lru_walk(&nfsd_file_lru,
 483                                 nfsd_file_lru_cb,
 484                                 &head, LONG_MAX);

causes nfsd_file_gc() to walk the entire length of the filecache LRU
list every time it is called (which is quite frequently). The walk
holds a spinlock the entire time that prevents other nfsd threads
from accessing the filecache.

What's more, for NFSv4 workloads, none of the items that are visited
during this walk may be evicted, since they are all files that are
held OPEN by NFS clients.

Address this by ensuring that open files are not kept on the LRU
list.
Reported-by: default avatarFrank van der Linden <fllinden@amazon.com>
Reported-by: default avatarWang Yugui <wangyugui@e16-tech.com>
Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=386Suggested-by: default avatarTrond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
parent c46203ac
...@@ -263,6 +263,7 @@ nfsd_file_flush(struct nfsd_file *nf) ...@@ -263,6 +263,7 @@ nfsd_file_flush(struct nfsd_file *nf)
static void nfsd_file_lru_add(struct nfsd_file *nf) static void nfsd_file_lru_add(struct nfsd_file *nf)
{ {
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
trace_nfsd_file_lru_add(nf); trace_nfsd_file_lru_add(nf);
} }
...@@ -292,7 +293,6 @@ nfsd_file_unhash(struct nfsd_file *nf) ...@@ -292,7 +293,6 @@ nfsd_file_unhash(struct nfsd_file *nf)
{ {
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
nfsd_file_do_unhash(nf); nfsd_file_do_unhash(nf);
nfsd_file_lru_remove(nf);
return true; return true;
} }
return false; return false;
...@@ -313,6 +313,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp ...@@ -313,6 +313,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp
if (refcount_dec_not_one(&nf->nf_ref)) if (refcount_dec_not_one(&nf->nf_ref))
return true; return true;
nfsd_file_lru_remove(nf);
list_add(&nf->nf_lru, dispose); list_add(&nf->nf_lru, dispose);
return true; return true;
} }
...@@ -324,6 +325,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) ...@@ -324,6 +325,7 @@ nfsd_file_put_noref(struct nfsd_file *nf)
if (refcount_dec_and_test(&nf->nf_ref)) { if (refcount_dec_and_test(&nf->nf_ref)) {
WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
nfsd_file_lru_remove(nf);
nfsd_file_free(nf); nfsd_file_free(nf);
} }
} }
...@@ -333,7 +335,7 @@ nfsd_file_put(struct nfsd_file *nf) ...@@ -333,7 +335,7 @@ nfsd_file_put(struct nfsd_file *nf)
{ {
might_sleep(); might_sleep();
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); nfsd_file_lru_add(nf);
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf); nfsd_file_flush(nf);
nfsd_file_put_noref(nf); nfsd_file_put_noref(nf);
...@@ -433,8 +435,18 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) ...@@ -433,8 +435,18 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
} }
} }
/* /**
* nfsd_file_lru_cb - Examine an entry on the LRU list
* @item: LRU entry to examine
* @lru: controlling LRU
* @lock: LRU list lock (unused)
* @arg: dispose list
*
* Note this can deadlock with nfsd_file_cache_purge. * Note this can deadlock with nfsd_file_cache_purge.
*
* Return values:
* %LRU_REMOVED: @item was removed from the LRU
* %LRU_SKIP: @item cannot be evicted
*/ */
static enum lru_status static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
...@@ -456,8 +468,9 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, ...@@ -456,8 +468,9 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
* That order is deliberate to ensure that we can do this locklessly. * That order is deliberate to ensure that we can do this locklessly.
*/ */
if (refcount_read(&nf->nf_ref) > 1) { if (refcount_read(&nf->nf_ref) > 1) {
list_lru_isolate(lru, &nf->nf_lru);
trace_nfsd_file_gc_in_use(nf); trace_nfsd_file_gc_in_use(nf);
return LRU_SKIP; return LRU_REMOVED;
} }
/* /*
...@@ -1014,6 +1027,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1014,6 +1027,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto retry; goto retry;
} }
nfsd_file_lru_remove(nf);
this_cpu_inc(nfsd_file_cache_hits); this_cpu_inc(nfsd_file_cache_hits);
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
...@@ -1035,7 +1049,6 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1035,7 +1049,6 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
refcount_inc(&nf->nf_ref); refcount_inc(&nf->nf_ref);
__set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
__set_bit(NFSD_FILE_PENDING, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
nfsd_file_lru_add(nf);
hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
++nfsd_file_hashtbl[hashval].nfb_count; ++nfsd_file_hashtbl[hashval].nfb_count;
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
...@@ -1060,6 +1073,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1060,6 +1073,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/ */
if (status != nfs_ok || inode->i_nlink == 0) { if (status != nfs_ok || inode->i_nlink == 0) {
bool do_free; bool do_free;
nfsd_file_lru_remove(nf);
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
do_free = nfsd_file_unhash(nf); do_free = nfsd_file_unhash(nf);
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
......
...@@ -927,7 +927,9 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \ ...@@ -927,7 +927,9 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \
TP_ARGS(nf)) TP_ARGS(nf))
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment