Commit 6d29d7fe authored by Linus Torvalds

Merge tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
 "We introduce 'courteous server' in this release. Previously NFSD would
  purge open and lock state for an unresponsive client after one lease
  period (typically 90 seconds). Now, after one lease period, another
  client can open and lock those files and the unresponsive client's
  lease is purged; otherwise if the unresponsive client's open and lock
  state is uncontended, the server retains that open and lock state for
  up to 24 hours, allowing the client's workload to resume after a
  lengthy network partition.

  A longstanding issue with NFSv4 file creation is also addressed.
  Previously a file creation could fail internally, returning an error to
  the client, but still leave the newly created file in place as an artifact.
  The file creation code path has been reorganized so that internal
  failures and race conditions are less likely to result in an unwanted
  file creation.

  A fault injector has been added to help exercise paths that are run
  during kernel metadata cache invalidation. These caches contain
  information maintained by user space about exported filesystems. Many
  of our test workloads do not trigger cache invalidation.

  There is one patch that is needed to support PREEMPT_RT and a fix for
  an ancient 'sleep while spin-locked' splat that seems to have become
  easier to hit since v5.18-rc3"

* tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (36 commits)
  NFSD: nfsd_file_put() can sleep
  NFSD: Add documenting comment for nfsd4_release_lockowner()
  NFSD: Modernize nfsd4_release_lockowner()
  NFSD: Fix possible sleep during nfsd4_release_lockowner()
  nfsd: destroy percpu stats counters after reply cache shutdown
  nfsd: Fix null-ptr-deref in nfsd_fill_super()
  nfsd: Unregister the cld notifier when laundry_wq create failed
  SUNRPC: Use RMW bitops in single-threaded hot paths
  NFSD: Clean up the show_nf_flags() macro
  NFSD: Trace filecache opens
  NFSD: Move documenting comment for nfsd4_process_open2()
  NFSD: Fix whitespace
  NFSD: Remove dprintk call sites from tail of nfsd4_open()
  NFSD: Instantiate a struct file when creating a regular NFSv4 file
  NFSD: Clean up nfsd_open_verified()
  NFSD: Remove do_nfsd_create()
  NFSD: Refactor NFSv4 OPEN(CREATE)
  NFSD: Refactor NFSv3 CREATE
  NFSD: Refactor nfsd_create_setattr()
  NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
  ...
parents 7f50d4df 08af54b3
......@@ -434,6 +434,8 @@ prototypes::
void (*lm_break)(struct file_lock *); /* break_lease callback */
int (*lm_change)(struct file_lock **, int);
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
locking rules:
......@@ -445,6 +447,8 @@ lm_grant: no no no
lm_break: yes no no
lm_change yes no no
lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
====================== ============= ================= =========
buffer_head
......
......@@ -300,6 +300,34 @@ void locks_release_private(struct file_lock *fl)
}
EXPORT_SYMBOL_GPL(locks_release_private);
/**
 * locks_owner_has_blockers - Check for blocking lock requests
 * @flctx: file lock context
 * @owner: lock owner
 *
 * Scans the POSIX lock list of @flctx while holding flc_lock and looks
 * for a lock held by @owner whose list of blocked requests is not empty.
 *
 * Return values:
 *   %true: @owner has at least one blocker
 *   %false: @owner has no blockers
 */
bool locks_owner_has_blockers(struct file_lock_context *flctx,
			      fl_owner_t owner)
{
	struct file_lock *pos;
	bool blocked = false;

	spin_lock(&flctx->flc_lock);
	list_for_each_entry(pos, &flctx->flc_posix, fl_list) {
		/* Only locks belonging to @owner are of interest. */
		if (pos->fl_owner == owner &&
		    !list_empty(&pos->fl_blocked_requests)) {
			blocked = true;
			break;
		}
	}
	spin_unlock(&flctx->flc_lock);

	return blocked;
}
EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
......@@ -874,6 +902,8 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
struct file_lock *cfl;
struct file_lock_context *ctx;
struct inode *inode = locks_inode(filp);
void *owner;
void (*func)(void);
ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
......@@ -881,12 +911,23 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
return;
}
retry:
spin_lock(&ctx->flc_lock);
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
if (posix_locks_conflict(fl, cfl)) {
locks_copy_conflock(fl, cfl);
goto out;
if (!posix_locks_conflict(fl, cfl))
continue;
if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
owner = cfl->fl_lmops->lm_mod_owner;
func = cfl->fl_lmops->lm_expire_lock;
__module_get(owner);
spin_unlock(&ctx->flc_lock);
(*func)();
module_put(owner);
goto retry;
}
locks_copy_conflock(fl, cfl);
goto out;
}
fl->fl_type = F_UNLCK;
out:
......@@ -1060,6 +1101,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
int error;
bool added = false;
LIST_HEAD(dispose);
void *owner;
void (*func)(void);
ctx = locks_get_lock_context(inode, request->fl_type);
if (!ctx)
......@@ -1078,6 +1121,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
new_fl2 = locks_alloc_lock();
}
retry:
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
......@@ -1089,6 +1133,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
if (!posix_locks_conflict(request, fl))
continue;
if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
owner = fl->fl_lmops->lm_mod_owner;
func = fl->fl_lmops->lm_expire_lock;
__module_get(owner);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
(*func)();
module_put(owner);
goto retry;
}
if (conflock)
locks_copy_conflock(conflock, fl);
error = -EAGAIN;
......
......@@ -303,6 +303,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
void
nfsd_file_put(struct nfsd_file *nf)
{
might_sleep();
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
......@@ -899,9 +901,9 @@ nfsd_file_is_cached(struct inode *inode)
return ret;
}
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
__be32 status;
struct net *net = SVC_NET(rqstp);
......@@ -996,10 +998,14 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_file_gc();
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
status = nfsd_open_verified(rqstp, fhp, S_IFREG,
may_flags, &nf->nf_file);
else
if (nf->nf_mark) {
if (open) {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
} else
status = nfs_ok;
} else
status = nfserr_jukebox;
/*
* If construction failed, or we raced with a call to unlink()
......@@ -1019,6 +1025,40 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
/**
* nfsd_file_acquire - Get a struct nfsd_file with an open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file to be opened
* @may_flags: NFSD_MAY_ settings for the file
* @pnf: OUT: new or found "struct nfsd_file" object
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}
/**
* nfsd_file_create - Get a struct nfsd_file, do not open
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
* @pnf: OUT: new or found "struct nfsd_file" object
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}
/*
* Note that fields may be added, removed or reordered in the future. Programs
* scraping this file for info should test the labels to ensure they're
......
......@@ -59,5 +59,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
int nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
......@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/magic.h>
#include <linux/namei.h>
#include "cache.h"
#include "xdr3.h"
......@@ -220,17 +221,132 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
}
/*
* With NFSv3, CREATE processing is a lot easier than with NFSv2.
* At least in theory; we'll see how it fares in practice when the
* first reports about SunOS compatibility problems start to pour in...
* Implement NFSv3's unchecked, guarded, and exclusive CREATE
* semantics for regular files. Except for the created file,
* this operation is stateless on the server.
*
* Upon return, caller must release @fhp and @resfhp.
*/
/*
 * @rqstp: RPC transaction being executed
 * @fhp: filehandle of the parent directory
 * @resfhp: OUT: filehandle composed for the looked-up/created child
 * @argp: decoded CREATE arguments (name, attributes, createmode, verifier)
 *
 * Returns nfs_ok on success, or an nfsstat in network byte order.
 */
static __be32
nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct nfsd3_createargs *argp)
{
struct iattr *iap = &argp->attrs;
struct dentry *parent, *child;
__u32 v_mtime, v_atime;
struct inode *inode;
__be32 status;
int host_err;
/* Names rejected by isdotent() are reported as already existing. */
if (isdotent(argp->name, argp->len))
return nfserr_exist;
/* No mode supplied in the request: start from mode 0. */
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
/* The parent must verify as a directory with NFSD_MAY_EXEC. */
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (status != nfs_ok)
return status;
parent = fhp->fh_dentry;
inode = d_inode(parent);
/*
 * Returning directly is fine up to here: the directory has not been
 * locked yet and fh_want_write() did not succeed.
 */
host_err = fh_want_write(fhp);
if (host_err)
return nfserrno(host_err);
/* From here on every exit goes through "out" to undo lock and write access. */
fh_lock_nested(fhp, I_MUTEX_PARENT);
child = lookup_one_len(argp->name, parent, argp->len);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
}
/* Creating a new entry additionally requires NFSD_MAY_CREATE on the parent. */
if (d_really_is_negative(child)) {
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (status != nfs_ok)
goto out;
}
status = fh_compose(resfhp, fhp->fh_export, child, fhp);
if (status != nfs_ok)
goto out;
/* For exclusive create, the verifier is carried in mtime/atime seconds. */
v_mtime = 0;
v_atime = 0;
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
u32 *verifier = (u32 *)argp->verf;
/*
* Solaris 7 gets confused (bugid 4218508) if these have
* the high bit set, as do xfs filesystems without the
* "bigtime" feature. So just clear the high bits.
*/
v_mtime = verifier[0] & 0x7fffffff;
v_atime = verifier[1] & 0x7fffffff;
}
/* The name already exists: the outcome depends on the create mode. */
if (d_really_is_positive(child)) {
status = nfs_ok;
switch (argp->createmode) {
case NFS3_CREATE_UNCHECKED:
/* Existing non-regular object: succeed without touching it. */
if (!d_is_reg(child))
break;
/* Existing regular file: only a requested size change is applied. */
iap->ia_valid &= ATTR_SIZE;
goto set_attr;
case NFS3_CREATE_GUARDED:
status = nfserr_exist;
break;
case NFS3_CREATE_EXCLUSIVE:
/*
 * An empty file whose timestamps match the verifier is accepted
 * as success (presumably a replay of the same CREATE).
 */
if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
d_inode(child)->i_atime.tv_sec == v_atime &&
d_inode(child)->i_size == 0) {
break;
}
status = nfserr_exist;
}
goto out;
}
/* The name does not exist yet: create the file. */
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
goto out;
}
/* A newly created file already has a file size of zero. */
if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
iap->ia_valid &= ~ATTR_SIZE;
/*
 * Exclusive create: discard the requested attributes and store the
 * verifier in mtime/atime instead.
 */
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
ATTR_MTIME_SET | ATTR_ATIME_SET;
iap->ia_mtime.tv_sec = v_mtime;
iap->ia_atime.tv_sec = v_atime;
iap->ia_mtime.tv_nsec = 0;
iap->ia_atime.tv_nsec = 0;
}
set_attr:
status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
out:
/* Common exit: unlock the parent, drop the child reference and write access. */
fh_unlock(fhp);
if (child && !IS_ERR(child))
dput(child);
fh_drop_write(fhp);
return status;
}
static __be32
nfsd3_proc_create(struct svc_rqst *rqstp)
{
struct nfsd3_createargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
svc_fh *dirfhp, *newfhp = NULL;
struct iattr *attr;
svc_fh *dirfhp, *newfhp;
dprintk("nfsd: CREATE(3) %s %.*s\n",
SVCFH_fmt(&argp->fh),
......@@ -239,21 +355,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
attr = &argp->attrs;
/* Unfudge the mode bits */
attr->ia_mode &= ~S_IFMT;
if (!(attr->ia_valid & ATTR_MODE)) {
attr->ia_valid |= ATTR_MODE;
attr->ia_mode = S_IFREG;
} else {
attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
}
/* Now create the file and set attributes */
resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
attr, newfhp, argp->createmode,
(u32 *)argp->verf, NULL, NULL);
resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
return rpc_success;
}
......
......@@ -37,6 +37,8 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/namei.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_ssc.h>
......@@ -235,6 +237,183 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
&resfh->fh_handle);
}
static inline bool nfsd4_create_is_exclusive(int createmode)
{
return createmode == NFS4_CREATE_EXCLUSIVE ||
createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
/*
 * Create the file for @child via dentry_create() and keep the resulting
 * "struct file" in @open->op_filp.
 *
 * The open flags always include O_CREAT | O_LARGEFILE; the access mode
 * is derived from the share access bits requested in @open.
 *
 * Returns nfs_ok on success, or an nfsstat in network byte order.
 */
static __be32
nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
		 struct nfsd4_open *open)
{
	unsigned int access = open->op_share_access & NFS4_SHARE_ACCESS_BOTH;
	int oflags = O_CREAT | O_LARGEFILE;
	struct file *filp;
	struct path path;

	if (access == NFS4_SHARE_ACCESS_WRITE)
		oflags |= O_WRONLY;
	else if (access == NFS4_SHARE_ACCESS_BOTH)
		oflags |= O_RDWR;
	else
		oflags |= O_RDONLY;

	/* The new file lives on the export's mount. */
	path.mnt = fhp->fh_export->ex_path.mnt;
	path.dentry = child;

	filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
			     current_cred());
	if (IS_ERR(filp))
		return nfserrno(PTR_ERR(filp));

	open->op_filp = filp;
	return nfs_ok;
}
/*
* Implement NFSv4's unchecked, guarded, and exclusive create
* semantics for regular files. Open state for this new file is
* subsequently fabricated in nfsd4_process_open2().
*
* @rqstp: RPC transaction being executed
* @fhp: filehandle of the parent directory
* @resfhp: OUT: filehandle composed for the looked-up/created child
* @open: OPEN arguments; op_truncate, op_created and (through
*        nfsd4_vfs_create()) op_filp may be updated here
*
* Returns nfs_ok on success, or an nfsstat in network byte order.
*
* Upon return, caller must release @fhp and @resfhp.
*/
static __be32
nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct nfsd4_open *open)
{
struct iattr *iap = &open->op_iattr;
struct dentry *parent, *child;
__u32 v_mtime, v_atime;
struct inode *inode;
__be32 status;
int host_err;
/* Names rejected by isdotent() are reported as already existing. */
if (isdotent(open->op_fname, open->op_fnamelen))
return nfserr_exist;
/* No mode supplied in the request: start from mode 0. */
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
/* The parent must verify as a directory with NFSD_MAY_EXEC. */
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (status != nfs_ok)
return status;
parent = fhp->fh_dentry;
inode = d_inode(parent);
/*
 * Returning directly is fine up to here: the directory has not been
 * locked yet and fh_want_write() did not succeed.
 */
host_err = fh_want_write(fhp);
if (host_err)
return nfserrno(host_err);
/* From here on every exit goes through "out" to undo lock and write access. */
fh_lock_nested(fhp, I_MUTEX_PARENT);
child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
}
/* Creating a new entry additionally requires NFSD_MAY_CREATE on the parent. */
if (d_really_is_negative(child)) {
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (status != nfs_ok)
goto out;
}
status = fh_compose(resfhp, fhp->fh_export, child, fhp);
if (status != nfs_ok)
goto out;
/* For exclusive create, the verifier is carried in mtime/atime seconds. */
v_mtime = 0;
v_atime = 0;
if (nfsd4_create_is_exclusive(open->op_createmode)) {
u32 *verifier = (u32 *)open->op_verf.data;
/*
* Solaris 7 gets confused (bugid 4218508) if these have
* the high bit set, as do xfs filesystems without the
* "bigtime" feature. So just clear the high bits. If this
* is ever changed to use different attrs for storing the
* verifier, then do_open_lookup() will also need to be
* fixed accordingly.
*/
v_mtime = verifier[0] & 0x7fffffff;
v_atime = verifier[1] & 0x7fffffff;
}
/* The name already exists: the outcome depends on the create mode. */
if (d_really_is_positive(child)) {
status = nfs_ok;
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
/* Existing non-regular object: succeed without touching it. */
if (!d_is_reg(child))
break;
/*
* In NFSv4, we don't want to truncate the file
* now. This would be wrong if the OPEN fails for
* some other reason. Furthermore, if the size is
* nonzero, we should ignore it according to spec!
*/
open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
!iap->ia_size;
break;
case NFS4_CREATE_GUARDED:
status = nfserr_exist;
break;
case NFS4_CREATE_EXCLUSIVE:
/*
 * An empty file whose timestamps match the verifier is reported
 * as created; no attributes are applied on this path.
 */
if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
d_inode(child)->i_atime.tv_sec == v_atime &&
d_inode(child)->i_size == 0) {
open->op_created = true;
break; /* subtle */
}
status = nfserr_exist;
break;
case NFS4_CREATE_EXCLUSIVE4_1:
/*
 * Same verifier match as above, but this mode continues to
 * set_attr so the attributes in @iap are still applied.
 */
if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
d_inode(child)->i_atime.tv_sec == v_atime &&
d_inode(child)->i_size == 0) {
open->op_created = true;
goto set_attr; /* subtle */
}
status = nfserr_exist;
}
goto out;
}
/* The name does not exist yet: create the file. */
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
/* On success nfsd4_vfs_create() leaves the new struct file in open->op_filp. */
status = nfsd4_vfs_create(fhp, child, open);
if (status != nfs_ok)
goto out;
open->op_created = true;
/* A newly created file already has a file size of zero. */
if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
iap->ia_valid &= ~ATTR_SIZE;
/*
 * Exclusive create: discard the requested attributes and store the
 * verifier in mtime/atime instead.
 */
if (nfsd4_create_is_exclusive(open->op_createmode)) {
iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
ATTR_MTIME_SET|ATTR_ATIME_SET;
iap->ia_mtime.tv_sec = v_mtime;
iap->ia_atime.tv_sec = v_atime;
iap->ia_mtime.tv_nsec = 0;
iap->ia_atime.tv_nsec = 0;
}
set_attr:
status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
out:
/* Common exit: unlock the parent, drop the child reference and write access. */
fh_unlock(fhp);
if (child && !IS_ERR(child))
dput(child);
fh_drop_write(fhp);
return status;
}
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
......@@ -264,16 +443,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* yes | yes | GUARDED4 | GUARDED4
*/
/*
* Note: create modes (UNCHECKED,GUARDED...) are the same
* in NFSv4 as in v3 except EXCLUSIVE4_1.
*/
current->fs->umask = open->op_umask;
status = do_nfsd_create(rqstp, current_fh, open->op_fname,
open->op_fnamelen, &open->op_iattr,
*resfh, open->op_createmode,
(u32 *)open->op_verf.data,
&open->op_truncate, &open->op_created);
status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
current->fs->umask = 0;
if (!status && open->op_label.len)
......@@ -284,7 +455,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* use the returned bitmask to indicate which attributes
* we used to store the verifier:
*/
if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
FATTR4_WORD1_TIME_MODIFY);
} else
......@@ -375,6 +546,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
(int)open->op_fnamelen, open->op_fname,
open->op_openowner);
open->op_filp = NULL;
/* This check required by spec. */
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
......@@ -427,43 +600,35 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_NULL:
status = do_open_lookup(rqstp, cstate, open, &resfh);
if (status)
goto out;
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
status = nfs4_check_open_reclaim(cstate->clp);
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
reclaim = true;
fallthrough;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
if (status)
goto out;
resfh = &cstate->current_fh;
break;
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
dprintk("NFSD: unsupported OPEN claim type %d\n",
open->op_claim_type);
status = nfserr_notsupp;
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_NULL:
status = do_open_lookup(rqstp, cstate, open, &resfh);
if (status)
goto out;
default:
dprintk("NFSD: Invalid OPEN claim type %d\n",
open->op_claim_type);
status = nfserr_inval;
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
status = nfs4_check_open_reclaim(cstate->clp);
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
reclaim = true;
fallthrough;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
if (status)
goto out;
resfh = &cstate->current_fh;
break;
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
status = nfserr_notsupp;
goto out;
default:
status = nfserr_inval;
goto out;
}
/*
* nfsd4_process_open2() does the actual opening of the file. If
* successful, it (1) truncates the file if open->op_truncate was
* set, (2) sets open->op_stateid, (3) sets open->op_delegation.
*/
status = nfsd4_process_open2(rqstp, resfh, open);
WARN(status && open->op_created,
"nfsd4_process_open2 failed to open newly-created file! status=%u\n",
......@@ -471,6 +636,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (reclaim && !status)
nn->somebody_reclaimed = true;
out:
if (open->op_filp) {
fput(open->op_filp);
open->op_filp = NULL;
}
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
fh_put(resfh);
......@@ -801,7 +970,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
__clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
......@@ -2481,11 +2650,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
cstate->minorversion = args->minorversion;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
__clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
......@@ -2600,7 +2770,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
out:
cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
__set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}
......
This diff is collapsed.
......@@ -2411,7 +2411,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
__clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
return true;
}
......
......@@ -206,7 +206,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
struct svc_cacherep *rp;
unsigned int i;
nfsd_reply_cache_stats_destroy(nn);
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
for (i = 0; i < nn->drc_hashsize; i++) {
......@@ -217,6 +216,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
......
......@@ -1535,20 +1535,25 @@ static int __init init_nfsd(void)
retval = create_proc_exports_entry();
if (retval)
goto out_free_lockd;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
goto out_free_exports;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
goto out_free_filesystem;
goto out_free_exports;
retval = register_cld_notifier();
if (retval)
goto out_free_subsys;
retval = nfsd4_create_laundry_wq();
if (retval)
goto out_free_cld;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
goto out_free_all;
return 0;
out_free_all:
nfsd4_destroy_laundry_wq();
out_free_cld:
unregister_cld_notifier();
out_free_subsys:
unregister_pernet_subsys(&nfsd_net_ops);
out_free_filesystem:
unregister_filesystem(&nfsd_fs_type);
out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
......@@ -1566,6 +1571,8 @@ static int __init init_nfsd(void)
static void __exit exit_nfsd(void)
{
unregister_filesystem(&nfsd_fs_type);
nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
......@@ -1575,7 +1582,6 @@ static void __exit exit_nfsd(void)
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
unregister_filesystem(&nfsd_fs_type);
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
......
......@@ -162,6 +162,8 @@ void nfs4_state_shutdown_net(struct net *net);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
int nfsd4_create_laundry_wq(void);
void nfsd4_destroy_laundry_wq(void);
#else
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
......@@ -175,6 +177,8 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
return false;
}
/*
 * No-op stubs for the #else branch, mirroring the real laundry workqueue
 * create/destroy helpers (presumably used when NFSv4 support is not
 * built - confirm against the enclosing #if).
 */
static inline int nfsd4_create_laundry_wq(void) { return 0; }
static inline void nfsd4_destroy_laundry_wq(void) { }
#endif
/*
......@@ -336,6 +340,7 @@ void nfsd_lockd_shutdown(void);
#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
/*
* The following attributes are currently not supported by the NFSv4 server:
......
......@@ -149,6 +149,7 @@ struct nfs4_delegation {
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
};
#define cb_to_delegation(cb) \
......@@ -282,6 +283,28 @@ struct nfsd4_sessionid {
#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
/*
* State Meaning Where set
* --------------------------------------------------------------------------
* | NFSD4_ACTIVE | Confirmed, active | Default |
* |------------------------------------------------------------------------|
* | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist |
* | | Lease/lock/share | |
* | | reservation conflict | |
* | | can cause Courtesy | |
* | | client to be expired | |
* |------------------------------------------------------------------------|
* | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat |
* | | expired by Laundromat| try_to_expire_client |
* | | due to conflict | |
* |------------------------------------------------------------------------|
*/
/* Values stored in nfs4_client.cl_state; see the state table above. */
enum {
NFSD4_ACTIVE = 0, /* confirmed, active client (the default state) */
NFSD4_COURTESY, /* courtesy client; a conflict can cause it to be expired */
NFSD4_EXPIRABLE, /* courtesy client to be expired by the laundromat */
};
/*
* struct nfs4_client - one per client. Clientids live here.
*
......@@ -385,6 +408,9 @@ struct nfs4_client {
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
atomic_t cl_cb_inflight; /* Outstanding callbacks */
unsigned int cl_state;
atomic_t cl_delegs_in_recall;
};
/* struct nfs4_client_reset
......@@ -702,4 +728,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn);
/*
 * Move @clp from NFSD4_COURTESY to NFSD4_EXPIRABLE if it is currently a
 * courtesy client, then report whether the client's state reads back as
 * NFSD4_EXPIRABLE.
 */
static inline bool try_to_expire_client(struct nfs4_client *clp)
{
	bool expirable;

	/* The exchange only takes effect when the state is NFSD4_COURTESY. */
	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
	expirable = (clp->cl_state == NFSD4_EXPIRABLE);

	return expirable;
}
#endif /* NFSD4_STATE_H */
......@@ -692,12 +692,6 @@ DEFINE_CLID_EVENT(confirmed_r);
/*
* from fs/nfsd/filecache.h
*/
TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
......@@ -784,6 +778,34 @@ TRACE_EVENT(nfsd_file_acquire,
__entry->nf_file, __entry->status)
);
/*
 * Tracepoint recording a snapshot of a struct nfsd_file: hash value,
 * inode and file pointers (for identification only), reference count,
 * flags and NFSD_MAY_ bits.
 *
 * NOTE(review): @status is part of the prototype but is neither stored
 * in the entry nor printed.
 */
TRACE_EVENT(nfsd_file_open,
	TP_PROTO(struct nfsd_file *nf, __be32 status),
	TP_ARGS(nf, status),
	TP_STRUCT__entry(
		__field(unsigned int, nf_hashval)
		__field(void *, nf_inode)	/* cannot be dereferenced */
		__field(int, nf_ref)
		__field(unsigned long, nf_flags)
		__field(unsigned long, nf_may)
		__field(void *, nf_file)	/* cannot be dereferenced */
	),
	TP_fast_assign(
		__entry->nf_hashval = nf->nf_hashval;
		__entry->nf_inode = nf->nf_inode;
		__entry->nf_ref = refcount_read(&nf->nf_ref);
		__entry->nf_flags = nf->nf_flags;
		__entry->nf_may = nf->nf_may;
		__entry->nf_file = nf->nf_file;
	),
	TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
		__entry->nf_hashval,
		__entry->nf_inode,
		__entry->nf_ref,
		show_nf_flags(__entry->nf_flags),
		show_nfsd_may_flags(__entry->nf_may),
		__entry->nf_file)
);
DECLARE_EVENT_CLASS(nfsd_file_search_class,
TP_PROTO(struct inode *inode, unsigned int hash, int found),
TP_ARGS(inode, hash, found),
......
......@@ -827,14 +827,23 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
return err;
}
/**
* nfsd_open_verified - Open a regular file for the filecache
* @rqstp: RPC request
* @fhp: NFS filehandle of the file to open
* @may_flags: internal permission flags
* @filp: OUT: open "struct file *"
*
* Returns an nfsstat value in network byte order.
*/
__be32
nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
struct file **filp)
{
__be32 err;
validate_process_creds();
err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
validate_process_creds();
return err;
}
......@@ -849,17 +858,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
struct page **pp = rqstp->rq_next_page;
struct page *page = buf->page;
if (rqstp->rq_res.page_len == 0) {
svc_rqst_replace_page(rqstp, page);
svc_rqst_replace_page(rqstp, buf->page);
if (rqstp->rq_res.page_len == 0)
rqstp->rq_res.page_base = buf->offset;
} else if (page != pp[-1]) {
svc_rqst_replace_page(rqstp, page);
}
rqstp->rq_res.page_len += sd->len;
return sd->len;
}
......@@ -1187,14 +1190,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
return err;
}
static __be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
struct iattr *iap)
/**
* nfsd_create_setattr - Set a created file's attributes
* @rqstp: RPC transaction being executed
* @fhp: NFS filehandle of parent directory
* @resfhp: NFS filehandle of new object
* @iap: requested attributes of new object
*
* Returns nfs_ok on success, or an nfsstat in network byte order.
*/
__be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct iattr *iap)
{
__be32 status;
/*
* Mode has already been set earlier in create:
* Mode has already been set by file creation.
*/
iap->ia_valid &= ~ATTR_MODE;
/*
* Setting uid/gid works only for root. Irix appears to
* send along the gid on create when it tries to implement
......@@ -1202,10 +1217,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
*/
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
/*
* Callers expect new file metadata to be committed even
* if the attributes have not changed.
*/
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
/* Callers expect file metadata to be committed here */
return nfserrno(commit_metadata(resfhp));
status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
else
status = nfserrno(commit_metadata(resfhp));
/*
* Transactional filesystems had a chance to commit changes
* for both parent and child simultaneously making the
* following commit_metadata a noop in many cases.
*/
if (!status)
status = nfserrno(commit_metadata(fhp));
/*
* Update the new filehandle to pick up the new attributes.
*/
if (!status)
status = fh_update(resfhp);
return status;
}
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
......@@ -1232,7 +1268,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild;
struct inode *dirp;
__be32 err;
__be32 err2;
int host_err;
dentry = fhp->fh_dentry;
......@@ -1305,22 +1340,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err < 0)
goto out_nfserr;
err = nfsd_create_setattr(rqstp, resfhp, iap);
err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
/*
* nfsd_create_setattr already committed the child. Transactional
* filesystems had a chance to commit changes for both parent and
* child simultaneously making the following commit_metadata a
* noop.
*/
err2 = nfserrno(commit_metadata(fhp));
if (err2)
err = err2;
/*
* Update the file handle to get the new inode info.
*/
if (!err)
err = fh_update(resfhp);
out:
dput(dchild);
return err;
......@@ -1375,172 +1396,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
rdev, resfhp);
}
/**
 * do_nfsd_create - NFSv3 and NFSv4 version of nfsd_create
 * @rqstp: RPC transaction being executed
 * @fhp: NFS filehandle of parent directory
 * @fname: name of the object to look up or create
 * @flen: length of @fname; zero is rejected with nfserr_perm
 * @iap: requested attributes of new object (ia_mode is zeroed when
 *       ATTR_MODE is not set in ia_valid)
 * @resfhp: filehandle composed from the looked-up child dentry
 * @createmode: NFS3_CREATE_UNCHECKED, NFS3_CREATE_GUARDED,
 *              NFS3_CREATE_EXCLUSIVE or NFS4_CREATE_EXCLUSIVE4_1
 * @verifier: two 32-bit words; read only for the exclusive create modes
 * @truncp: if non-NULL, set for an UNCHECKED create of an existing
 *          regular file to whether a zero ATTR_SIZE was requested
 * @created: if non-NULL, set to true when vfs_create() succeeds or when
 *           an existing file matches the exclusive-create verifier
 *
 * Returns a __be32 status: zero on success, otherwise an nfserr value
 * (nfserr_perm for an empty name, nfserr_exist for "." / ".." names or
 * for an existing object that the create mode does not accept, or the
 * nfserrno() translation of a host error).
 */
__be32
do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen, struct iattr *iap,
struct svc_fh *resfhp, int createmode, u32 *verifier,
bool *truncp, bool *created)
{
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
int host_err;
__u32 v_mtime=0, v_atime=0;
/* Reject empty names and dot entries before touching the directory. */
err = nfserr_perm;
if (!flen)
goto out;
err = nfserr_exist;
if (isdotent(fname, flen))
goto out;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (err)
goto out;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
host_err = fh_want_write(fhp);
if (host_err)
goto out_nfserr;
fh_lock_nested(fhp, I_MUTEX_PARENT);
/*
* Compose the response file handle.
*/
dchild = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild))
goto out_nfserr;
/* If file doesn't exist, check for permissions to create one */
if (d_really_is_negative(dchild)) {
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
}
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
if (err)
goto out;
if (nfsd_create_is_exclusive(createmode)) {
/* solaris7 gets confused (bugid 4218508) if these have
* the high bit set, as do xfs filesystems without the
* "bigtime" feature. So just clear the high bits. If this is
* ever changed to use different attrs for storing the
* verifier, then do_open_lookup() will also need to be fixed
* accordingly.
*/
v_mtime = verifier[0]&0x7fffffff;
v_atime = verifier[1]&0x7fffffff;
}
/* The name already exists: the outcome depends on the create mode. */
if (d_really_is_positive(dchild)) {
err = 0;
switch (createmode) {
case NFS3_CREATE_UNCHECKED:
/* An existing non-regular object is returned as-is with success. */
if (! d_is_reg(dchild))
goto out;
else if (truncp) {
/* in nfsv4, we need to treat this case a little
* differently. we don't want to truncate the
* file now; this would be wrong if the OPEN
* fails for some other reason. furthermore,
* if the size is nonzero, we should ignore it
* according to spec!
*/
*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
}
else {
/* Without @truncp, only a requested size is applied to the file. */
iap->ia_valid &= ATTR_SIZE;
goto set_attr;
}
break;
case NFS3_CREATE_EXCLUSIVE:
/*
* A matching verifier in mtime/atime on an empty file counts as
* "created"; success is returned without setting attributes.
*/
if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
*created = true;
break;
}
fallthrough;
case NFS4_CREATE_EXCLUSIVE4_1:
/* Same verifier check, but a match goes on to apply attributes. */
if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
*created = true;
goto set_attr;
}
fallthrough;
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
fh_drop_write(fhp);
goto out;
}
/* The name does not exist yet: create a new file in the parent. */
if (!IS_POSIXACL(dirp))
iap->ia_mode &= ~current_umask();
host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
if (host_err < 0) {
fh_drop_write(fhp);
goto out_nfserr;
}
if (created)
*created = true;
nfsd_check_ignore_resizing(iap);
if (nfsd_create_is_exclusive(createmode)) {
/* Cram the verifier into atime/mtime */
iap->ia_valid = ATTR_MTIME|ATTR_ATIME
| ATTR_MTIME_SET|ATTR_ATIME_SET;
/* XXX someone who knows this better please fix it for nsec */
iap->ia_mtime.tv_sec = v_mtime;
iap->ia_atime.tv_sec = v_atime;
iap->ia_mtime.tv_nsec = 0;
iap->ia_atime.tv_nsec = 0;
}
set_attr:
err = nfsd_create_setattr(rqstp, resfhp, iap);
/*
* nfsd_create_setattr already committed the child
* (and possibly also the parent).
*/
if (!err)
err = nfserrno(commit_metadata(fhp));
/*
* Update the filehandle to get the new inode info.
*/
if (!err)
err = fh_update(resfhp);
/* Common exit: unlock the parent, drop the child dentry and write access. */
out:
fh_unlock(fhp);
if (dchild && !IS_ERR(dchild))
dput(dchild);
fh_drop_write(fhp);
return err;
/* Translate a host errno into an NFS status, then take the common exit. */
out_nfserr:
err = nfserrno(host_err);
goto out;
}
/*
* Read a symlink. On entry, *lenp must contain the maximum path length that
* fits into the buffer. On return, it contains the true length.
......
......@@ -69,10 +69,8 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct iattr *iap);
__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
u64 offset, u32 count, __be32 *verf);
#ifdef CONFIG_NFSD_V4
......@@ -88,7 +86,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *,
int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
......@@ -159,10 +157,4 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
AT_STATX_SYNC_AS_STAT));
}
/*
 * Report whether @createmode is one of the exclusive create modes
 * (NFS3_CREATE_EXCLUSIVE or NFS4_CREATE_EXCLUSIVE4_1).
 * Returns 1 if so, 0 otherwise.
 */
static inline int nfsd_create_is_exclusive(int createmode)
{
	switch (createmode) {
	case NFS3_CREATE_EXCLUSIVE:
	case NFS4_CREATE_EXCLUSIVE4_1:
		return 1;
	default:
		return 0;
	}
}
#endif /* LINUX_NFSD_VFS_H */
......@@ -273,6 +273,7 @@ struct nfsd4_open {
bool op_truncate; /* used during processing */
bool op_created; /* used during processing */
struct nfs4_openowner *op_openowner; /* used during processing */
struct file *op_filp; /* used during processing */
struct nfs4_file *op_file; /* used during processing */
struct nfs4_ol_stateid *op_stp; /* used during processing */
struct nfs4_clnt_odstate *op_odstate; /* used during processing */
......
......@@ -980,6 +980,48 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
/**
 * dentry_create - Create and open a file
 * @path: path to create
 * @flags: O_ flags
 * @mode: mode bits for new file
 * @cred: credentials to use
 *
 * Caller must hold the parent directory's lock, and have prepared
 * a negative dentry, placed in @path->dentry, for the new file.
 *
 * Caller sets @path->mnt to the vfsmount of the filesystem where
 * the new file is to be created. The parent directory and the
 * negative dentry must reside on the same filesystem instance.
 *
 * On success, returns a "struct file *". Otherwise an ERR_PTR
 * is returned. If the open step fails after the create step has
 * succeeded, this function does not remove the new file.
 */
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
			   const struct cred *cred)
{
	struct file *filp;
	int err;

	validate_creds(cred);

	/* Allocate the file first so a failure here creates nothing. */
	filp = alloc_empty_file(flags, cred);
	if (IS_ERR(filp))
		return filp;

	err = vfs_create(mnt_user_ns(path->mnt),
			 d_inode(path->dentry->d_parent),
			 path->dentry, mode, true);
	if (err)
		goto out_fput;

	err = vfs_open(path, filp);
	if (err)
		goto out_fput;

	return filp;

out_fput:
	/* Release the file allocated above before reporting the error. */
	fput(filp);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(dentry_create);
struct file *open_with_fake_path(const struct path *path, int flags,
struct inode *inode, const struct cred *cred)
{
......
......@@ -1022,6 +1022,7 @@ struct file_lock_operations {
};
struct lock_manager_operations {
void *lm_mod_owner;
fl_owner_t (*lm_get_owner)(fl_owner_t);
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
......@@ -1030,6 +1031,8 @@ struct lock_manager_operations {
int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **);
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *cfl);
void (*lm_expire_lock)(void);
};
struct lock_manager {
......@@ -1167,6 +1170,8 @@ extern void lease_unregister_notifier(struct notifier_block *);
struct files_struct;
extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files);
extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner);
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
......@@ -1302,6 +1307,11 @@ static inline int lease_modify(struct file_lock *fl, int arg,
struct files_struct;
static inline void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files) {}
/*
 * Stub used in the !CONFIG_FILE_LOCKING branch of this header: both
 * arguments are ignored and the answer is always "no blockers".
 */
static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner)
{
return false;
}
#endif /* !CONFIG_FILE_LOCKING */
static inline struct inode *file_inode(const struct file *f)
......@@ -2630,6 +2640,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
name, flags, mode);
}
extern struct file * dentry_open(const struct path *, int, const struct cred *);
extern struct file *dentry_create(const struct path *path, int flags,
umode_t mode, const struct cred *cred);
extern struct file * open_with_fake_path(const struct path *, int,
struct inode*, const struct cred *);
static inline struct file *file_clone_open(struct file *file)
......
......@@ -121,17 +121,17 @@ struct cache_detail {
struct net *net;
};
/* this must be embedded in any request structure that
* identifies an object that will want a callback on
* a cache fill
*/
struct cache_req {
struct cache_deferred_req *(*defer)(struct cache_req *req);
int thread_wait; /* How long (jiffies) we can block the
* current thread to wait for updates.
*/
unsigned long thread_wait; /* How long (jiffies) we can block the
* current thread to wait for updates.
*/
};
/* this must be embedded in a deferred_request that is being
* delayed awaiting cache-fill
*/
......
......@@ -257,7 +257,6 @@ struct svc_rqst {
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
struct xdr_stream rq_arg_stream;
struct xdr_stream rq_res_stream;
......@@ -397,7 +396,6 @@ struct svc_deferred_req {
size_t daddrlen;
void *xprt_ctxt;
struct cache_deferred_req handle;
size_t xprt_hlen;
int argslen;
__be32 args[];
};
......@@ -506,7 +504,7 @@ int svc_register(const struct svc_serv *, struct net *, const int,
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
const char * svc_proc_name(const struct svc_rqst *rqstp);
int svc_encode_result_payload(struct svc_rqst *rqstp,
......
......@@ -2015,19 +2015,17 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
TP_STRUCT__entry(
__field(const void *, dr)
__field(u32, xid)
__array(__u8, addr, INET6_ADDRSTRLEN + 10)
__sockaddr(addr, dr->addrlen)
),
TP_fast_assign(
__entry->dr = dr;
__entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
(dr->xprt_hlen>>2)));
snprintf(__entry->addr, sizeof(__entry->addr) - 1,
"%pISpc", (struct sockaddr *)&dr->addr);
__entry->xid = be32_to_cpu(*(__be32 *)dr->args);
__assign_sockaddr(addr, &dr->addr, dr->addrlen);
),
TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr,
__entry->xid)
TP_printk("addr=%pISpc dr=%p xid=0x%08x", __get_sockaddr(addr),
__entry->dr, __entry->xid)
);
#define DEFINE_SVC_DEFERRED_EVENT(name) \
......
......@@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
* rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
__clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
......@@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
int pad, remaining_len, offset;
u32 rseqno;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
__clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
......
......@@ -33,7 +33,9 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <trace/events/sunrpc.h>
#include "netns.h"
#include "fail.h"
#define RPCDBG_FACILITY RPCDBG_CACHE
......@@ -688,16 +690,30 @@ static void cache_limit_defers(void)
discard->revisit(discard, 1);
}
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
/*
 * Fault-injection hook. Returns true only when the ignore_cache_wait
 * knob is clear and should_fail() fires for the sunrpc fault attribute.
 */
static inline bool cache_defer_immediately(void)
{
	if (fail_sunrpc.ignore_cache_wait)
		return false;

	return should_fail(&fail_sunrpc.attr, 1);
}
#else
/* Without CONFIG_FAIL_SUNRPC this hook never fires. */
static inline bool cache_defer_immediately(void)
{
	return false;
}
#endif
/* Return true if and only if a deferred request is queued. */
static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq;
if (req->thread_wait) {
if (!cache_defer_immediately()) {
cache_wait_req(req, item);
if (!test_bit(CACHE_PENDING, &item->flags))
return false;
}
dreq = req->defer(req);
if (dreq == NULL)
return false;
......
......@@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_server_disconnect);
debugfs_create_bool("ignore-cache-wait", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_cache_wait);
}
#else
static void fail_sunrpc_init(void)
......
......@@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
struct fault_attr attr;
bool ignore_client_disconnect;
bool ignore_server_disconnect;
bool ignore_cache_wait;
};
extern struct fail_sunrpc_attr fail_sunrpc;
......
......@@ -356,15 +356,21 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
}
}
/*
* Use the mapping mode to choose a pool for a given CPU.
* Used when enqueueing an incoming RPC. Always returns
* a non-NULL pool pointer.
/**
* svc_pool_for_cpu - Select pool to run a thread on this cpu
* @serv: An RPC service
*
* Use the active CPU and the svc_pool_map's mode setting to
* select the svc thread pool to use. Once initialized, the
* svc_pool_map does not change.
*
* Return value:
* A pointer to an svc_pool
*/
struct svc_pool *
svc_pool_for_cpu(struct svc_serv *serv, int cpu)
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
{
struct svc_pool_map *m = &svc_pool_map;
int cpu = raw_smp_processor_id();
unsigned int pidx = 0;
if (serv->sv_nrpools <= 1)
......@@ -1238,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off by GSS integrity and privacy services */
set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
__set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
__set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
__clear_bit(RQ_DROPME, &rqstp->rq_flags);
svc_putu32(resv, rqstp->rq_xid);
......
......@@ -448,7 +448,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
int cpu;
if (!svc_xprt_ready(xprt))
return;
......@@ -461,8 +460,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
pool = svc_pool_for_cpu(xprt->xpt_server);
atomic_long_inc(&pool->sp_stats.packets);
......@@ -485,7 +483,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
rqstp = NULL;
out_unlock:
rcu_read_unlock();
put_cpu();
trace_svc_xprt_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
......@@ -1230,7 +1227,6 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
dr->xprt_hlen = rqstp->rq_xprt_hlen;
dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
rqstp->rq_xprt_ctxt = NULL;
......@@ -1242,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
set_bit(RQ_DROPME, &rqstp->rq_flags);
__set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
......@@ -1258,22 +1254,21 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
trace_svc_defer_recv(dr);
/* setup iov_base past transport header */
rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
rqstp->rq_arg.head[0].iov_base = dr->args;
/* The iov_len does not include the transport header bytes */
rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
rqstp->rq_arg.page_len = 0;
/* The rq_arg.len includes the transport header bytes */
rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_arg.len = dr->argslen << 2;
rqstp->rq_prot = dr->prot;
memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
rqstp->rq_addrlen = dr->addrlen;
/* Save off transport header len in case we get deferred again */
rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
svc_xprt_received(rqstp->rq_xprt);
return (dr->argslen<<2) - dr->xprt_hlen;
return dr->argslen << 2;
}
......
......@@ -117,15 +117,6 @@ static void svc_reclassify_socket(struct socket *sock)
*/
static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
rqstp->rq_xprt_ctxt = NULL;
skb_free_datagram_locked(svsk->sk_sk, skb);
}
}
/**
......@@ -259,8 +250,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
ssize_t len;
size_t t;
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
......@@ -309,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
set_bit(RQ_SECURE, &rqstp->rq_flags);
__set_bit(RQ_SECURE, &rqstp->rq_flags);
else
clear_bit(RQ_SECURE, &rqstp->rq_flags);
__clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
......@@ -1019,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
set_bit(RQ_LOCAL, &rqstp->rq_flags);
__set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
clear_bit(RQ_LOCAL, &rqstp->rq_flags);
__clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
......
......@@ -831,7 +831,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_err;
if (ret == 0)
goto out_drop;
rqstp->rq_xprt_hlen = 0;
if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
goto out_backchannel;
......
......@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
set_bit(RQ_SECURE, &rqstp->rq_flags);
__set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment