Commit ea8ea737 authored by Linus Torvalds

Merge tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Features:
   - Add support for the NFS v4.2 COPY operation
   - Add support for NFS/RDMA over IPv6

  Bugfixes and cleanups:
   - Avoid race that crashes nfs_init_commit()
   - Fix oops in callback path
   - Fix LOCK/OPEN race when unlinking an open file
   - Choose correct stateids when using delegations in setattr, read and
     write
   - Don't send empty SETATTR after OPEN_CREATE
   - xprtrdma: Prevent server from writing a reply into memory client
     has released
   - xprtrdma: Support using Read list and Reply chunk in one RPC call"

* tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (61 commits)
  pnfs: pnfs_update_layout needs to consider if strict iomode checking is on
  nfs/flexfiles: Use the layout segment for reading unless it a IOMODE_RW and reading is disabled
  nfs/flexfiles: Helper function to detect FF_FLAGS_NO_READ_IO
  nfs: avoid race that crashes nfs_init_commit
  NFS: checking for NULL instead of IS_ERR() in nfs_commit_file()
  pnfs: make pnfs_layout_process more robust
  pnfs: rework LAYOUTGET retry handling
  pnfs: lift retry logic from send_layoutget to pnfs_update_layout
  pnfs: fix bad error handling in send_layoutget
  flexfiles: add kerneldoc header to nfs4_ff_layout_prepare_ds
  flexfiles: remove pointless setting of NFS_LAYOUT_RETURN_REQUESTED
  pnfs: only tear down lsegs that precede seqid in LAYOUTRETURN args
  pnfs: keep track of the return sequence number in pnfs_layout_hdr
  pnfs: record sequence in pnfs_layout_segment when it's created
  pnfs: don't merge new ff lsegs with ones that have LAYOUTRETURN bit set
  pNFS/flexfiles: When initing reads or writes, we might have to retry connecting to DSes
  pNFS/flexfiles: When checking for available DSes, conditionally check for MDS io
  pNFS/flexfile: Fix erroneous fall back to read/write through the MDS
  NFS: Reclaim writes via writepage are opportunistic
  NFSv4: Use the right stateid for delegations in setattr, read and write
  ...
parents 0b9210c9 c7d73af2
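
The headline NFSv4.2 COPY feature below is driven from userspace through copy_file_range(2): nfs4_copy_file_range() wires the file_operations hook up to nfs42_proc_copy(), which issues the COPY compound. A minimal, hedged userspace sketch (not part of the merge; the paths are placeholders, and the raw syscall is used on the assumption that glibc of this era has no copy_file_range() wrapper):

/*
 * Editorial sketch: exercise the new NFSv4.2 COPY path from userspace.
 * Assumptions: both files live on the same NFSv4.2 mount, the kernel
 * headers define __NR_copy_file_range, and the file names are
 * placeholders.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	int src = open("/mnt/nfs/src", O_RDONLY);
	int dst = open("/mnt/nfs/dst", O_CREAT | O_WRONLY | O_TRUNC, 0644);
	loff_t off_in = 0, off_out = 0;
	ssize_t copied;

	if (src < 0 || dst < 0) {
		perror("open");
		return 1;
	}
	/* The client sends COPY; if the server lacks support,
	 * nfs42_proc_copy() returns -EOPNOTSUPP and the VFS is
	 * expected to fall back to a splice-based local copy. */
	copied = syscall(__NR_copy_file_range, src, &off_in,
			 dst, &off_out, (size_t)65536, 0);
	if (copied < 0) {
		perror("copy_file_range");
		return 1;
	}
	printf("copied %zd bytes\n", copied);
	return 0;
}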
@@ -217,7 +217,8 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 	}
 	if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
-				&args->cbl_range)) {
+				&args->cbl_range,
+				be32_to_cpu(args->cbl_stateid.seqid))) {
 		rv = NFS4_OK;
 		goto unlock;
 	}
@@ -500,8 +501,10 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	cps->slot = slot;

 	/* The ca_maxresponsesize_cached is 0 with no DRC */
-	if (args->csa_cachethis != 0)
-		return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+	if (args->csa_cachethis != 0) {
+		status = htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+		goto out_unlock;
+	}

 	/*
 	 * Check for pending referring calls. If a match is found, a
......
@@ -146,10 +146,16 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 	p = read_buf(xdr, NFS4_STATEID_SIZE);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_RESOURCE);
-	memcpy(stateid, p, NFS4_STATEID_SIZE);
+	memcpy(stateid->data, p, NFS4_STATEID_SIZE);
 	return 0;
 }

+static __be32 decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_DELEGATION_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
 static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
 {
 	__be32 *p;
@@ -211,7 +217,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	__be32 *p;
 	__be32 status;

-	status = decode_stateid(xdr, &args->stateid);
+	status = decode_delegation_stateid(xdr, &args->stateid);
 	if (unlikely(status != 0))
 		goto out;
 	p = read_buf(xdr, 4);
@@ -227,6 +233,11 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 }

 #if defined(CONFIG_NFS_V4_1)
+static __be32 decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_LAYOUT_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
 static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
				       struct xdr_stream *xdr,
@@ -263,7 +274,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
		}
		p = xdr_decode_hyper(p, &args->cbl_range.offset);
		p = xdr_decode_hyper(p, &args->cbl_range.length);
-		status = decode_stateid(xdr, &args->cbl_stateid);
+		status = decode_layout_stateid(xdr, &args->cbl_stateid);
		if (unlikely(status != 0))
			goto out;
	} else if (args->cbl_recall_type == RETURN_FSID) {
......
@@ -875,15 +875,16 @@ int nfs_delegations_present(struct nfs_client *clp)
 /**
  * nfs4_copy_delegation_stateid - Copy inode's state ID information
- * @dst: stateid data structure to fill in
  * @inode: inode to check
  * @flags: delegation type requirement
+ * @dst: stateid data structure to fill in
+ * @cred: optional argument to retrieve credential
  *
  * Returns "true" and fills in "dst->data" * if inode had a delegation,
  * otherwise "false" is returned.
  */
-bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
-		fmode_t flags)
+bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
+		nfs4_stateid *dst, struct rpc_cred **cred)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
@@ -896,6 +897,8 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
 	if (ret) {
 		nfs4_stateid_copy(dst, &delegation->stateid);
 		nfs_mark_delegation_referenced(delegation);
+		if (cred)
+			*cred = get_rpccred(delegation->cred);
 	}
 	rcu_read_unlock();
 	return ret;
......
@@ -56,7 +56,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
-bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
+bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs4_have_delegation(struct inode *inode, fmode_t flags);
......
@@ -87,6 +87,7 @@ struct nfs_direct_req {
	int			mirror_count;

	ssize_t			count,		/* bytes actually processed */
+				max_count,	/* max expected count */
				bytes_left,	/* bytes left to be sent */
				io_start,	/* start of IO */
				error;		/* any reported error */
@@ -123,6 +124,8 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
	int i;
	ssize_t count;

+	WARN_ON_ONCE(dreq->count >= dreq->max_count);
+
	if (dreq->mirror_count == 1) {
		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
		dreq->count += hdr->good_bytes;
@@ -275,7 +278,7 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
			      struct nfs_direct_req *dreq)
 {
-	cinfo->lock = &dreq->inode->i_lock;
+	cinfo->inode = dreq->inode;
	cinfo->mds = &dreq->mds_cinfo;
	cinfo->ds = &dreq->ds_cinfo;
	cinfo->dreq = dreq;
@@ -591,7 +594,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
		goto out_unlock;

	dreq->inode = inode;
-	dreq->bytes_left = count;
+	dreq->bytes_left = dreq->max_count = count;
	dreq->io_start = iocb->ki_pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -630,13 +633,13 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
				  struct list_head *list,
				  struct nfs_commit_info *cinfo)
 {
-	spin_lock(cinfo->lock);
+	spin_lock(&cinfo->inode->i_lock);
 #ifdef CONFIG_NFS_V4_1
	if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
		NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
 #endif
	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
-	spin_unlock(cinfo->lock);
+	spin_unlock(&cinfo->inode->i_lock);
 }

 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
@@ -671,13 +674,13 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
		if (!nfs_pageio_add_request(&desc, req)) {
			nfs_list_remove_request(req);
			nfs_list_add_request(req, &failed);
-			spin_lock(cinfo.lock);
+			spin_lock(&cinfo.inode->i_lock);
			dreq->flags = 0;
			if (desc.pg_error < 0)
				dreq->error = desc.pg_error;
			else
				dreq->error = -EIO;
-			spin_unlock(cinfo.lock);
+			spin_unlock(&cinfo.inode->i_lock);
		}
		nfs_release_request(req);
	}
@@ -1023,7 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
		goto out_unlock;

	dreq->inode = inode;
-	dreq->bytes_left = iov_iter_count(iter);
+	dreq->bytes_left = dreq->max_count = iov_iter_count(iter);
	dreq->io_start = pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
......
@@ -795,7 +795,7 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
	}

-	spin_lock(cinfo->lock);
+	spin_lock(&cinfo->inode->i_lock);
	if (cinfo->ds->nbuckets >= size)
		goto out;
	for (i = 0; i < cinfo->ds->nbuckets; i++) {
@@ -811,7 +811,7 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
	swap(cinfo->ds->buckets, buckets);
	cinfo->ds->nbuckets = size;
 out:
-	spin_unlock(cinfo->lock);
+	spin_unlock(&cinfo->inode->i_lock);
	kfree(buckets);
	return 0;
 }
@@ -890,6 +890,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_READ,
+						   false,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
@@ -915,6 +916,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
+						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
......
This diff is collapsed.
@@ -10,7 +10,8 @@
 #define FS_NFS_NFS4FLEXFILELAYOUT_H

 #define FF_FLAGS_NO_LAYOUTCOMMIT	1
 #define FF_FLAGS_NO_IO_THRU_MDS	2
+#define FF_FLAGS_NO_READ_IO		4

 #include "../pnfs.h"
@@ -76,9 +77,8 @@ struct nfs4_ff_layout_mirror {
	u32				fh_versions_cnt;
	struct nfs_fh			*fh_versions;
	nfs4_stateid			stateid;
-	u32				uid;
-	u32				gid;
-	struct rpc_cred			*cred;
+	struct rpc_cred	__rcu		*ro_cred;
+	struct rpc_cred	__rcu		*rw_cred;
	atomic_t			ref;
	spinlock_t			lock;
	struct nfs4_ff_layoutstat	read_stat;
@@ -153,6 +153,12 @@ ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg)
	return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS;
 }

+static inline bool
+ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
+{
+	return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO;
+}
+
 static inline bool
 ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
 {
@@ -192,4 +198,7 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
 struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
				       u32 ds_idx, struct rpc_cred *mdscred);
 bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
+bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
+bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
+
 #endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */
@@ -228,7 +228,8 @@ ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
		return e1->opnum < e2->opnum ? -1 : 1;
	if (e1->status != e2->status)
		return e1->status < e2->status ? -1 : 1;
-	ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+	ret = memcmp(e1->stateid.data, e2->stateid.data,
+		     sizeof(e1->stateid.data));
	if (ret != 0)
		return ret;
	ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
@@ -302,40 +303,26 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
	return 0;
 }

-/* currently we only support AUTH_NONE and AUTH_SYS */
-static rpc_authflavor_t
-nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror)
+static struct rpc_cred *
+ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
 {
-	if (mirror->uid == (u32)-1)
-		return RPC_AUTH_NULL;
-	return RPC_AUTH_UNIX;
-}
+	struct rpc_cred *cred, __rcu **pcred;

-/* fetch cred for NFSv3 DS */
-static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
-					struct nfs4_pnfs_ds *ds)
-{
-	if (ds->ds_clp && !mirror->cred &&
-	    mirror->mirror_ds->ds_versions[0].version == 3) {
-		struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth;
-		struct rpc_cred *cred;
-		struct auth_cred acred = {
-			.uid = make_kuid(&init_user_ns, mirror->uid),
-			.gid = make_kgid(&init_user_ns, mirror->gid),
-		};
-
-		/* AUTH_NULL ignores acred */
-		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
-		if (IS_ERR(cred)) {
-			dprintk("%s: lookup_cred failed with %ld\n",
-				__func__, PTR_ERR(cred));
-			return PTR_ERR(cred);
-		} else {
-			if (cmpxchg(&mirror->cred, NULL, cred))
-				put_rpccred(cred);
-		}
-	}
-	return 0;
+	if (iomode == IOMODE_READ)
+		pcred = &mirror->ro_cred;
+	else
+		pcred = &mirror->rw_cred;
+
+	rcu_read_lock();
+	do {
+		cred = rcu_dereference(*pcred);
+		if (!cred)
+			break;
+
+		cred = get_rpccred_rcu(cred);
+	} while(!cred);
+	rcu_read_unlock();
+	return cred;
 }

 struct nfs_fh *
@@ -356,7 +343,23 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
	return fh;
 }

-/* Upon return, either ds is connected, or ds is NULL */
+/**
+ * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
+ * @lseg: the layout segment we're operating on
+ * @ds_idx: index of the DS to use
+ * @fail_return: return layout on connect failure?
+ *
+ * Try to prepare a DS connection to accept an RPC call. This involves
+ * selecting a mirror to use and connecting the client to it if it's not
+ * already connected.
+ *
+ * Since we only need a single functioning mirror to satisfy a read, we don't
+ * want to return the layout if there is one. For writes though, any down
+ * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish
+ * between the two cases.
+ *
+ * Returns a pointer to a connected DS object on success or NULL on failure.
+ */
 struct nfs4_pnfs_ds *
 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
			  bool fail_return)
@@ -367,7 +370,6 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
	struct inode *ino = lseg->pls_layout->plh_inode;
	struct nfs_server *s = NFS_SERVER(ino);
	unsigned int max_payload;
-	rpc_authflavor_t flavor;

	if (!ff_layout_mirror_valid(lseg, mirror)) {
		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -383,9 +385,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
	smp_rmb();
	if (ds->ds_clp)
-		goto out_update_creds;
-
-	flavor = nfs4_ff_layout_choose_authflavor(mirror);
+		goto out;

	/* FIXME: For now we assume the server sent only one version of NFS
	 * to use for the DS.
@@ -394,7 +394,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
			     dataserver_retrans,
			     mirror->mirror_ds->ds_versions[0].version,
			     mirror->mirror_ds->ds_versions[0].minor_version,
-			     flavor);
+			     RPC_AUTH_UNIX);

	/* connect success, check rsize/wsize limit */
	if (ds->ds_clp) {
@@ -410,20 +410,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
					 mirror, lseg->pls_range.offset,
					 lseg->pls_range.length, NFS4ERR_NXIO,
					 OP_ILLEGAL, GFP_NOIO);
-		if (!fail_return) {
-			if (ff_layout_has_available_ds(lseg))
-				set_bit(NFS_LAYOUT_RETURN_REQUESTED,
-					&lseg->pls_layout->plh_flags);
-			else
-				pnfs_error_mark_layout_for_return(ino, lseg);
-		} else
+		if (fail_return || !ff_layout_has_available_ds(lseg))
			pnfs_error_mark_layout_for_return(ino, lseg);
		ds = NULL;
-		goto out;
	}
-out_update_creds:
-	if (ff_layout_update_mirror_cred(mirror, ds))
-		ds = NULL;
 out:
	return ds;
 }
@@ -433,16 +423,15 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
		      struct rpc_cred *mdscred)
 {
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
-	struct rpc_cred *cred = ERR_PTR(-EINVAL);
-
-	if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true))
-		goto out;
+	struct rpc_cred *cred;

-	if (mirror && mirror->cred)
-		cred = mirror->cred;
-	else
-		cred = mdscred;
-out:
+	if (mirror) {
+		cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
+		if (!cred)
+			cred = get_rpccred(mdscred);
+	} else {
+		cred = get_rpccred(mdscred);
+	}
	return cred;
 }
@@ -562,6 +551,18 @@ bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)
	return ff_rw_layout_has_available_ds(lseg);
 }

+bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg)
+{
+	return ff_layout_no_fallback_to_mds(lseg) ||
+	       ff_layout_has_available_ds(lseg);
+}
+
+bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg)
+{
+	return lseg->pls_range.iomode == IOMODE_RW &&
+	       ff_layout_no_read_on_rw(lseg);
+}
+
 module_param(dataserver_retrans, uint, 0644);
 MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
			"retries a request before it attempts further "
......
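
One note on the RCU loop in ff_layout_get_mirror_cred() above: it depends on get_rpccred_rcu() taking a reference only while the count is still non-zero, so a reader racing with the final put simply retries against the re-read pointer. For reference, a sketch of that helper as it is assumed to exist in include/linux/sunrpc/auth.h around this release (shown for context, not part of this diff):

/* Sketch (assumption: mirrors include/linux/sunrpc/auth.h circa v4.7).
 * Must be called under rcu_read_lock(); returns NULL if the refcount
 * already dropped to zero, in which case the caller re-reads the RCU
 * pointer and tries again.
 */
static inline struct rpc_cred *get_rpccred_rcu(struct rpc_cred *cred)
{
	if (atomic_inc_not_zero(&cred->cr_count))
		return cred;
	return NULL;
}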
@@ -477,6 +477,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
			    u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
 void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
+int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
......
@@ -13,6 +13,7 @@

 /* nfs4.2proc.c */
 int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
 int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
 loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 int nfs42_proc_layoutstats_generic(struct nfs_server *,
......
@@ -126,6 +126,111 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
	return err;
 }

+static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+				struct nfs_lock_context *src_lock,
+				struct file *dst, loff_t pos_dst,
+				struct nfs_lock_context *dst_lock,
+				size_t count)
+{
+	struct nfs42_copy_args args = {
+		.src_fh		= NFS_FH(file_inode(src)),
+		.src_pos	= pos_src,
+		.dst_fh		= NFS_FH(file_inode(dst)),
+		.dst_pos	= pos_dst,
+		.count		= count,
+	};
+	struct nfs42_copy_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	struct inode *dst_inode = file_inode(dst);
+	struct nfs_server *server = NFS_SERVER(dst_inode);
+	int status;
+
+	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+				     src_lock, FMODE_READ);
+	if (status)
+		return status;
+
+	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+				     dst_lock, FMODE_WRITE);
+	if (status)
+		return status;
+
+	status = nfs4_call_sync(server->client, server, &msg,
+				&args.seq_args, &res.seq_res, 0);
+	if (status == -ENOTSUPP)
+		server->caps &= ~NFS_CAP_COPY;
+	if (status)
+		return status;
+
+	if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
+		status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+		if (status)
+			return status;
+	}
+
+	truncate_pagecache_range(dst_inode, pos_dst,
+				 pos_dst + res.write_res.count);
+
+	return res.write_res.count;
+}
+
+ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
+			struct file *dst, loff_t pos_dst,
+			size_t count)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(dst));
+	struct nfs_lock_context *src_lock;
+	struct nfs_lock_context *dst_lock;
+	struct nfs4_exception src_exception = { };
+	struct nfs4_exception dst_exception = { };
+	ssize_t err, err2;
+
+	if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
+		return -EOPNOTSUPP;
+
+	src_lock = nfs_get_lock_context(nfs_file_open_context(src));
+	if (IS_ERR(src_lock))
+		return PTR_ERR(src_lock);
+
+	src_exception.inode = file_inode(src);
+	src_exception.state = src_lock->open_context->state;
+
+	dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
+	if (IS_ERR(dst_lock)) {
+		err = PTR_ERR(dst_lock);
+		goto out_put_src_lock;
+	}
+
+	dst_exception.inode = file_inode(dst);
+	dst_exception.state = dst_lock->open_context->state;
+
+	do {
+		inode_lock(file_inode(dst));
+		err = _nfs42_proc_copy(src, pos_src, src_lock,
+				       dst, pos_dst, dst_lock, count);
+		inode_unlock(file_inode(dst));
+
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+
+		err2 = nfs4_handle_exception(server, err, &src_exception);
+		err = nfs4_handle_exception(server, err, &dst_exception);
+		if (!err)
+			err = err2;
+	} while (src_exception.retry || dst_exception.retry);
+
+	nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+	nfs_put_lock_context(src_lock);
+	return err;
+}
+
 static loff_t _nfs42_proc_llseek(struct file *filep,
		struct nfs_lock_context *lock, loff_t offset, int whence)
 {
@@ -232,7 +337,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
		 * with the current stateid.
		 */
		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-		pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
+		pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&head);
	} else
......
@@ -9,9 +9,22 @@
 #define encode_fallocate_maxsz		(encode_stateid_maxsz + \
					 2 /* offset */ + \
					 2 /* length */)
+#define NFS42_WRITE_RES_SIZE		(1 /* wr_callback_id size */ +\
+					 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+					 2 /* wr_count */ + \
+					 1 /* wr_committed */ + \
+					 XDR_QUADLEN(NFS4_VERIFIER_SIZE))
 #define encode_allocate_maxsz		(op_encode_hdr_maxsz + \
					 encode_fallocate_maxsz)
 #define decode_allocate_maxsz		(op_decode_hdr_maxsz)
+#define encode_copy_maxsz		(op_encode_hdr_maxsz + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+					 2 + 2 + 2 + 1 + 1 + 1)
+#define decode_copy_maxsz		(op_decode_hdr_maxsz + \
+					 NFS42_WRITE_RES_SIZE + \
+					 1 /* cr_consecutive */ + \
+					 1 /* cr_synchronous */)
 #define encode_deallocate_maxsz	(op_encode_hdr_maxsz + \
					 encode_fallocate_maxsz)
 #define decode_deallocate_maxsz	(op_decode_hdr_maxsz)
@@ -49,6 +62,16 @@
					 decode_putfh_maxsz + \
					 decode_allocate_maxsz + \
					 decode_getattr_maxsz)
+#define NFS4_enc_copy_sz		(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_savefh_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_copy_maxsz)
+#define NFS4_dec_copy_sz		(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_savefh_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_copy_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
					 encode_putfh_maxsz + \
					 encode_deallocate_maxsz + \
@@ -102,6 +125,23 @@ static void encode_allocate(struct xdr_stream *xdr,
	encode_fallocate(xdr, args);
 }

+static void encode_copy(struct xdr_stream *xdr,
+			struct nfs42_copy_args *args,
+			struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
+	encode_nfs4_stateid(xdr, &args->src_stateid);
+	encode_nfs4_stateid(xdr, &args->dst_stateid);
+
+	encode_uint64(xdr, args->src_pos);
+	encode_uint64(xdr, args->dst_pos);
+	encode_uint64(xdr, args->count);
+
+	encode_uint32(xdr, 1); /* consecutive = true */
+	encode_uint32(xdr, 1); /* synchronous = true */
+	encode_uint32(xdr, 0); /* src server list */
+}
+
 static void encode_deallocate(struct xdr_stream *xdr,
			      struct nfs42_falloc_args *args,
			      struct compound_hdr *hdr)
@@ -181,6 +221,26 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
	encode_nops(&hdr);
 }

+/*
+ * Encode COPY request
+ */
+static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
+			      struct xdr_stream *xdr,
+			      struct nfs42_copy_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->src_fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dst_fh, &hdr);
+	encode_copy(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * Encode DEALLOCATE request
  */
@@ -266,6 +326,62 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
	return decode_op_hdr(xdr, OP_ALLOCATE);
 }

+static int decode_write_response(struct xdr_stream *xdr,
+				 struct nfs42_write_res *res)
+{
+	__be32 *p;
+	int stateids;
+
+	p = xdr_inline_decode(xdr, 4 + 8 + 4);
+	if (unlikely(!p))
+		goto out_overflow;
+
+	stateids = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &res->count);
+	res->verifier.committed = be32_to_cpup(p);
+	return decode_verifier(xdr, &res->verifier.verifier);
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_copy_requirements(struct xdr_stream *xdr,
+				    struct nfs42_copy_res *res) {
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(!p))
+		goto out_overflow;
+
+	res->consecutive = be32_to_cpup(p++);
+	res->synchronous = be32_to_cpup(p++);
+	return 0;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
+{
+	int status;
+
+	status = decode_op_hdr(xdr, OP_COPY);
+	if (status == NFS4ERR_OFFLOAD_NO_REQS) {
+		status = decode_copy_requirements(xdr, res);
+		if (status)
+			return status;
+		return NFS4ERR_OFFLOAD_NO_REQS;
+	} else if (status)
+		return status;
+
+	status = decode_write_response(xdr, &res->write_res);
+	if (status)
+		return status;
+
+	return decode_copy_requirements(xdr, res);
+}
+
 static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
	return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -330,6 +446,36 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
	return status;
 }

+/*
+ * Decode COPY response
+ */
+static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
+			     struct xdr_stream *xdr,
+			     struct nfs42_copy_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_savefh(xdr);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_copy(xdr, res);
+out:
+	return status;
+}
+
 /*
  * Decode DEALLOCATE request
  */
......
@@ -438,8 +438,9 @@ extern void nfs41_handle_server_scope(struct nfs_client *,
				      struct nfs41_server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
-		fmode_t, const struct nfs_lockowner *);
+extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
+		const struct nfs_lockowner *, nfs4_stateid *,
+		struct rpc_cred **);
 extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
 extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -496,12 +497,15 @@ extern struct svc_version nfs4_callback_version4;

 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {
-	memcpy(dst, src, sizeof(*dst));
+	memcpy(dst->data, src->data, sizeof(dst->data));
+	dst->type = src->type;
 }

 static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
 {
-	return memcmp(dst, src, sizeof(*dst)) == 0;
+	if (dst->type != src->type)
+		return false;
+	return memcmp(dst->data, src->data, sizeof(dst->data)) == 0;
 }

 static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
......
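
The nfs4_stateid_copy()/nfs4_stateid_match() changes above assume the labelled stateid layout this series introduces: the 16 wire bytes stay in ->data, while ->type is purely client-side bookkeeping, so it is copied explicitly and deliberately excluded from the wire comparison. An abbreviated sketch of that layout (assumption: condensed from include/linux/nfs4.h after this merge):

/* Abbreviated sketch (assumption) of the labelled stateid from
 * include/linux/nfs4.h: only ->data goes on the wire; ->type records
 * which kind of stateid the client decoded.
 */
struct nfs4_stateid_struct {
	union {
		char data[NFS4_STATEID_SIZE];	/* 16 wire bytes */
		struct {
			__be32 seqid;
			char other[NFS4_STATEID_OTHER_SIZE];
		} __attribute__((packed));
	};
	enum {
		NFS4_INVALID_STATEID_TYPE = 0,
		NFS4_SPECIAL_STATEID_TYPE,
		NFS4_OPEN_STATEID_TYPE,
		NFS4_LOCK_STATEID_TYPE,
		NFS4_DELEGATION_STATEID_TYPE,
		NFS4_LAYOUT_STATEID_TYPE,
	} type;
};
typedef struct nfs4_stateid_struct nfs4_stateid;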
@@ -129,6 +129,28 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
 }

 #ifdef CONFIG_NFS_V4_2
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+				    struct file *file_out, loff_t pos_out,
+				    size_t count, unsigned int flags)
+{
+	struct inode *in_inode = file_inode(file_in);
+	struct inode *out_inode = file_inode(file_out);
+	int ret;
+
+	if (in_inode == out_inode)
+		return -EINVAL;
+
+	/* flush any pending writes */
+	ret = nfs_sync_inode(in_inode);
+	if (ret)
+		return ret;
+	ret = nfs_sync_inode(out_inode);
+	if (ret)
+		return ret;
+
+	return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+}
+
 static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
 {
	loff_t ret;
@@ -243,6 +265,7 @@ const struct file_operations nfs4_file_operations = {
	.check_flags	= nfs_check_flags,
	.setlease	= simple_nosetlease,
 #ifdef CONFIG_NFS_V4_2
+	.copy_file_range = nfs4_copy_file_range,
	.llseek		= nfs4_file_llseek,
	.fallocate	= nfs42_fallocate,
	.clone_file_range = nfs42_clone_file_range,
......
This diff is collapsed.
@@ -65,7 +65,10 @@

 #define OPENOWNER_POOL_SIZE	8

-const nfs4_stateid zero_stateid;
+const nfs4_stateid zero_stateid = {
+	.data = { 0 },
+	.type = NFS4_SPECIAL_STATEID_TYPE,
+};
 static DEFINE_MUTEX(nfs_clid_init_mutex);

 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -985,15 +988,20 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
  * Byte-range lock aware utility to initialize the stateid of read/write
  * requests.
  */
-int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
-		fmode_t fmode, const struct nfs_lockowner *lockowner)
+int nfs4_select_rw_stateid(struct nfs4_state *state,
+		fmode_t fmode, const struct nfs_lockowner *lockowner,
+		nfs4_stateid *dst, struct rpc_cred **cred)
 {
-	int ret = nfs4_copy_lock_stateid(dst, state, lockowner);
+	int ret;
+
+	if (cred != NULL)
+		*cred = NULL;
+	ret = nfs4_copy_lock_stateid(dst, state, lockowner);
	if (ret == -EIO)
		/* A lost lock - don't even consider delegations */
		goto out;
	/* returns true if delegation stateid found and copied */
-	if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) {
+	if (nfs4_copy_delegation_stateid(state->inode, fmode, dst, cred)) {
		ret = 0;
		goto out;
	}
......
@@ -1520,6 +1520,8 @@ DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
		{ PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" },	\
		{ PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" },		\
		{ PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" },	\
+		{ PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" },	\
+		{ PNFS_UPDATE_LAYOUT_RETRY, "retrying" },	\
		{ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" })

 TRACE_EVENT(pnfs_update_layout,
@@ -1528,9 +1530,10 @@ TRACE_EVENT(pnfs_update_layout,
			u64 count,
			enum pnfs_iomode iomode,
			struct pnfs_layout_hdr *lo,
+			struct pnfs_layout_segment *lseg,
			enum pnfs_update_layout_reason reason
		),
-		TP_ARGS(inode, pos, count, iomode, lo, reason),
+		TP_ARGS(inode, pos, count, iomode, lo, lseg, reason),
		TP_STRUCT__entry(
			__field(dev_t, dev)
			__field(u64, fileid)
@@ -1540,6 +1543,7 @@ TRACE_EVENT(pnfs_update_layout,
			__field(enum pnfs_iomode, iomode)
			__field(int, layoutstateid_seq)
			__field(u32, layoutstateid_hash)
+			__field(long, lseg)
			__field(enum pnfs_update_layout_reason, reason)
		),
		TP_fast_assign(
@@ -1559,11 +1563,12 @@ TRACE_EVENT(pnfs_update_layout,
				__entry->layoutstateid_seq = 0;
				__entry->layoutstateid_hash = 0;
			}
+			__entry->lseg = (long)lseg;
		),
		TP_printk(
			"fileid=%02x:%02x:%llu fhandle=0x%08x "
			"iomode=%s pos=%llu count=%llu "
-			"layoutstateid=%d:0x%08x (%s)",
+			"layoutstateid=%d:0x%08x lseg=0x%lx (%s)",
			MAJOR(__entry->dev), MINOR(__entry->dev),
			(unsigned long long)__entry->fileid,
			__entry->fhandle,
@@ -1571,6 +1576,7 @@ TRACE_EVENT(pnfs_update_layout,
			(unsigned long long)__entry->pos,
			(unsigned long long)__entry->count,
			__entry->layoutstateid_seq, __entry->layoutstateid_hash,
+			__entry->lseg,
			show_pnfs_update_layout_reason(__entry->reason)
		)
 );
......
@@ -4270,6 +4270,24 @@ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
	return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
 }

+static int decode_open_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_OPEN_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
+static int decode_lock_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_LOCK_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
+static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_DELEGATION_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
 static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 {
	int status;
@@ -4278,7 +4296,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
	if (status != -EIO)
		nfs_increment_open_seqid(status, res->seqid);
	if (!status)
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_open_stateid(xdr, &res->stateid);
	return status;
 }

@@ -4937,7 +4955,7 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
	if (status == -EIO)
		goto out;
	if (status == 0) {
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_lock_stateid(xdr, &res->stateid);
		if (unlikely(status))
			goto out;
	} else if (status == -NFS4ERR_DENIED)
@@ -4966,7 +4984,7 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
	if (status != -EIO)
		nfs_increment_lock_seqid(status, res->seqid);
	if (status == 0)
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_lock_stateid(xdr, &res->stateid);
	return status;
 }

@@ -5016,7 +5034,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
	__be32 *p;
	int status;

-	status = decode_stateid(xdr, &res->delegation);
+	status = decode_delegation_stateid(xdr, &res->delegation);
	if (unlikely(status))
		return status;
	p = xdr_inline_decode(xdr, 4);
@@ -5096,7 +5114,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
		nfs_increment_open_seqid(status, res->seqid);
	if (status)
		return status;
-	status = decode_stateid(xdr, &res->stateid);
+	status = decode_open_stateid(xdr, &res->stateid);
	if (unlikely(status))
		return status;
@@ -5136,7 +5154,7 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre
	if (status != -EIO)
		nfs_increment_open_seqid(status, res->seqid);
	if (!status)
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_open_stateid(xdr, &res->stateid);
	return status;
 }

@@ -5148,7 +5166,7 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re
	if (status != -EIO)
		nfs_increment_open_seqid(status, res->seqid);
	if (!status)
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_open_stateid(xdr, &res->stateid);
	return status;
 }

@@ -5838,6 +5856,12 @@ static int decode_sequence(struct xdr_stream *xdr,
 }

 #if defined(CONFIG_NFS_V4_1)
+static int decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	stateid->type = NFS4_LAYOUT_STATEID_TYPE;
+	return decode_stateid(xdr, stateid);
+}
+
 static int decode_getdeviceinfo(struct xdr_stream *xdr,
				struct nfs4_getdeviceinfo_res *res)
 {
@@ -5919,7 +5943,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
	if (unlikely(!p))
		goto out_overflow;
	res->return_on_close = be32_to_cpup(p);
-	decode_stateid(xdr, &res->stateid);
+	decode_layout_stateid(xdr, &res->stateid);
	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		goto out_overflow;
@@ -5985,7 +6009,7 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
		goto out_overflow;
	res->lrs_present = be32_to_cpup(p);
	if (res->lrs_present)
-		status = decode_stateid(xdr, &res->stateid);
+		status = decode_layout_stateid(xdr, &res->stateid);
	return status;
 out_overflow:
	print_overflow_msg(__func__, xdr);
@@ -7515,6 +7539,7 @@ struct rpc_procinfo nfs4_procedures[] = {
	PROC(DEALLOCATE,	enc_deallocate,		dec_deallocate),
	PROC(LAYOUTSTATS,	enc_layoutstats,	dec_layoutstats),
	PROC(CLONE,		enc_clone,		dec_clone),
+	PROC(COPY,		enc_copy,		dec_copy),
 #endif /* CONFIG_NFS_V4_2 */
 };
......
@@ -341,8 +341,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page    = page;
-	req->wb_index	= page_file_index(page);
-	get_page(page);
+	if (page) {
+		req->wb_index = page_file_index(page);
+		get_page(page);
+	}
	req->wb_offset  = offset;
	req->wb_pgbase	= offset;
	req->wb_bytes   = count;
......
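
Why nfs_create_request() now tolerates a NULL page: the new nfs_commit_file() helper (declared in the internal.h hunk earlier and used by _nfs42_proc_copy() to commit an unstable COPY result) builds a page-less request spanning the whole file. A condensed sketch of that caller, as assumed from the fs/nfs/write.c side of this series:

/* Condensed sketch (assumption) of nfs_commit_file() from fs/nfs/write.c:
 * a page-less nfs_page covering the whole file carries the verifier
 * from the COPY reply into a synchronous COMMIT.
 */
int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
{
	struct inode *inode = file_inode(file);
	struct nfs_open_context *open;
	struct nfs_commit_info cinfo;
	struct nfs_page *req;
	int ret;

	open = get_nfs_open_context(nfs_file_open_context(file));
	req  = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
	if (IS_ERR(req)) {
		ret = PTR_ERR(req);
		goto out_put;
	}

	nfs_init_cinfo_from_inode(&cinfo, inode);

	memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
	nfs_request_add_commit_list(req, &cinfo);

	ret = nfs_commit_inode(inode, FLUSH_SYNC);

	nfs_free_request(req);
out_put:
	put_nfs_open_context(open);
	return ret;
}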
This diff is collapsed.
@@ -64,6 +64,7 @@ struct pnfs_layout_segment {
	struct list_head pls_lc_list;
	struct pnfs_layout_range pls_range;
	atomic_t pls_refcount;
+	u32 pls_seq;
	unsigned long pls_flags;
	struct pnfs_layout_hdr *pls_layout;
	struct work_struct pls_work;
@@ -194,6 +195,7 @@ struct pnfs_layout_hdr {
	unsigned long		plh_flags;
	nfs4_stateid		plh_stateid;
	u32			plh_barrier; /* ignore lower seqids */
+	u32			plh_return_seq;
	enum pnfs_iomode	plh_return_iomode;
	loff_t			plh_lwb; /* last write byte for layoutcommit */
	struct rpc_cred		*plh_lc_cred; /* layoutcommit cred */
@@ -226,7 +228,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
				   struct pnfs_device *dev,
				   struct rpc_cred *cred);
-extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
+extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags);
 extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);

 /* pnfs.c */
@@ -258,16 +260,14 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
			     const nfs4_stateid *new,
			     bool update_barrier);
-int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
-				  struct pnfs_layout_hdr *lo,
-				  const struct pnfs_layout_range *range,
-				  struct nfs4_state *open_state);
 int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
				struct list_head *tmp_list,
-				const struct pnfs_layout_range *recall_range);
+				const struct pnfs_layout_range *recall_range,
+				u32 seq);
 int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
				struct list_head *tmp_list,
-				const struct pnfs_layout_range *recall_range);
+				const struct pnfs_layout_range *recall_range,
+				u32 seq);
 bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
@@ -282,12 +282,13 @@ int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_pgio_header *);
 void pnfs_ld_read_done(struct nfs_pgio_header *);
-int pnfs_read_resend_pnfs(struct nfs_pgio_header *);
+void pnfs_read_resend_pnfs(struct nfs_pgio_header *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
					       struct nfs_open_context *ctx,
					       loff_t pos,
					       u64 count,
					       enum pnfs_iomode iomode,
+					       bool strict_iomode,
					       gfp_t gfp_flags);
 void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo);
......
...@@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); ...@@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
/* The generic layer is about to remove the req from the commit list. /* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference. * If this will make the bucket empty, it will need to put the lseg reference.
* Note this must be called holding the inode (/cinfo) lock * Note this must be called holding i_lock
*/ */
void void
pnfs_generic_clear_request_commit(struct nfs_page *req, pnfs_generic_clear_request_commit(struct nfs_page *req,
...@@ -98,7 +98,7 @@ pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, ...@@ -98,7 +98,7 @@ pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst,
if (!nfs_lock_request(req)) if (!nfs_lock_request(req))
continue; continue;
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
if (cond_resched_lock(cinfo->lock)) if (cond_resched_lock(&cinfo->inode->i_lock))
list_safe_reset_next(req, tmp, wb_list); list_safe_reset_next(req, tmp, wb_list);
nfs_request_remove_commit_list(req, cinfo); nfs_request_remove_commit_list(req, cinfo);
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
...@@ -119,7 +119,7 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, ...@@ -119,7 +119,7 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct list_head *dst = &bucket->committing; struct list_head *dst = &bucket->committing;
int ret; int ret;
lockdep_assert_held(cinfo->lock); lockdep_assert_held(&cinfo->inode->i_lock);
ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max);
if (ret) { if (ret) {
cinfo->ds->nwritten -= ret; cinfo->ds->nwritten -= ret;
...@@ -142,7 +142,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, ...@@ -142,7 +142,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
{ {
int i, rv = 0, cnt; int i, rv = 0, cnt;
lockdep_assert_held(cinfo->lock); lockdep_assert_held(&cinfo->inode->i_lock);
for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
cinfo, max); cinfo, max);
...@@ -161,16 +161,16 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, ...@@ -161,16 +161,16 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct pnfs_layout_segment *freeme; struct pnfs_layout_segment *freeme;
int i; int i;
lockdep_assert_held(cinfo->lock); lockdep_assert_held(&cinfo->inode->i_lock);
restart: restart:
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (pnfs_generic_transfer_commit_list(&b->written, dst, if (pnfs_generic_transfer_commit_list(&b->written, dst,
cinfo, 0)) { cinfo, 0)) {
freeme = b->wlseg; freeme = b->wlseg;
b->wlseg = NULL; b->wlseg = NULL;
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
pnfs_put_lseg(freeme); pnfs_put_lseg(freeme);
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
goto restart; goto restart;
} }
} }
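Note the lock juggling in the hunk above: pnfs_put_lseg() takes i_lock itself, so the scan must drop the lock around it, and once the lock has been dropped the bucket list may have changed, hence the restart. In sketch form:

	spin_unlock(&cinfo->inode->i_lock);
	pnfs_put_lseg(freeme);		/* takes i_lock internally */
	spin_lock(&cinfo->inode->i_lock);
	goto restart;			/* list may have changed while unlocked */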
...@@ -186,7 +186,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) ...@@ -186,7 +186,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
LIST_HEAD(pages); LIST_HEAD(pages);
int i; int i;
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
for (i = idx; i < fl_cinfo->nbuckets; i++) { for (i = idx; i < fl_cinfo->nbuckets; i++) {
bucket = &fl_cinfo->buckets[i]; bucket = &fl_cinfo->buckets[i];
if (list_empty(&bucket->committing)) if (list_empty(&bucket->committing))
...@@ -194,12 +194,12 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) ...@@ -194,12 +194,12 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
freeme = bucket->clseg; freeme = bucket->clseg;
bucket->clseg = NULL; bucket->clseg = NULL;
list_splice_init(&bucket->committing, &pages); list_splice_init(&bucket->committing, &pages);
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
nfs_retry_commit(&pages, freeme, cinfo, i); nfs_retry_commit(&pages, freeme, cinfo, i);
pnfs_put_lseg(freeme); pnfs_put_lseg(freeme);
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
} }
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
} }
static unsigned int static unsigned int
...@@ -238,14 +238,31 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, ...@@ -238,14 +238,31 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
struct pnfs_commit_bucket *bucket; struct pnfs_commit_bucket *bucket;
bucket = &cinfo->ds->buckets[data->ds_commit_index]; bucket = &cinfo->ds->buckets[data->ds_commit_index];
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
list_splice_init(&bucket->committing, pages); list_splice_init(&bucket->committing, pages);
data->lseg = bucket->clseg; data->lseg = bucket->clseg;
bucket->clseg = NULL; bucket->clseg = NULL;
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
} }
/* Helper function for pnfs_generic_commit_pagelist to catch an empty
* page list. This can happen when two commits race. */
static bool
pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
struct nfs_commit_data *data,
struct nfs_commit_info *cinfo)
{
if (list_empty(pages)) {
if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
wake_up_atomic_t(&cinfo->mds->rpcs_out);
nfs_commitdata_release(data);
return true;
}
return false;
}
/* This follows nfs_commit_list pretty closely */ /* This follows nfs_commit_list pretty closely */
int int
pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
...@@ -280,6 +297,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, ...@@ -280,6 +297,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_for_each_entry_safe(data, tmp, &list, pages) { list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages); list_del_init(&data->pages);
if (data->ds_commit_index < 0) { if (data->ds_commit_index < 0) {
/* another commit raced with us */
if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
data, cinfo))
continue;
nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_init_commit(data, mds_pages, NULL, cinfo);
nfs_initiate_commit(NFS_CLIENT(inode), data, nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode), NFS_PROTO(data->inode),
...@@ -288,6 +310,12 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, ...@@ -288,6 +310,12 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
LIST_HEAD(pages); LIST_HEAD(pages);
pnfs_fetch_commit_bucket_list(&pages, data, cinfo); pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
/* another commit raced with us */
if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
data, cinfo))
continue;
nfs_init_commit(data, &pages, data->lseg, cinfo); nfs_init_commit(data, &pages, data->lseg, cinfo);
initiate_commit(data, how); initiate_commit(data, how);
} }
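The cancel helper above keeps the rpcs_out accounting balanced when a racing commit has already drained the page list: the loser must release its preallocated nfs_commit_data and still wake any waiter. For context, the waiter side it pairs with looks roughly like this (a sketch, assuming the 4.7-era wait_on_atomic_t() API used by nfs_commit_inode()):

	wait_on_atomic_t(&cinfo->mds->rpcs_out,
			 nfs_wait_atomic_killable, TASK_KILLABLE);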
...@@ -874,12 +902,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, ...@@ -874,12 +902,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
struct list_head *list; struct list_head *list;
struct pnfs_commit_bucket *buckets; struct pnfs_commit_bucket *buckets;
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
buckets = cinfo->ds->buckets; buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written; list = &buckets[ds_commit_idx].written;
if (list_empty(list)) { if (list_empty(list)) {
if (!pnfs_is_valid_lseg(lseg)) { if (!pnfs_is_valid_lseg(lseg)) {
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
cinfo->completion_ops->resched_write(cinfo, req); cinfo->completion_ops->resched_write(cinfo, req);
return; return;
} }
...@@ -896,7 +924,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, ...@@ -896,7 +924,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
cinfo->ds->nwritten++; cinfo->ds->nwritten++;
nfs_request_add_commit_list_locked(req, list, cinfo); nfs_request_add_commit_list_locked(req, list, cinfo);
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
nfs_mark_page_unstable(req->wb_page, cinfo); nfs_mark_page_unstable(req->wb_page, cinfo);
} }
EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
......
...@@ -191,6 +191,7 @@ static const match_table_t nfs_mount_option_tokens = { ...@@ -191,6 +191,7 @@ static const match_table_t nfs_mount_option_tokens = {
enum { enum {
Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
Opt_xprt_rdma6,
Opt_xprt_err Opt_xprt_err
}; };
...@@ -201,6 +202,7 @@ static const match_table_t nfs_xprt_protocol_tokens = { ...@@ -201,6 +202,7 @@ static const match_table_t nfs_xprt_protocol_tokens = {
{ Opt_xprt_tcp, "tcp" }, { Opt_xprt_tcp, "tcp" },
{ Opt_xprt_tcp6, "tcp6" }, { Opt_xprt_tcp6, "tcp6" },
{ Opt_xprt_rdma, "rdma" }, { Opt_xprt_rdma, "rdma" },
{ Opt_xprt_rdma6, "rdma6" },
{ Opt_xprt_err, NULL } { Opt_xprt_err, NULL }
}; };
...@@ -1456,6 +1458,8 @@ static int nfs_parse_mount_options(char *raw, ...@@ -1456,6 +1458,8 @@ static int nfs_parse_mount_options(char *raw,
mnt->flags |= NFS_MOUNT_TCP; mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
break; break;
case Opt_xprt_rdma6:
protofamily = AF_INET6;
case Opt_xprt_rdma: case Opt_xprt_rdma:
/* vector side protocols to TCP */ /* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP; mnt->flags |= NFS_MOUNT_TCP;
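The fallthrough from Opt_xprt_rdma6 into Opt_xprt_rdma is deliberate: both vector the mount flags to TCP semantics, and only the protocol family differs. A hypothetical mount invocation using the new netid (addresses invented):

	/* mount -t nfs -o vers=4.1,proto=rdma6,port=20049 [fd00::1]:/export /mnt */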
...@@ -2408,6 +2412,11 @@ static int nfs_compare_super_address(struct nfs_server *server1, ...@@ -2408,6 +2412,11 @@ static int nfs_compare_super_address(struct nfs_server *server1,
struct nfs_server *server2) struct nfs_server *server2)
{ {
struct sockaddr *sap1, *sap2; struct sockaddr *sap1, *sap2;
struct rpc_xprt *xprt1 = server1->client->cl_xprt;
struct rpc_xprt *xprt2 = server2->client->cl_xprt;
if (!net_eq(xprt1->xprt_net, xprt2->xprt_net))
return 0;
sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr;
sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr;
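The added check matters for containers: the same server address reached from different network namespaces can name different servers, so such mounts must never share a superblock. net_eq() returning false short-circuits the address comparison that follows:

	if (!net_eq(xprt1->xprt_net, xprt2->xprt_net))
		return 0;	/* different netns: never the same server */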
......
...@@ -245,8 +245,7 @@ static void nfs_mark_uptodate(struct nfs_page *req) ...@@ -245,8 +245,7 @@ static void nfs_mark_uptodate(struct nfs_page *req)
static int wb_priority(struct writeback_control *wbc) static int wb_priority(struct writeback_control *wbc)
{ {
int ret = 0; int ret = 0;
if (wbc->for_reclaim)
return FLUSH_HIGHPRI | FLUSH_COND_STABLE;
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
ret = FLUSH_COND_STABLE; ret = FLUSH_COND_STABLE;
return ret; return ret;
...@@ -737,7 +736,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) ...@@ -737,7 +736,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
head = req->wb_head; head = req->wb_head;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (likely(!PageSwapCache(head->wb_page))) { if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
set_page_private(head->wb_page, 0); set_page_private(head->wb_page, 0);
ClearPagePrivate(head->wb_page); ClearPagePrivate(head->wb_page);
smp_mb__after_atomic(); smp_mb__after_atomic();
...@@ -759,7 +758,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) ...@@ -759,7 +758,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
static void static void
nfs_mark_request_dirty(struct nfs_page *req) nfs_mark_request_dirty(struct nfs_page *req)
{ {
__set_page_dirty_nobuffers(req->wb_page); if (req->wb_page)
__set_page_dirty_nobuffers(req->wb_page);
} }
/* /*
...@@ -804,7 +804,7 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, ...@@ -804,7 +804,7 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
* number of outstanding requests requiring a commit as well as * number of outstanding requests requiring a commit as well as
* the MM page stats. * the MM page stats.
* *
* The caller must hold the cinfo->lock, and the nfs_page lock. * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.
*/ */
void void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
...@@ -832,10 +832,11 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); ...@@ -832,10 +832,11 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
void void
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{ {
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
nfs_mark_page_unstable(req->wb_page, cinfo); if (req->wb_page)
nfs_mark_page_unstable(req->wb_page, cinfo);
} }
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
...@@ -864,7 +865,7 @@ EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); ...@@ -864,7 +865,7 @@ EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode) struct inode *inode)
{ {
cinfo->lock = &inode->i_lock; cinfo->inode = inode;
cinfo->mds = &NFS_I(inode)->commit_info; cinfo->mds = &NFS_I(inode)->commit_info;
cinfo->ds = pnfs_get_ds_info(inode); cinfo->ds = pnfs_get_ds_info(inode);
cinfo->dreq = NULL; cinfo->dreq = NULL;
...@@ -967,7 +968,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo) ...@@ -967,7 +968,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
return cinfo->mds->ncommit; return cinfo->mds->ncommit;
} }
/* cinfo->lock held by caller */ /* cinfo->inode->i_lock held by caller */
int int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst, nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max) struct nfs_commit_info *cinfo, int max)
...@@ -979,7 +980,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, ...@@ -979,7 +980,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
if (!nfs_lock_request(req)) if (!nfs_lock_request(req))
continue; continue;
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
if (cond_resched_lock(cinfo->lock)) if (cond_resched_lock(&cinfo->inode->i_lock))
list_safe_reset_next(req, tmp, wb_list); list_safe_reset_next(req, tmp, wb_list);
nfs_request_remove_commit_list(req, cinfo); nfs_request_remove_commit_list(req, cinfo);
nfs_list_add_request(req, dst); nfs_list_add_request(req, dst);
...@@ -1005,7 +1006,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, ...@@ -1005,7 +1006,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
{ {
int ret = 0; int ret = 0;
spin_lock(cinfo->lock); spin_lock(&cinfo->inode->i_lock);
if (cinfo->mds->ncommit > 0) { if (cinfo->mds->ncommit > 0) {
const int max = INT_MAX; const int max = INT_MAX;
...@@ -1013,7 +1014,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, ...@@ -1013,7 +1014,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
cinfo, max); cinfo, max);
ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
} }
spin_unlock(cinfo->lock); spin_unlock(&cinfo->inode->i_lock);
return ret; return ret;
} }
...@@ -1709,6 +1710,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, ...@@ -1709,6 +1710,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
{ {
struct nfs_commit_data *data; struct nfs_commit_data *data;
/* another commit raced with us */
if (list_empty(head))
return 0;
data = nfs_commitdata_alloc(); data = nfs_commitdata_alloc();
if (!data) if (!data)
...@@ -1724,6 +1729,36 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, ...@@ -1724,6 +1729,36 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
return -ENOMEM; return -ENOMEM;
} }
int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
{
struct inode *inode = file_inode(file);
struct nfs_open_context *open;
struct nfs_commit_info cinfo;
struct nfs_page *req;
int ret;
open = get_nfs_open_context(nfs_file_open_context(file));
req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
if (IS_ERR(req)) {
ret = PTR_ERR(req);
goto out_put;
}
nfs_init_cinfo_from_inode(&cinfo, inode);
memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
nfs_request_add_commit_list(req, &cinfo);
ret = nfs_commit_inode(inode, FLUSH_SYNC);
if (ret > 0)
ret = 0;
nfs_free_request(req);
out_put:
put_nfs_open_context(open);
return ret;
}
EXPORT_SYMBOL_GPL(nfs_commit_file);
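nfs_commit_file() exists for the new COPY path: a copy that the server wrote unstably must be committed on the destination with the verifier the server returned. A hedged sketch of the expected caller (dst_file is a placeholder; the field names follow the nfs42_copy_res definition later in this diff):

	if (res->write_res.verifier.committed != NFS_FILE_SYNC)
		status = nfs_commit_file(dst_file,
					 &res->write_res.verifier.verifier);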
/* /*
* COMMIT call returned * COMMIT call returned
*/ */
...@@ -1748,7 +1783,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) ...@@ -1748,7 +1783,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
while (!list_empty(&data->pages)) { while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next); req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req); nfs_list_remove_request(req);
nfs_clear_page_commit(req->wb_page); if (req->wb_page)
nfs_clear_page_commit(req->wb_page);
dprintk("NFS: commit (%s/%llu %d@%lld)", dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id, req->wb_context->dentry->d_sb->s_id,
......
...@@ -28,5 +28,6 @@ ...@@ -28,5 +28,6 @@
#define EBADTYPE 527 /* Type not supported by server */ #define EBADTYPE 527 /* Type not supported by server */
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */
#define ERECALLCONFLICT 530 /* conflict with recalled state */
#endif #endif
...@@ -50,12 +50,27 @@ struct nfs4_label { ...@@ -50,12 +50,27 @@ struct nfs4_label {
typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
struct nfs_stateid4 { struct nfs4_stateid_struct {
__be32 seqid; union {
char other[NFS4_STATEID_OTHER_SIZE]; char data[NFS4_STATEID_SIZE];
} __attribute__ ((packed)); struct {
__be32 seqid;
char other[NFS4_STATEID_OTHER_SIZE];
} __attribute__ ((packed));
};
enum {
NFS4_INVALID_STATEID_TYPE = 0,
NFS4_SPECIAL_STATEID_TYPE,
NFS4_OPEN_STATEID_TYPE,
NFS4_LOCK_STATEID_TYPE,
NFS4_DELEGATION_STATEID_TYPE,
NFS4_LAYOUT_STATEID_TYPE,
NFS4_PNFS_DS_STATEID_TYPE,
} type;
};
typedef struct nfs_stateid4 nfs4_stateid; typedef struct nfs4_stateid_struct nfs4_stateid;
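The union keeps the wire layout intact: existing code still treats a stateid as an opaque NFS4_STATEID_SIZE blob through data[], while the new in-core type tag (never transmitted) lets helpers record and assert what kind of stateid they hold. An illustrative check, not taken from this series:

	if (stateid->type == NFS4_DELEGATION_STATEID_TYPE)
		nfs4_stateid_copy(dst, stateid);	/* prefer the delegation stateid */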
enum nfs_opnum4 { enum nfs_opnum4 {
OP_ACCESS = 3, OP_ACCESS = 3,
...@@ -504,6 +519,7 @@ enum { ...@@ -504,6 +519,7 @@ enum {
NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_CLONE,
NFSPROC4_CLNT_COPY,
}; };
/* nfs41 types */ /* nfs41 types */
...@@ -621,7 +637,9 @@ enum pnfs_update_layout_reason { ...@@ -621,7 +637,9 @@ enum pnfs_update_layout_reason {
PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
PNFS_UPDATE_LAYOUT_FOUND_CACHED, PNFS_UPDATE_LAYOUT_FOUND_CACHED,
PNFS_UPDATE_LAYOUT_RETURN, PNFS_UPDATE_LAYOUT_RETURN,
PNFS_UPDATE_LAYOUT_RETRY,
PNFS_UPDATE_LAYOUT_BLOCKED, PNFS_UPDATE_LAYOUT_BLOCKED,
PNFS_UPDATE_LAYOUT_INVALID_OPEN,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
}; };
......
...@@ -246,5 +246,6 @@ struct nfs_server { ...@@ -246,5 +246,6 @@ struct nfs_server {
#define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22) #define NFS_CAP_LAYOUTSTATS (1U << 22)
#define NFS_CAP_CLONE (1U << 23) #define NFS_CAP_CLONE (1U << 23)
#define NFS_CAP_COPY (1U << 24)
#endif #endif
...@@ -233,7 +233,6 @@ struct nfs4_layoutget_args { ...@@ -233,7 +233,6 @@ struct nfs4_layoutget_args {
struct inode *inode; struct inode *inode;
struct nfs_open_context *ctx; struct nfs_open_context *ctx;
nfs4_stateid stateid; nfs4_stateid stateid;
unsigned long timestamp;
struct nfs4_layoutdriver_data layout; struct nfs4_layoutdriver_data layout;
}; };
...@@ -251,7 +250,6 @@ struct nfs4_layoutget { ...@@ -251,7 +250,6 @@ struct nfs4_layoutget {
struct nfs4_layoutget_res res; struct nfs4_layoutget_res res;
struct rpc_cred *cred; struct rpc_cred *cred;
gfp_t gfp_flags; gfp_t gfp_flags;
long timeout;
}; };
struct nfs4_getdeviceinfo_args { struct nfs4_getdeviceinfo_args {
...@@ -1343,6 +1341,32 @@ struct nfs42_falloc_res { ...@@ -1343,6 +1341,32 @@ struct nfs42_falloc_res {
const struct nfs_server *falloc_server; const struct nfs_server *falloc_server;
}; };
struct nfs42_copy_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh *src_fh;
nfs4_stateid src_stateid;
u64 src_pos;
struct nfs_fh *dst_fh;
nfs4_stateid dst_stateid;
u64 dst_pos;
u64 count;
};
struct nfs42_write_res {
u64 count;
struct nfs_writeverf verifier;
};
struct nfs42_copy_res {
struct nfs4_sequence_res seq_res;
struct nfs42_write_res write_res;
bool consecutive;
bool synchronous;
};
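These argument and result structures map one-to-one onto the NFSv4.2 COPY operation. A hedged sketch of filling the arguments (src/dst are struct file pointers and pos_src/pos_dst/count are placeholder names; the stateids come from the open or lock state and are elided):

	struct nfs42_copy_args args = {
		.src_fh	 = NFS_FH(file_inode(src)),
		.src_pos = pos_src,
		.dst_fh	 = NFS_FH(file_inode(dst)),
		.dst_pos = pos_dst,
		.count	 = count,
	};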
struct nfs42_seek_args { struct nfs42_seek_args {
struct nfs4_sequence_args seq_args; struct nfs4_sequence_args seq_args;
...@@ -1431,7 +1455,7 @@ struct nfs_commit_completion_ops { ...@@ -1431,7 +1455,7 @@ struct nfs_commit_completion_ops {
}; };
struct nfs_commit_info { struct nfs_commit_info {
spinlock_t *lock; /* inode->i_lock */ struct inode *inode; /* Needed for inode->i_lock */
struct nfs_mds_commit_info *mds; struct nfs_mds_commit_info *mds;
struct pnfs_ds_commit_info *ds; struct pnfs_ds_commit_info *ds;
struct nfs_direct_req *dreq; /* O_DIRECT request */ struct nfs_direct_req *dreq; /* O_DIRECT request */
......
...@@ -127,7 +127,7 @@ struct rpc_authops { ...@@ -127,7 +127,7 @@ struct rpc_authops {
void (*destroy)(struct rpc_auth *); void (*destroy)(struct rpc_auth *);
struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t);
int (*list_pseudoflavors)(rpc_authflavor_t *, int); int (*list_pseudoflavors)(rpc_authflavor_t *, int);
rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *);
int (*flavor2info)(rpc_authflavor_t, int (*flavor2info)(rpc_authflavor_t,
...@@ -167,6 +167,7 @@ void rpc_destroy_authunix(void); ...@@ -167,6 +167,7 @@ void rpc_destroy_authunix(void);
struct rpc_cred * rpc_lookup_cred(void); struct rpc_cred * rpc_lookup_cred(void);
struct rpc_cred * rpc_lookup_cred_nonblock(void); struct rpc_cred * rpc_lookup_cred_nonblock(void);
struct rpc_cred * rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t);
struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name);
int rpcauth_register(const struct rpc_authops *); int rpcauth_register(const struct rpc_authops *);
int rpcauth_unregister(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *);
...@@ -178,7 +179,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, ...@@ -178,7 +179,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t,
int rpcauth_get_gssinfo(rpc_authflavor_t, int rpcauth_get_gssinfo(rpc_authflavor_t,
struct rpcsec_gss_info *); struct rpcsec_gss_info *);
int rpcauth_list_flavors(rpc_authflavor_t *, int); int rpcauth_list_flavors(rpc_authflavor_t *, int);
struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t);
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
...@@ -201,9 +202,28 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *); ...@@ -201,9 +202,28 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *);
static inline static inline
struct rpc_cred * get_rpccred(struct rpc_cred *cred) struct rpc_cred * get_rpccred(struct rpc_cred *cred)
{ {
atomic_inc(&cred->cr_count); if (cred != NULL)
atomic_inc(&cred->cr_count);
return cred; return cred;
} }
/**
* get_rpccred_rcu - get a reference to a cred using rcu-protected pointer
* @cred: cred of which to take a reference
*
* In some cases, we may have a pointer to a credential to which we
* want to take a reference, but don't already have one. Because these
* objects are freed using RCU, we can access the cr_count while it's

* on its way to destruction and only take a reference if it's not already
* zero.
*/
static inline struct rpc_cred *
get_rpccred_rcu(struct rpc_cred *cred)
{
if (atomic_inc_not_zero(&cred->cr_count))
return cred;
return NULL;
}
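Typical use of get_rpccred_rcu() (a sketch; the delegation field names are illustrative, not quoted from this series): look up a cred published through an RCU-protected pointer and take a reference only if the cred is not already on its way to being freed:

	rcu_read_lock();
	delegation = rcu_dereference(NFS_I(inode)->delegation);
	if (delegation != NULL)
		cred = get_rpccred_rcu(delegation->cred);
	rcu_read_unlock();
	if (cred != NULL) {
		/* ... use cred ... */
		put_rpccred(cred);
	}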
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_AUTH_H */ #endif /* _LINUX_SUNRPC_AUTH_H */
...@@ -176,6 +176,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); ...@@ -176,6 +176,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
int rpc_protocol(struct rpc_clnt *); int rpc_protocol(struct rpc_clnt *);
struct net * rpc_net_ns(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *);
size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *);
size_t rpc_max_bc_payload(struct rpc_clnt *);
unsigned long rpc_get_timeout(struct rpc_clnt *clnt); unsigned long rpc_get_timeout(struct rpc_clnt *clnt);
void rpc_force_rebind(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *);
size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
......
...@@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr; ...@@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr;
/* /*
* Note that RFC 1833 does not put any size restrictions on the * Note that RFC 1833 does not put any size restrictions on the
* netid string, but all currently defined netid's fit in 4 bytes. * netid string, but all currently defined netid's fit in 5 bytes.
*/ */
#define RPCBIND_MAXNETIDLEN (4u) #define RPCBIND_MAXNETIDLEN (5u)
/* /*
* Universal addresses are introduced in RFC 1833 and further spelled * Universal addresses are introduced in RFC 1833 and further spelled
......
...@@ -142,6 +142,7 @@ struct rpc_xprt_ops { ...@@ -142,6 +142,7 @@ struct rpc_xprt_ops {
int (*bc_setup)(struct rpc_xprt *xprt, int (*bc_setup)(struct rpc_xprt *xprt,
unsigned int min_reqs); unsigned int min_reqs);
int (*bc_up)(struct svc_serv *serv, struct net *net); int (*bc_up)(struct svc_serv *serv, struct net *net);
size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_free_rqst)(struct rpc_rqst *rqst);
void (*bc_destroy)(struct rpc_xprt *xprt, void (*bc_destroy)(struct rpc_xprt *xprt,
unsigned int max_reqs); unsigned int max_reqs);
......
...@@ -52,7 +52,9 @@ ...@@ -52,7 +52,9 @@
#define RPCRDMA_DEF_SLOT_TABLE (128U) #define RPCRDMA_DEF_SLOT_TABLE (128U)
#define RPCRDMA_MAX_SLOT_TABLE (256U) #define RPCRDMA_MAX_SLOT_TABLE (256U)
#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */
#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */
#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */
/* Memory registration strategies, by number. /* Memory registration strategies, by number.
* This is part of a kernel / user space API. Do not remove. */ * This is part of a kernel / user space API. Do not remove. */
......
...@@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void) ...@@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void)
*/ */
struct rpc_cred * struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
int flags) int flags, gfp_t gfp)
{ {
LIST_HEAD(free); LIST_HEAD(free);
struct rpc_cred_cache *cache = auth->au_credcache; struct rpc_cred_cache *cache = auth->au_credcache;
...@@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, ...@@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (flags & RPCAUTH_LOOKUP_RCU) if (flags & RPCAUTH_LOOKUP_RCU)
return ERR_PTR(-ECHILD); return ERR_PTR(-ECHILD);
new = auth->au_ops->crcreate(auth, acred, flags); new = auth->au_ops->crcreate(auth, acred, flags, gfp);
if (IS_ERR(new)) { if (IS_ERR(new)) {
cred = new; cred = new;
goto out; goto out;
...@@ -703,8 +703,7 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) ...@@ -703,8 +703,7 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
new = rpcauth_bind_new_cred(task, lookupflags); new = rpcauth_bind_new_cred(task, lookupflags);
if (IS_ERR(new)) if (IS_ERR(new))
return PTR_ERR(new); return PTR_ERR(new);
if (req->rq_cred != NULL) put_rpccred(req->rq_cred);
put_rpccred(req->rq_cred);
req->rq_cred = new; req->rq_cred = new;
return 0; return 0;
} }
...@@ -712,6 +711,8 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) ...@@ -712,6 +711,8 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
void void
put_rpccred(struct rpc_cred *cred) put_rpccred(struct rpc_cred *cred)
{ {
if (cred == NULL)
return;
/* Fast path for unhashed credentials */ /* Fast path for unhashed credentials */
if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
if (atomic_dec_and_test(&cred->cr_count)) if (atomic_dec_and_test(&cred->cr_count))
......
...@@ -38,6 +38,13 @@ struct rpc_cred *rpc_lookup_cred(void) ...@@ -38,6 +38,13 @@ struct rpc_cred *rpc_lookup_cred(void)
} }
EXPORT_SYMBOL_GPL(rpc_lookup_cred); EXPORT_SYMBOL_GPL(rpc_lookup_cred);
struct rpc_cred *
rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
{
return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
}
EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
struct rpc_cred *rpc_lookup_cred_nonblock(void) struct rpc_cred *rpc_lookup_cred_nonblock(void)
{ {
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
...@@ -77,15 +84,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task, ...@@ -77,15 +84,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
static struct rpc_cred * static struct rpc_cred *
generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{ {
return rpcauth_lookup_credcache(&generic_auth, acred, flags); return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
} }
static struct rpc_cred * static struct rpc_cred *
generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{ {
struct generic_cred *gcred; struct generic_cred *gcred;
gcred = kmalloc(sizeof(*gcred), GFP_KERNEL); gcred = kmalloc(sizeof(*gcred), gfp);
if (gcred == NULL) if (gcred == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred) ...@@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred)
static struct rpc_cred * static struct rpc_cred *
gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{ {
return rpcauth_lookup_credcache(auth, acred, flags); return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
} }
static struct rpc_cred * static struct rpc_cred *
gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{ {
struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
struct gss_cred *cred = NULL; struct gss_cred *cred = NULL;
...@@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) ...@@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
__func__, from_kuid(&init_user_ns, acred->uid), __func__, from_kuid(&init_user_ns, acred->uid),
auth->au_flavor); auth->au_flavor);
if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) if (!(cred = kzalloc(sizeof(*cred), gfp)))
goto out_err; goto out_err;
rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
......
...@@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth) ...@@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth)
static struct rpc_cred * static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{ {
return rpcauth_lookup_credcache(auth, acred, flags); return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
} }
static struct rpc_cred * static struct rpc_cred *
unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{ {
struct unx_cred *cred; struct unx_cred *cred;
unsigned int groups = 0; unsigned int groups = 0;
...@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) ...@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
from_kuid(&init_user_ns, acred->uid), from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid)); from_kgid(&init_user_ns, acred->gid));
if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) if (!(cred = kmalloc(sizeof(*cred), gfp)))
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
......
...@@ -1413,6 +1413,23 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) ...@@ -1413,6 +1413,23 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
} }
EXPORT_SYMBOL_GPL(rpc_max_payload); EXPORT_SYMBOL_GPL(rpc_max_payload);
/**
* rpc_max_bc_payload - Get maximum backchannel payload size, in bytes
* @clnt: RPC client to query
*/
size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
size_t ret;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
ret = xprt->ops->bc_maxpayload(xprt);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
/** /**
* rpc_get_timeout - Get timeout for transport in units of HZ * rpc_get_timeout - Get timeout for transport in units of HZ
* @clnt: RPC client to query * @clnt: RPC client to query
......
...@@ -797,6 +797,8 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) ...@@ -797,6 +797,8 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr_set_iov(xdr, buf->head, buf->len); xdr_set_iov(xdr, buf->head, buf->len);
else if (buf->page_len != 0) else if (buf->page_len != 0)
xdr_set_page_base(xdr, 0, buf->len); xdr_set_page_base(xdr, 0, buf->len);
else
xdr_set_iov(xdr, buf->head, buf->len);
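	/* One reading of the new else arm (not taken from the commit text):
	 * a reply whose head kvec and page list are both empty previously
	 * left the stream cursor (xdr->p, xdr->end) unset, so bounds checks
	 * like the one just below operated on stale pointers. Setting the
	 * iov from the empty head makes a zero-length buffer a well-formed,
	 * immediately-exhausted stream.
	 */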
if (p != NULL && p > xdr->p && xdr->end >= p) { if (p != NULL && p > xdr->p && xdr->end >= p) {
xdr->nwords -= p - xdr->p; xdr->nwords -= p - xdr->p;
xdr->p = p; xdr->p = p;
......
...@@ -191,6 +191,22 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) ...@@ -191,6 +191,22 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
return 0; return 0;
} }
/**
* xprt_rdma_bc_maxpayload - Return maximum backchannel message size
* @xprt: transport
*
* Returns maximum size, in bytes, of a backchannel message
*/
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
size_t maxmsg;
maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
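/* Worked example (assuming RPCRDMA_HDRLEN_MIN is the 28-byte minimal
 * transport header): with the default 1024-byte inline thresholds,
 * min(1024, 1024) - 28 = 996 bytes are available per backchannel
 * message.
 */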
/** /**
* rpcrdma_bc_marshal_reply - Send backwards direction reply * rpcrdma_bc_marshal_reply - Send backwards direction reply
* @rqst: buffer containing RPC reply data * @rqst: buffer containing RPC reply data
......
...@@ -35,10 +35,71 @@ ...@@ -35,10 +35,71 @@
/* Maximum scatter/gather per FMR */ /* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES (64) #define RPCRDMA_MAX_FMR_SGES (64)
static struct workqueue_struct *fmr_recovery_wq;
#define FMR_RECOVERY_WQ_FLAGS (WQ_UNBOUND)
int
fmr_alloc_recovery_wq(void)
{
fmr_recovery_wq = alloc_workqueue("fmr_recovery", FMR_RECOVERY_WQ_FLAGS, 0);
return !fmr_recovery_wq ? -ENOMEM : 0;
}
void
fmr_destroy_recovery_wq(void)
{
struct workqueue_struct *wq;
if (!fmr_recovery_wq)
return;
wq = fmr_recovery_wq;
fmr_recovery_wq = NULL;
destroy_workqueue(wq);
}
static int
__fmr_unmap(struct rpcrdma_mw *mw)
{
LIST_HEAD(l);
list_add(&mw->fmr.fmr->list, &l);
return ib_unmap_fmr(&l);
}
/* Deferred reset of a single FMR. Generate a fresh rkey by
* replacing the MR. There's no recovery if this fails.
*/
static void
__fmr_recovery_worker(struct work_struct *work)
{
struct rpcrdma_mw *mw = container_of(work, struct rpcrdma_mw,
mw_work);
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
__fmr_unmap(mw);
rpcrdma_put_mw(r_xprt, mw);
return;
}
/* A broken MR was discovered in a context that can't sleep.
* Defer recovery to the recovery worker.
*/
static void
__fmr_queue_recovery(struct rpcrdma_mw *mw)
{
INIT_WORK(&mw->mw_work, __fmr_recovery_worker);
queue_work(fmr_recovery_wq, &mw->mw_work);
}
static int static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata) struct rpcrdma_create_data_internal *cdata)
{ {
rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES));
return 0; return 0;
} }
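For scale (assuming 4 KB pages): RPCRDMA_MAX_DATA_SEGS works out to (1 MB / 4 KB) = 256, so FMR advertises max(1, 256 / 64) = 4 chunk segments when sizing headers; the max_t() guard covers configurations where RPCRDMA_MAX_DATA_SEGS is smaller than RPCRDMA_MAX_FMR_SGES and the division would otherwise round down to zero.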
...@@ -48,7 +109,7 @@ static size_t ...@@ -48,7 +109,7 @@ static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{ {
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
} }
static int static int
...@@ -89,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) ...@@ -89,6 +150,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
if (IS_ERR(r->fmr.fmr)) if (IS_ERR(r->fmr.fmr))
goto out_fmr_err; goto out_fmr_err;
r->mw_xprt = r_xprt;
list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_list, &buf->rb_mws);
list_add(&r->mw_all, &buf->rb_all); list_add(&r->mw_all, &buf->rb_all);
} }
...@@ -104,15 +166,6 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) ...@@ -104,15 +166,6 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
return rc; return rc;
} }
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
LIST_HEAD(l);
list_add(&r->fmr.fmr->list, &l);
return ib_unmap_fmr(&l);
}
/* Use the ib_map_phys_fmr() verb to register a memory region /* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE. * for remote access via RDMA READ or RDMA WRITE.
*/ */
...@@ -183,15 +236,10 @@ static void ...@@ -183,15 +236,10 @@ static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) __fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{ {
struct ib_device *device = r_xprt->rx_ia.ri_device; struct ib_device *device = r_xprt->rx_ia.ri_device;
struct rpcrdma_mw *mw = seg->rl_mw;
int nsegs = seg->mr_nsegs; int nsegs = seg->mr_nsegs;
seg->rl_mw = NULL;
while (nsegs--) while (nsegs--)
rpcrdma_unmap_one(device, seg++); rpcrdma_unmap_one(device, seg++);
rpcrdma_put_mw(r_xprt, mw);
} }
/* Invalidate all memory regions that were registered for "req". /* Invalidate all memory regions that were registered for "req".
...@@ -234,42 +282,50 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) ...@@ -234,42 +282,50 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
seg = &req->rl_segments[i]; seg = &req->rl_segments[i];
__fmr_dma_unmap(r_xprt, seg); __fmr_dma_unmap(r_xprt, seg);
rpcrdma_put_mw(r_xprt, seg->rl_mw);
i += seg->mr_nsegs; i += seg->mr_nsegs;
seg->mr_nsegs = 0; seg->mr_nsegs = 0;
seg->rl_mw = NULL;
} }
req->rl_nchunks = 0; req->rl_nchunks = 0;
} }
/* Use the ib_unmap_fmr() verb to prevent further remote /* Use a slow, safe mechanism to invalidate all memory regions
* access via RDMA READ or RDMA WRITE. * that were registered for "req".
*
* In the asynchronous case, DMA unmapping occurs first here
* because the rpcrdma_mr_seg is released immediately after this
* call. Its contents won't be available in __fmr_dma_unmap later.
* FIXME.
*/ */
static int static void
fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
bool sync)
{ {
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_mw *mw;
struct rpcrdma_mw *mw = seg1->rl_mw; unsigned int i;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FMR %p\n", __func__, mw); for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
seg = &req->rl_segments[i];
mw = seg->rl_mw;
seg1->rl_mw = NULL; if (sync) {
while (seg1->mr_nsegs--) /* ORDER */
rpcrdma_unmap_one(ia->ri_device, seg++); __fmr_unmap(mw);
rc = __fmr_unmap(mw); __fmr_dma_unmap(r_xprt, seg);
if (rc) rpcrdma_put_mw(r_xprt, mw);
goto out_err; } else {
rpcrdma_put_mw(r_xprt, mw); __fmr_dma_unmap(r_xprt, seg);
return nsegs; __fmr_queue_recovery(mw);
}
out_err: i += seg->mr_nsegs;
/* The FMR is abandoned, but remains in rb_all. fmr_op_destroy seg->mr_nsegs = 0;
* will attempt to release it when the transport is destroyed. seg->rl_mw = NULL;
*/ }
dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
return nsegs;
} }
static void static void
...@@ -295,7 +351,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) ...@@ -295,7 +351,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map, .ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync, .ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap = fmr_op_unmap, .ro_unmap_safe = fmr_op_unmap_safe,
.ro_open = fmr_op_open, .ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages, .ro_maxpages = fmr_op_maxpages,
.ro_init = fmr_op_init, .ro_init = fmr_op_init,
......
...@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
__func__, PTR_ERR(mr)); __func__, PTR_ERR(mr));
return -ENOMEM; return -ENOMEM;
} }
ia->ri_dma_mr = mr; ia->ri_dma_mr = mr;
rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int,
RPCRDMA_MAX_DATA_SEGS,
RPCRDMA_MAX_HDR_SEGS));
return 0; return 0;
} }
...@@ -47,7 +50,7 @@ static size_t ...@@ -47,7 +50,7 @@ static size_t
physical_op_maxpages(struct rpcrdma_xprt *r_xprt) physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
{ {
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
rpcrdma_max_segments(r_xprt)); RPCRDMA_MAX_HDR_SEGS);
} }
static int static int
...@@ -71,17 +74,6 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -71,17 +74,6 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return 1; return 1;
} }
/* Unmap a memory region, but leave it registered.
*/
static int
physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
rpcrdma_unmap_one(ia->ri_device, seg);
return 1;
}
/* DMA unmap all memory regions that were mapped for "req". /* DMA unmap all memory regions that were mapped for "req".
*/ */
static void static void
...@@ -94,6 +86,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) ...@@ -94,6 +86,25 @@ physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
rpcrdma_unmap_one(device, &req->rl_segments[i++]); rpcrdma_unmap_one(device, &req->rl_segments[i++]);
} }
/* Use a slow, safe mechanism to invalidate all memory regions
* that were registered for "req".
*
* For physical memory registration, there is no good way to
* fence a single MR that has been advertised to the server. The
* client has already handed the server an R_key that cannot be
* invalidated and is shared by all MRs on this connection.
* Tearing down the PD might be the only safe choice, but it's
* not clear that a freshly acquired DMA R_key would be different
* than the one used by the PD that was just destroyed.
* FIXME.
*/
static void
physical_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
bool sync)
{
physical_op_unmap_sync(r_xprt, req);
}
static void static void
physical_op_destroy(struct rpcrdma_buffer *buf) physical_op_destroy(struct rpcrdma_buffer *buf)
{ {
...@@ -102,7 +113,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf) ...@@ -102,7 +113,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map, .ro_map = physical_op_map,
.ro_unmap_sync = physical_op_unmap_sync, .ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap = physical_op_unmap, .ro_unmap_safe = physical_op_unmap_safe,
.ro_open = physical_op_open, .ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages, .ro_maxpages = physical_op_maxpages,
.ro_init = physical_op_init, .ro_init = physical_op_init,
......
...@@ -73,6 +73,8 @@ static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; ...@@ -73,6 +73,8 @@ static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
static unsigned int zero; static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE; static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
...@@ -96,6 +98,8 @@ static struct ctl_table xr_tunables_table[] = { ...@@ -96,6 +98,8 @@ static struct ctl_table xr_tunables_table[] = {
.maxlen = sizeof(unsigned int), .maxlen = sizeof(unsigned int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
}, },
{ {
.procname = "rdma_max_inline_write", .procname = "rdma_max_inline_write",
...@@ -103,6 +107,8 @@ static struct ctl_table xr_tunables_table[] = { ...@@ -103,6 +107,8 @@ static struct ctl_table xr_tunables_table[] = {
.maxlen = sizeof(unsigned int), .maxlen = sizeof(unsigned int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
}, },
{ {
.procname = "rdma_inline_write_padding", .procname = "rdma_inline_write_padding",
...@@ -508,6 +514,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) ...@@ -508,6 +514,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
out: out:
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */ req->rl_connect_cookie = 0; /* our reserved value */
req->rl_task = task;
return req->rl_sendbuf->rg_base; return req->rl_sendbuf->rg_base;
out_rdmabuf: out_rdmabuf:
...@@ -564,7 +571,6 @@ xprt_rdma_free(void *buffer) ...@@ -564,7 +571,6 @@ xprt_rdma_free(void *buffer)
struct rpcrdma_req *req; struct rpcrdma_req *req;
struct rpcrdma_xprt *r_xprt; struct rpcrdma_xprt *r_xprt;
struct rpcrdma_regbuf *rb; struct rpcrdma_regbuf *rb;
int i;
if (buffer == NULL) if (buffer == NULL)
return; return;
...@@ -578,11 +584,8 @@ xprt_rdma_free(void *buffer) ...@@ -578,11 +584,8 @@ xprt_rdma_free(void *buffer)
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
for (i = 0; req->rl_nchunks;) { r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req,
--req->rl_nchunks; !RPC_IS_ASYNC(req->rl_task));
i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
&req->rl_segments[i]);
}
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
...@@ -707,6 +710,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { ...@@ -707,6 +710,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
#if defined(CONFIG_SUNRPC_BACKCHANNEL) #if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup, .bc_setup = xprt_rdma_bc_setup,
.bc_up = xprt_rdma_bc_up, .bc_up = xprt_rdma_bc_up,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
.bc_free_rqst = xprt_rdma_bc_free_rqst, .bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy, .bc_destroy = xprt_rdma_bc_destroy,
#endif #endif
......
...@@ -203,15 +203,6 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc) ...@@ -203,15 +203,6 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
goto out_schedule; goto out_schedule;
} }
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
struct ib_wc wc;
while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
rpcrdma_receive_wc(NULL, &wc);
}
static int static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{ {
...@@ -373,23 +364,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -373,23 +364,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return ERR_PTR(rc); return ERR_PTR(rc);
} }
/*
* Drain any cq, prior to teardown.
*/
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
struct ib_wc wc;
int count = 0;
while (1 == ib_poll_cq(cq, 1, &wc))
++count;
if (count)
dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
__func__, count, wc.opcode);
}
/* /*
* Exported functions. * Exported functions.
*/ */
...@@ -459,7 +433,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ...@@ -459,7 +433,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
dprintk("RPC: %s: memory registration strategy is '%s'\n", dprintk("RPC: %s: memory registration strategy is '%s'\n",
__func__, ia->ri_ops->ro_displayname); __func__, ia->ri_ops->ro_displayname);
rwlock_init(&ia->ri_qplock);
return 0; return 0;
out3: out3:
...@@ -515,7 +488,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -515,7 +488,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
__func__); __func__);
return -ENOMEM; return -ENOMEM;
} }
max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS; max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
/* check provider's send/recv wr limits */ /* check provider's send/recv wr limits */
if (cdata->max_requests > max_qp_wr) if (cdata->max_requests > max_qp_wr)
...@@ -526,11 +499,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -526,11 +499,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL; ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests; ep->rep_attr.cap.max_send_wr = cdata->max_requests;
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */
rc = ia->ri_ops->ro_open(ia, ep, cdata); rc = ia->ri_ops->ro_open(ia, ep, cdata);
if (rc) if (rc)
return rc; return rc;
ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.cap.max_inline_data = 0;
...@@ -578,6 +553,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -578,6 +553,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.recv_cq = recvcq; ep->rep_attr.recv_cq = recvcq;
/* Initialize cma parameters */ /* Initialize cma parameters */
memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));
/* RPC/RDMA does not use private data */ /* RPC/RDMA does not use private data */
ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data = NULL;
...@@ -591,7 +567,16 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -591,7 +567,16 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_remote_cma.responder_resources = ep->rep_remote_cma.responder_resources =
ia->ri_device->attrs.max_qp_rd_atom; ia->ri_device->attrs.max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7; /* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
* transport connection and retransmission.
*/
ep->rep_remote_cma.retry_count = 6;
/* RPC-over-RDMA handles its own flow control. In addition,
* make all RNR NAKs visible so we know that RPC-over-RDMA
* flow control is working correctly (no NAKs should be seen).
*/
ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.flow_control = 0;
ep->rep_remote_cma.rnr_retry_count = 0; ep->rep_remote_cma.rnr_retry_count = 0;
...@@ -622,13 +607,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -622,13 +607,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
cancel_delayed_work_sync(&ep->rep_connect_worker); cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp)
rpcrdma_ep_disconnect(ep, ia);
rpcrdma_clean_cq(ep->rep_attr.recv_cq);
rpcrdma_clean_cq(ep->rep_attr.send_cq);
if (ia->ri_id->qp) { if (ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id); rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL; ia->ri_id->qp = NULL;
} }
...@@ -659,7 +639,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -659,7 +639,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: reconnecting...\n", __func__); dprintk("RPC: %s: reconnecting...\n", __func__);
rpcrdma_ep_disconnect(ep, ia); rpcrdma_ep_disconnect(ep, ia);
rpcrdma_flush_cqs(ep);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia, id = rpcrdma_create_id(xprt, ia,
...@@ -692,10 +671,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -692,10 +671,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
goto out; goto out;
} }
write_lock(&ia->ri_qplock);
old = ia->ri_id; old = ia->ri_id;
ia->ri_id = id; ia->ri_id = id;
write_unlock(&ia->ri_qplock);
rdma_destroy_qp(old); rdma_destroy_qp(old);
rpcrdma_destroy_id(old); rpcrdma_destroy_id(old);
...@@ -785,7 +762,6 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -785,7 +762,6 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{ {
int rc; int rc;
rpcrdma_flush_cqs(ep);
rc = rdma_disconnect(ia->ri_id); rc = rdma_disconnect(ia->ri_id);
if (!rc) { if (!rc) {
/* returns without wait if not connected */ /* returns without wait if not connected */
@@ -797,6 +773,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
 		ep->rep_connected = rc;
 	}
+
+	ib_drain_qp(ia->ri_id->qp);
 }
 struct rpcrdma_req *
@@ -1271,25 +1249,3 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
 	rpcrdma_recv_buffer_put(rep);
 	return rc;
 }
-
-/* How many chunk list items fit within our inline buffers?
- */
-unsigned int
-rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	int bytes, segments;
-
-	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
-	bytes -= RPCRDMA_HDRLEN_MIN;
-	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
-		pr_warn("RPC: %s: inline threshold too small\n",
-			__func__);
-		return 0;
-	}
-
-	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
-	dprintk("RPC: %s: max chunk list size = %d segments\n",
-		__func__, segments);
-	return segments;
-}
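The helper removed above sized chunk lists by rounding the usable inline buffer space down to a power-of-two number of segment slots. A minimal standalone sketch of that arithmetic, assuming RPCRDMA_HDRLEN_MIN is 28 bytes and sizeof(struct rpcrdma_segment) is 16 bytes (handle, length, 64-bit offset); the loop stands in for the kernel's fls()-based rounding:

    #include <stdio.h>

    /* Sketch only: the constants below are assumptions, not from this diff. */
    static unsigned int max_segments(unsigned int inline_wsize,
                                     unsigned int inline_rsize)
    {
            unsigned int bytes, segments;

            bytes = inline_wsize < inline_rsize ? inline_wsize : inline_rsize;
            bytes -= 28;                    /* assumed RPCRDMA_HDRLEN_MIN */
            if (bytes < 16 * 2)             /* need room for two segments */
                    return 0;

            /* Round down to a power of two, as 1 << (fls(n) - 1) does */
            segments = 1;
            while (segments * 2 <= bytes / 16)
                    segments *= 2;
            return segments;
    }

    int main(void)
    {
            /* Default 1 KB inline thresholds: (1024 - 28) / 16 = 62 -> 32 */
            printf("%u\n", max_segments(1024, 1024));
            return 0;
    }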
@@ -65,7 +65,6 @@
  */
 struct rpcrdma_ia {
 	const struct rpcrdma_memreg_ops *ri_ops;
-	rwlock_t ri_qplock;
 	struct ib_device *ri_device;
 	struct rdma_cm_id *ri_id;
 	struct ib_pd *ri_pd;
@@ -73,6 +72,8 @@ struct rpcrdma_ia {
 	struct completion ri_done;
 	int ri_async_rc;
 	unsigned int ri_max_frmr_depth;
+	unsigned int ri_max_inline_write;
+	unsigned int ri_max_inline_read;
 	struct ib_qp_attr ri_qp_attr;
 	struct ib_qp_init_attr ri_qp_init_attr;
 };
@@ -144,6 +145,26 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
 #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+
+/* To ensure a transport can always make forward progress,
+ * the number of RDMA segments allowed in header chunk lists
+ * is capped at 8. This prevents less-capable devices and
+ * memory registrations from overrunning the Send buffer
+ * while building chunk lists.
+ *
+ * Elements of the Read list take up more room than the
+ * Write list or Reply chunk. 8 read segments means the Read
+ * list (or Write list or Reply chunk) cannot consume more
+ * than
+ *
+ * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes.
+ *
+ * And the fixed part of the header is another 24 bytes.
+ *
+ * The smallest inline threshold is 1024 bytes, ensuring that
+ * at least 750 bytes are available for RPC messages.
+ */
+#define RPCRDMA_MAX_HDR_SEGS (8)
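The 244-byte figure in the comment above can be checked directly. Assuming each Read list element encodes as 6 XDR words (list discriminator, position, handle, length, and a two-word offset), a quick compile-and-run sanity check:

    #include <assert.h>

    int main(void)
    {
            const unsigned int xdr_word = 4;               /* bytes */
            const unsigned int read_seg = 6 * xdr_word;    /* assumed element size */
            const unsigned int chunk_lists = ((8 + 2) * read_seg) + xdr_word;
            const unsigned int fixed_hdr = 24;

            assert(chunk_lists == 244);
            /* 1024 - 244 - 24 = 756, comfortably above the stated 750 */
            assert(1024 - chunk_lists - fixed_hdr >= 750);
            return 0;
    }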
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
  * and complete a reply, asynchronously. It needs several pieces of
@@ -162,7 +183,9 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
  */
 #define RPCRDMA_MAX_DATA_SEGS ((1 * 1024 * 1024) / PAGE_SIZE)
-#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
+
+/* data segments + head/tail for Call + head/tail for Reply */
+#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 4)
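Worked numbers for the new bound, assuming 4 KB pages (PAGE_SIZE is architecture-specific, so these values are an assumption):

    #include <assert.h>

    int main(void)
    {
            const unsigned int page_size = 4096;   /* assumed */
            const unsigned int max_data_segs = (1 * 1024 * 1024) / page_size;

            assert(max_data_segs == 256);
            assert(max_data_segs + 2 == 258);      /* old: head + tail once */
            assert(max_data_segs + 4 == 260);      /* new: head + tail for both
                                                      Call and Reply */
            return 0;
    }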
 struct rpcrdma_buffer;
@@ -198,14 +221,13 @@ enum rpcrdma_frmr_state {
 };
 
 struct rpcrdma_frmr {
-	struct scatterlist *sg;
-	int sg_nents;
+	struct scatterlist *fr_sg;
+	int fr_nents;
+	enum dma_data_direction fr_dir;
 	struct ib_mr *fr_mr;
 	struct ib_cqe fr_cqe;
 	enum rpcrdma_frmr_state fr_state;
 	struct completion fr_linv_done;
-	struct work_struct fr_work;
-	struct rpcrdma_xprt *fr_xprt;
 	union {
 		struct ib_reg_wr fr_regwr;
 		struct ib_send_wr fr_invwr;
@@ -222,6 +244,8 @@ struct rpcrdma_mw {
 		struct rpcrdma_fmr fmr;
 		struct rpcrdma_frmr frmr;
 	};
+	struct work_struct mw_work;
+	struct rpcrdma_xprt *mw_xprt;
 	struct list_head mw_list;
 	struct list_head mw_all;
 };
@@ -270,12 +294,14 @@ struct rpcrdma_req {
 	unsigned int rl_niovs;
 	unsigned int rl_nchunks;
 	unsigned int rl_connect_cookie;
+	struct rpc_task *rl_task;
 	struct rpcrdma_buffer *rl_buffer;
 	struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
 	struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS];
 	struct rpcrdma_regbuf *rl_rdmabuf;
 	struct rpcrdma_regbuf *rl_sendbuf;
 	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
+	struct rpcrdma_mr_seg *rl_nextseg;
 	struct ib_cqe rl_cqe;
 	struct list_head rl_all;
@@ -372,8 +398,8 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_mr_seg *, int, bool);
 	void (*ro_unmap_sync)(struct rpcrdma_xprt *,
 			      struct rpcrdma_req *);
-	int (*ro_unmap)(struct rpcrdma_xprt *,
-			struct rpcrdma_mr_seg *);
+	void (*ro_unmap_safe)(struct rpcrdma_xprt *,
+			      struct rpcrdma_req *, bool);
 	int (*ro_open)(struct rpcrdma_ia *,
 		       struct rpcrdma_ep *,
 		       struct rpcrdma_create_data_internal *);
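The replacement method takes the whole rpcrdma_req plus a flag selecting synchronous invalidation, rather than unmapping one segment at a time. A hedged sketch of a call site (the wrapper name is assumed; the real caller in the transport may differ):

    /* Sketch, not the actual call site: unmap everything registered for
     * "req", waiting for invalidation to finish only when "sync" is true.
     */
    static void teardown_request(struct rpcrdma_xprt *r_xprt,
                                 struct rpcrdma_req *req, bool sync)
    {
            r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, sync);
    }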
@@ -456,7 +482,6 @@ struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 			 struct rpcrdma_regbuf *);
-unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
 int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
 int frwr_alloc_recovery_wq(void);
@@ -519,6 +544,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
 int rpcrdma_marshal_req(struct rpc_rqst *);
+void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
+				  struct rpcrdma_create_data_internal *,
+				  unsigned int);
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
@@ -534,6 +562,7 @@ void xprt_rdma_cleanup(void);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
 int xprt_rdma_bc_up(struct svc_serv *, struct net *);
+size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
 int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
 void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
 int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
...
@@ -1364,6 +1364,11 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
 		return ret;
 	return 0;
 }
+
+static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
+{
+	return PAGE_SIZE;
+}
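With the new op wired into xs_tcp_ops below, generic code can query a transport's backchannel payload limit without knowing its concrete type. A hedged sketch of such a query (the helper name is assumed, not from this diff):

    /* Sketch: fall back to zero when a transport provides no backchannel
     * payload callback.
     */
    static size_t bc_max_payload(struct rpc_xprt *xprt)
    {
            if (xprt->ops->bc_maxpayload)
                    return xprt->ops->bc_maxpayload(xprt);
            return 0;
    }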
 #else
 static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 				    struct xdr_skb_reader *desc)
@@ -2661,6 +2666,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 	.bc_setup = xprt_setup_bc,
 	.bc_up = xs_tcp_bc_up,
+	.bc_maxpayload = xs_tcp_bc_maxpayload,
 	.bc_free_rqst = xprt_free_bc_rqst,
 	.bc_destroy = xprt_destroy_bc,
 #endif
...