Commit 75ff24fa authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.15' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:
   - server-side nfs/rdma fixes from Jeff Layton and Tom Tucker
   - xdr fixes (a larger xdr rewrite has been posted but I decided it
     would be better to queue it up for 3.16).
   - miscellaneous fixes and cleanup from all over (thanks especially to
     Kinglong Mee)"

* 'for-3.15' of git://linux-nfs.org/~bfields/linux: (36 commits)
  nfsd4: don't create unnecessary mask acl
  nfsd: revert v2 half of "nfsd: don't return high mode bits"
  nfsd4: fix memory leak in nfsd4_encode_fattr()
  nfsd: check passed socket's net matches NFSd superblock's one
  SUNRPC: Clear xpt_bc_xprt if xs_setup_bc_tcp failed
  NFSD/SUNRPC: Check rpc_xprt out of xs_setup_bc_tcp
  SUNRPC: New helper for creating client with rpc_xprt
  NFSD: Free backchannel xprt in bc_destroy
  NFSD: Clear wcc data between compound ops
  nfsd: Don't return NFS4ERR_STALE_STATEID for NFSv4.1+
  nfsd4: fix nfs4err_resource in 4.1 case
  nfsd4: fix setclientid encode size
  nfsd4: remove redundant check from nfsd4_check_resp_size
  nfsd4: use more generous NFS4_ACL_MAX
  nfsd4: minor nfsd4_replay_cache_entry cleanup
  nfsd4: nfsd4_replay_cache_entry should be static
  nfsd4: update comments with obsolete function name
  rpc: Allow xdr_buf_subsegment to operate in-place
  NFSD: Using free_conn free connection
  SUNRPC: fix memory leak of peer addresses in XPRT
  ...
parents 0f386a70 06f9cc12
......@@ -235,6 +235,7 @@ static int make_socks(struct svc_serv *serv, struct net *net)
if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
svc_shutdown_net(serv, net);
return err;
}
......
......@@ -39,9 +39,13 @@ struct nfs4_acl;
struct svc_fh;
struct svc_rqst;
/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to
* fit in a page: */
#define NFS4_ACL_MAX 170
/*
* Maximum ACL we'll accept from a client; chosen (somewhat
* arbitrarily) so that kmalloc'ing the ACL shouldn't require a
* high-order allocation. This allows 204 ACEs on x86_64:
*/
#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
/ sizeof(struct nfs4_ace))
struct nfs4_acl *nfs4_acl_new(int);
int nfs4_acl_get_whotype(char *, u32);
......
......@@ -542,7 +542,10 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
* up setting a 3-element effective posix ACL with all
* permissions zero.
*/
nace = 4 + state->users->n + state->groups->n;
if (!state->users->n && !state->groups->n)
nace = 3;
else /* Note we also include a MASK ACE in this case: */
nace = 4 + state->users->n + state->groups->n;
pacl = posix_acl_alloc(nace, GFP_KERNEL);
if (!pacl)
return ERR_PTR(-ENOMEM);
......@@ -586,9 +589,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
add_to_mask(state, &state->groups->aces[i].perms);
}
pace++;
pace->e_tag = ACL_MASK;
low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
if (!state->users->n && !state->groups->n) {
pace++;
pace->e_tag = ACL_MASK;
low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
}
pace++;
pace->e_tag = ACL_OTHER;
......
......@@ -32,6 +32,7 @@
*/
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include "nfsd.h"
......@@ -635,6 +636,22 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
}
}
static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
if (args->protocol != XPRT_TRANSPORT_BC_TCP)
return rpc_create(args);
xprt = args->bc_xprt->xpt_bc_xprt;
if (xprt) {
xprt_get(xprt);
return rpc_create_xprt(args, xprt);
}
return rpc_create(args);
}
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
struct rpc_timeout timeparms = {
......@@ -674,7 +691,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
client = rpc_create(&args);
client = create_backchannel_client(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client: %ld\n",
PTR_ERR(client));
......
......@@ -1273,6 +1273,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
struct nfsd4_op *op;
struct nfsd4_operation *opdesc;
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
int slack_bytes;
u32 plen = 0;
__be32 status;
......@@ -1288,11 +1290,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
resp->tag = args->tag;
resp->opcnt = 0;
resp->rqstp = rqstp;
resp->cstate.minorversion = args->minorversion;
resp->cstate.replay_owner = NULL;
resp->cstate.session = NULL;
fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
cstate->minorversion = args->minorversion;
cstate->replay_owner = NULL;
cstate->session = NULL;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
......@@ -1345,20 +1347,28 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
opdesc = OPDESC(op);
if (!cstate->current_fh.fh_dentry) {
if (!current_fh->fh_dentry) {
if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
} else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
} else if (current_fh->fh_export->ex_fslocs.migrated &&
!(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
}
fh_clear_wcc(current_fh);
/* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
plen = opdesc->op_rsize_bop(rqstp, op);
/*
* If there's still another operation, make sure
* we'll have space to at least encode an error:
*/
if (resp->opcnt < args->opcnt)
plen += COMPOUND_ERR_SLACK_SPACE;
op->status = nfsd4_check_resp_size(resp, plen);
}
......@@ -1377,12 +1387,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
clear_current_stateid(cstate);
if (need_wrongsec_check(rqstp))
op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
encode_op:
/* Only from SEQUENCE */
if (resp->cstate.status == nfserr_replay_cache) {
if (cstate->status == nfserr_replay_cache) {
dprintk("%s NFS4.1 replay from cache\n", __func__);
status = op->status;
goto out;
......@@ -1411,10 +1421,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
nfsd4_increment_op_stats(op->opnum);
}
resp->cstate.status = status;
fh_put(&resp->cstate.current_fh);
fh_put(&resp->cstate.save_fh);
BUG_ON(resp->cstate.replay_owner);
cstate->status = status;
fh_put(current_fh);
fh_put(save_fh);
BUG_ON(cstate->replay_owner);
out:
/* Reset deferral mechanism for RPC deferrals */
rqstp->rq_usedeferral = 1;
......@@ -1523,7 +1533,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
sizeof(__be32);
}
static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
......
......@@ -1538,7 +1538,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
}
/*
* Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
* Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
*/
void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
......@@ -1596,7 +1596,7 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
* The sequence operation is not cached because we can use the slot and
* session values.
*/
__be32
static __be32
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq)
{
......@@ -1605,9 +1605,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
dprintk("--> %s slot %p\n", __func__, slot);
/* Either returns 0 or nfserr_retry_uncached */
status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
if (status == nfserr_retry_uncached_rep)
if (status)
return status;
/* The sequence operation has been encoded, cstate->datap set. */
......@@ -2287,7 +2286,8 @@ nfsd4_sequence(struct svc_rqst *rqstp,
if (!list_empty(&clp->cl_revoked))
seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
out_no_session:
kfree(conn);
if (conn)
free_conn(conn);
spin_unlock(&nn->client_lock);
return status;
out_put_session:
......@@ -3627,8 +3627,11 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
return nfserr_bad_stateid;
status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
nn, &cl);
if (status == nfserr_stale_clientid)
if (status == nfserr_stale_clientid) {
if (sessions)
return nfserr_bad_stateid;
return nfserr_stale_stateid;
}
if (status)
return status;
*s = find_stateid_by_type(cl, stateid, typemask);
......@@ -5062,7 +5065,6 @@ nfs4_state_destroy_net(struct net *net)
int i;
struct nfs4_client *clp = NULL;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct rb_node *node, *tmp;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->conf_id_hashtbl[i])) {
......@@ -5071,13 +5073,11 @@ nfs4_state_destroy_net(struct net *net)
}
}
node = rb_first(&nn->unconf_name_tree);
while (node != NULL) {
tmp = node;
node = rb_next(tmp);
clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
rb_erase(tmp, &nn->unconf_name_tree);
destroy_client(clp);
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->unconf_id_hashtbl[i])) {
clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
destroy_client(clp);
}
}
kfree(nn->sessionid_hashtbl);
......
......@@ -294,7 +294,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ32(nace);
if (nace > NFS4_ACL_MAX)
return nfserr_resource;
return nfserr_fbig;
*acl = nfs4_acl_new(nace);
if (*acl == NULL)
......@@ -1222,7 +1222,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
}
write->wr_head.iov_base = p;
write->wr_head.iov_len = avail;
WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
write->wr_pagelist = argp->pagelist;
len = XDR_QUADLEN(write->wr_buflen) << 2;
......@@ -2483,6 +2482,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out;
}
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
if ((buflen -= 16) < 0)
goto out_resource;
WRITE32(3);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
......@@ -2499,8 +2500,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
security_release_secctx(context, contextlen);
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
kfree(acl);
if (tempfh)
if (tempfh) {
fh_put(tempfh);
kfree(tempfh);
}
return status;
out_nfserr:
status = nfserrno(err);
......@@ -3471,6 +3474,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
if (nfserr)
return nfserr;
RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
*p++ = htonl(test_stateid->ts_num_ids);
......@@ -3579,8 +3585,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
return 0;
session = resp->cstate.session;
if (session == NULL)
return 0;
if (xb->page_len == 0) {
length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
......@@ -3620,9 +3624,17 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
/* nfsd4_check_drc_limit guarantees enough room for error status */
/* nfsd4_check_resp_size guarantees enough room for error status */
if (!op->status)
op->status = nfsd4_check_resp_size(resp, 0);
if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
struct nfsd4_slot *slot = resp->cstate.slot;
if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
op->status = nfserr_rep_too_big_to_cache;
else
op->status = nfserr_rep_too_big;
}
if (so) {
so->so_replay.rp_status = op->status;
so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
......@@ -3691,6 +3703,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
int
nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
{
if (rqstp->rq_arg.head[0].iov_len % 4) {
/* client is nuts */
dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
__func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
return 0;
}
args->p = p;
args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
args->pagelist = rqstp->rq_arg.pages;
......
......@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
if (err != 0 || fd < 0)
return -EINVAL;
if (svc_alien_sock(net, fd)) {
printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
return -EINVAL;
}
err = nfsd_create_serv(net);
if (err != 0)
return err;
......
......@@ -282,7 +282,7 @@ void nfsd_lockd_shutdown(void);
* reason.
*/
#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
......
......@@ -132,6 +132,17 @@ fh_init(struct svc_fh *fhp, int maxsize)
}
#ifdef CONFIG_NFSD_V3
/*
* The wcc data stored in current_fh should be cleared
* between compound ops.
*/
static inline void
fh_clear_wcc(struct svc_fh *fhp)
{
fhp->fh_post_saved = 0;
fhp->fh_pre_saved = 0;
}
/*
* Fill in the pre_op attr for the wcc data
*/
......@@ -152,7 +163,8 @@ fill_pre_wcc(struct svc_fh *fhp)
extern void fill_post_wcc(struct svc_fh *);
#else
#define fill_pre_wcc(ignored)
#define fh_clear_wcc(ignored)
#define fill_pre_wcc(ignored)
#define fill_post_wcc(notused)
#endif /* CONFIG_NFSD_V3 */
......
......@@ -152,7 +152,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
type = (stat->mode & S_IFMT);
*p++ = htonl(nfs_ftypes[type >> 12]);
*p++ = htonl((u32) (stat->mode & S_IALLUGO));
*p++ = htonl((u32) stat->mode);
*p++ = htonl((u32) stat->nlink);
*p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
*p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
......
......@@ -404,6 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
umode_t ftype = 0;
__be32 err;
int host_err;
bool get_write_count;
int size_change = 0;
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
......@@ -411,10 +412,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
/* Callers that do fh_verify should do the fh_want_write: */
get_write_count = !fhp->fh_dentry;
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
goto out;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
return nfserrno(host_err);
}
dentry = fhp->fh_dentry;
inode = dentry->d_inode;
......@@ -1706,10 +1715,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
dput(odentry);
out_nfserr:
err = nfserrno(host_err);
/* we cannot reply on fh_unlock on the two filehandles,
/*
* We cannot rely on fh_unlock on the two filehandles,
* as that would do the wrong thing if the two directories
* were the same, so again we do it by hand
* were the same, so again we do it by hand.
*/
fill_post_wcc(ffhp);
fill_post_wcc(tfhp);
......
......@@ -574,8 +574,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid_confirm *setclientid_confirm);
extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
......
......@@ -130,6 +130,8 @@ struct rpc_create_args {
#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
const struct rpc_program *, u32);
void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
......
......@@ -56,6 +56,7 @@ int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
bool svc_alien_sock(struct net *net, int fd);
int svc_addsock(struct svc_serv *serv, const int fd,
char *name_return, const size_t len);
void svc_init_xprt_sock(void);
......
......@@ -295,13 +295,24 @@ int xprt_adjust_timeout(struct rpc_rqst *req);
void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_release(struct rpc_task *task);
struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
void xprt_put(struct rpc_xprt *xprt);
struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_req);
void xprt_free(struct rpc_xprt *);
/**
* xprt_get - return a reference to an RPC transport.
* @xprt: pointer to the transport
*
*/
static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
{
if (atomic_inc_not_zero(&xprt->count))
return xprt;
return NULL;
}
static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
{
return p + xprt->tsh_size;
......
......@@ -9,19 +9,6 @@ config SUNRPC_BACKCHANNEL
bool
depends on SUNRPC
config SUNRPC_XPRT_RDMA
tristate
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS client and server to support
an RDMA-enabled transport.
To compile RPC client RDMA transport support as a module,
choose M here: the module will be called xprtrdma.
If unsure, say N.
config SUNRPC_SWAP
bool
depends on SUNRPC
......@@ -57,3 +44,29 @@ config SUNRPC_DEBUG
but makes troubleshooting NFS issues significantly harder.
If unsure, say Y.
config SUNRPC_XPRT_RDMA_CLIENT
tristate "RPC over RDMA Client Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS client to support an RDMA-enabled
transport.
To compile RPC client RDMA transport support as a module,
choose M here: the module will be called xprtrdma.
If unsure, say N.
config SUNRPC_XPRT_RDMA_SERVER
tristate "RPC over RDMA Server Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS server to support an RDMA-enabled
transport.
To compile RPC server RDMA transport support as a module,
choose M here: the module will be called svcrdma.
If unsure, say N.
......@@ -5,7 +5,8 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
obj-y += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
......
......@@ -438,6 +438,38 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
return ERR_PTR(err);
}
struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
clnt = rpc_new_client(args, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
int err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
}
clnt->cl_softrtry = 1;
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
clnt->cl_softrtry = 0;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
return clnt;
}
EXPORT_SYMBOL_GPL(rpc_create_xprt);
/**
* rpc_create - create an RPC client and transport with one call
* @args: rpc_clnt create argument structure
......@@ -451,7 +483,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
struct xprt_create xprtargs = {
.net = args->net,
.ident = args->protocol,
......@@ -515,30 +546,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
clnt = rpc_new_client(args, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
int err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
}
clnt->cl_softrtry = 1;
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
clnt->cl_softrtry = 0;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
return clnt;
return rpc_create_xprt(args, xprt);
}
EXPORT_SYMBOL_GPL(rpc_create);
......
......@@ -1397,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return svsk;
}
bool svc_alien_sock(struct net *net, int fd)
{
int err;
struct socket *sock = sockfd_lookup(fd, &err);
bool ret = false;
if (!sock)
goto out;
if (sock_net(sock->sk) != net)
ret = true;
sockfd_put(sock);
out:
return ret;
}
EXPORT_SYMBOL_GPL(svc_alien_sock);
/**
* svc_addsock - add a listener socket to an RPC service
* @serv: pointer to RPC service to which to add a new listener
......
......@@ -833,8 +833,20 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
}
EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
/* Sets subbuf to the portion of buf of length len beginning base bytes
* from the start of buf. Returns -1 if base of length are out of bounds. */
/**
* xdr_buf_subsegment - set subbuf to a portion of buf
* @buf: an xdr buffer
* @subbuf: the result buffer
* @base: beginning of range in bytes
* @len: length of range in bytes
*
* sets @subbuf to an xdr buffer representing the portion of @buf of
* length @len starting at offset @base.
*
* @buf and @subbuf may be pointers to the same struct xdr_buf.
*
* Returns -1 if base of length are out of bounds.
*/
int
xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
unsigned int base, unsigned int len)
......@@ -847,9 +859,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
len -= subbuf->head[0].iov_len;
base = 0;
} else {
subbuf->head[0].iov_base = NULL;
subbuf->head[0].iov_len = 0;
base -= buf->head[0].iov_len;
subbuf->head[0].iov_len = 0;
}
if (base < buf->page_len) {
......@@ -871,9 +882,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
len -= subbuf->tail[0].iov_len;
base = 0;
} else {
subbuf->tail[0].iov_base = NULL;
subbuf->tail[0].iov_len = 0;
base -= buf->tail[0].iov_len;
subbuf->tail[0].iov_len = 0;
}
if (base || len)
......
......@@ -1383,15 +1383,3 @@ void xprt_put(struct rpc_xprt *xprt)
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
/**
* xprt_get - return a reference to an RPC transport.
* @xprt: pointer to the transport
*
*/
struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
{
if (atomic_inc_not_zero(&xprt->count))
return xprt;
return NULL;
}
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
......@@ -90,6 +90,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
......@@ -169,6 +170,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt,
*/
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
byte_count -= sge_bytes;
ch_bytes -= sge_bytes;
......@@ -276,6 +278,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
/* rq_respages points one past arg pages */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Create the reply and chunk maps */
offset = 0;
......@@ -520,13 +523,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
rqstp->rq_pages[ch_no] = NULL;
/*
* Detach res pages. If svc_release sees any it will attempt to
* put them.
*/
while (rqstp->rq_next_page != rqstp->rq_respages)
*(--rqstp->rq_next_page) = NULL;
return err;
}
......@@ -550,7 +546,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
......
......@@ -265,6 +265,7 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
xdr_off -= xdr->head[0].iov_len;
if (xdr_off < xdr->page_len) {
/* This offset is in the page list */
xdr_off += xdr->page_base;
page = xdr->pages[xdr_off >> PAGE_SHIFT];
xdr_off &= ~PAGE_MASK;
} else {
......@@ -625,6 +626,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
......
......@@ -477,8 +477,7 @@ struct page *svc_rdma_get_page(void)
while ((page = alloc_page(GFP_KERNEL)) == NULL) {
/* If we can't get memory, wait a bit and try again */
printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 "
"jiffies.\n");
printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
}
return page;
......
......@@ -909,6 +909,12 @@ static void xs_tcp_close(struct rpc_xprt *xprt)
xs_tcp_shutdown(xprt);
}
static void xs_xprt_free(struct rpc_xprt *xprt)
{
xs_free_peer_addresses(xprt);
xprt_free(xprt);
}
/**
* xs_destroy - prepare to shutdown a transport
* @xprt: doomed transport
......@@ -919,8 +925,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
dprintk("RPC: xs_destroy xprt %p\n", xprt);
xs_close(xprt);
xs_free_peer_addresses(xprt);
xprt_free(xprt);
xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
......@@ -2532,6 +2537,10 @@ static void bc_close(struct rpc_xprt *xprt)
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);
xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
static struct rpc_xprt_ops xs_local_ops = {
......@@ -2732,7 +2741,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
xprt_free(xprt);
xs_xprt_free(xprt);
return ret;
}
......@@ -2810,7 +2819,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
xprt_free(xprt);
xs_xprt_free(xprt);
return ret;
}
......@@ -2885,12 +2894,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PROTO]);
if (try_module_get(THIS_MODULE))
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
xprt_free(xprt);
xs_xprt_free(xprt);
return ret;
}
......@@ -2907,15 +2915,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
struct svc_sock *bc_sock;
struct rpc_xprt *ret;
if (args->bc_xprt->xpt_bc_xprt) {
/*
* This server connection already has a backchannel
* transport; we can't create a new one, as we wouldn't
* be able to match replies based on xid any more. So,
* reuse the already-existing one:
*/
return args->bc_xprt->xpt_bc_xprt;
}
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
......@@ -2973,13 +2972,14 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
*/
xprt_set_connected(xprt);
if (try_module_get(THIS_MODULE))
return xprt;
args->bc_xprt->xpt_bc_xprt = NULL;
xprt_put(xprt);
ret = ERR_PTR(-EINVAL);
out_err:
xprt_free(xprt);
xs_xprt_free(xprt);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment