Commit 14b395e3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-2.6.27' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.27' of git://linux-nfs.org/~bfields/linux: (51 commits)
  nfsd: nfs4xdr.c do-while is not a compound statement
  nfsd: Use C99 initializers in fs/nfsd/nfs4xdr.c
  lockd: Pass "struct sockaddr *" to new failover-by-IP function
  lockd: get host reference in nlmsvc_create_block() instead of callers
  lockd: minor svclock.c style fixes
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_lock
  lockd: eliminate duplicate nlmsvc_lookup_host call from nlmsvc_testlock
  lockd: nlm_release_host() checks for NULL, caller needn't
  file lock: reorder struct file_lock to save space on 64 bit builds
  nfsd: take file and mnt write in nfs4_upgrade_open
  nfsd: document open share bit tracking
  nfsd: tabulate nfs4 xdr encoding functions
  nfsd: dprint operation names
  svcrdma: Change WR context get/put to use the kmem cache
  svcrdma: Create a kmem cache for the WR contexts
  svcrdma: Add flush_scheduled_work to module exit function
  svcrdma: Limit ORD based on client's advertised IRD
  svcrdma: Remove unused wait q from svcrdma_xprt structure
  svcrdma: Remove unneeded spin locks from __svc_rdma_free
  svcrdma: Add dma map count and WARN_ON
  ...
parents 734b397c 5108b276
......@@ -5,7 +5,7 @@
################################################################################
Author: NetApp and Open Grid Computing
Date: April 15, 2008
Date: May 29, 2008
Table of Contents
~~~~~~~~~~~~~~~~~
......@@ -60,16 +60,18 @@ Installation
The procedures described in this document have been tested with
distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
- Install nfs-utils-1.1.1 or greater on the client
- Install nfs-utils-1.1.2 or greater on the client
An NFS/RDMA mount point can only be obtained by using the mount.nfs
command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
you are using, type:
An NFS/RDMA mount point can be obtained by using the mount.nfs command in
nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
version with support for NFS/RDMA mounts, but for various reasons we
recommend using nfs-utils-1.1.2 or greater). To see which version of
mount.nfs you are using, type:
> /sbin/mount.nfs -V
$ /sbin/mount.nfs -V
If the version is less than 1.1.1 or the command does not exist,
then you will need to install the latest version of nfs-utils.
If the version is less than 1.1.2 or the command does not exist,
you should install the latest version of nfs-utils.
Download the latest package from:
......@@ -77,22 +79,33 @@ Installation
Uncompress the package and follow the installation instructions.
If you will not be using GSS and NFSv4, the installation process
can be simplified by disabling these features when running configure:
If you will not need the idmapper and gssd executables (you do not need
these to create an NFS/RDMA enabled mount command), the installation
process can be simplified by disabling these features when running
configure:
> ./configure --disable-gss --disable-nfsv4
$ ./configure --disable-gss --disable-nfsv4
For more information on this see the package's README and INSTALL files.
To build nfs-utils you will need the tcp_wrappers package installed. For
more information on this see the package's README and INSTALL files.
After building the nfs-utils package, there will be a mount.nfs binary in
the utils/mount directory. This binary can be used to initiate NFS v2, v3,
or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
or v4 mounts. To initiate a v4 mount, the binary must be called
mount.nfs4. The standard technique is to create a symlink called
mount.nfs4 to mount.nfs.
NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
$ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
In this location, mount.nfs will be invoked automatically for NFS mounts
by the system mount commmand.
NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
on the NFS client machine. You do not need this specific version of
nfs-utils on the server. Furthermore, only the mount.nfs command from
nfs-utils-1.1.1 is needed on the client.
nfs-utils-1.1.2 is needed on the client.
- Install a Linux kernel with NFS/RDMA
......@@ -156,8 +169,8 @@ Check RDMA and NFS Setup
this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
card:
> modprobe ib_mthca
> modprobe ib_ipoib
$ modprobe ib_mthca
$ modprobe ib_ipoib
If you are using InfiniBand, make sure there is a Subnet Manager (SM)
running on the network. If your IB switch has an embedded SM, you can
......@@ -166,7 +179,7 @@ Check RDMA and NFS Setup
If an SM is running on your network, you should see the following:
> cat /sys/class/infiniband/driverX/ports/1/state
$ cat /sys/class/infiniband/driverX/ports/1/state
4: ACTIVE
where driverX is mthca0, ipath5, ehca3, etc.
......@@ -174,10 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
host1> ifconfig ib0 a.b.c.x
host2> ifconfig ib0 a.b.c.y
host1> ping a.b.c.y
host2> ping a.b.c.x
host1$ ifconfig ib0 a.b.c.x
host2$ ifconfig ib0 a.b.c.y
host1$ ping a.b.c.y
host2$ ping a.b.c.x
For other device types, follow the appropriate procedures.
......@@ -202,11 +215,11 @@ NFS/RDMA Setup
/vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
/vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
cleint's iWARP address(es) for an RNIC.
The IP address(es) is(are) the client's IPoIB address for an InfiniBand
HCA or the cleint's iWARP address(es) for an RNIC.
NOTE: The "insecure" option must be used because the NFS/RDMA client does not
use a reserved port.
NOTE: The "insecure" option must be used because the NFS/RDMA client does
not use a reserved port.
Each time a machine boots:
......@@ -214,43 +227,45 @@ NFS/RDMA Setup
For InfiniBand using a Mellanox adapter:
> modprobe ib_mthca
> modprobe ib_ipoib
> ifconfig ib0 a.b.c.d
$ modprobe ib_mthca
$ modprobe ib_ipoib
$ ifconfig ib0 a.b.c.d
NOTE: use unique addresses for the client and server
- Start the NFS server
If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
load the RDMA transport module:
If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
kernel config), load the RDMA transport module:
> modprobe svcrdma
$ modprobe svcrdma
Regardless of how the server was built (module or built-in), start the server:
Regardless of how the server was built (module or built-in), start the
server:
> /etc/init.d/nfs start
$ /etc/init.d/nfs start
or
> service nfs start
$ service nfs start
Instruct the server to listen on the RDMA transport:
> echo rdma 2050 > /proc/fs/nfsd/portlist
$ echo rdma 2050 > /proc/fs/nfsd/portlist
- On the client system
If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
load the RDMA client module:
If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
kernel config), load the RDMA client module:
> modprobe xprtrdma.ko
$ modprobe xprtrdma.ko
Regardless of how the client was built (module or built-in), issue the mount.nfs command:
Regardless of how the client was built (module or built-in), use this
command to mount the NFS/RDMA server:
> /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
$ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt
To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
"proto" field for the given mount.
To verify that the mount is using RDMA, run "cat /proc/mounts" and check
the "proto" field for the given mount.
Congratulations! You're using NFS/RDMA!
......@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
static struct task_struct *nlmsvc_task;
static struct svc_serv *nlmsvc_serv;
static struct svc_rqst *nlmsvc_rqst;
int nlmsvc_grace_period;
unsigned long nlmsvc_timeout;
......@@ -194,20 +194,11 @@ lockd(void *vrqstp)
svc_process(rqstp);
}
flush_signals(current);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
unlock_kernel();
nlmsvc_task = NULL;
nlmsvc_serv = NULL;
/* Exit the RPC thread */
svc_exit_thread(rqstp);
return 0;
}
......@@ -254,16 +245,15 @@ int
lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
{
struct svc_serv *serv;
struct svc_rqst *rqstp;
int error = 0;
mutex_lock(&nlmsvc_mutex);
/*
* Check whether we're already up and running.
*/
if (nlmsvc_serv) {
if (nlmsvc_rqst) {
if (proto)
error = make_socks(nlmsvc_serv, proto);
error = make_socks(nlmsvc_rqst->rq_server, proto);
goto out;
}
......@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
/*
* Create the kernel thread and wait for it to start.
*/
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
if (IS_ERR(nlmsvc_rqst)) {
error = PTR_ERR(nlmsvc_rqst);
nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: svc_rqst allocation failed, error=%d\n",
error);
......@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
}
svc_sock_update_bufs(serv);
nlmsvc_serv = rqstp->rq_server;
nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
svc_exit_thread(nlmsvc_rqst);
nlmsvc_task = NULL;
nlmsvc_serv = NULL;
nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error);
svc_exit_thread(rqstp);
goto destroy_and_out;
}
......@@ -346,6 +336,9 @@ lockd_down(void)
BUG();
}
kthread_stop(nlmsvc_task);
svc_exit_thread(nlmsvc_rqst);
nlmsvc_task = NULL;
nlmsvc_rqst = NULL;
out:
mutex_unlock(&nlmsvc_mutex);
}
......
......@@ -58,7 +58,6 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
if (host)
nlm_release_host(host);
if (error)
return error;
......@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
......@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
......
......@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
{
if(a->len != b->len)
if (a->len != b->len)
return 0;
if(memcmp(a->data,b->data,a->len))
if (memcmp(a->data, b->data, a->len))
return 0;
return 1;
}
......@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
struct nlm_block *block;
struct nlm_rqst *call = NULL;
nlm_get_host(host);
call = nlm_alloc_call(host);
if (call == NULL)
return NULL;
......@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
*/
__be32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
struct nlm_host *host;
int error;
__be32 ret;
......@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
/* Create host handle for callback */
host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
if (host == NULL)
return nlm_lck_denied_nolocks;
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
......@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
block = nlmsvc_lookup_block(file, lock);
if (block == NULL) {
block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
lock, cookie);
block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
ret = nlm_lck_denied_nolocks;
if (block == NULL)
goto out;
......@@ -417,7 +412,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
switch(error) {
switch (error) {
case 0:
ret = nlm_granted;
goto out;
......@@ -450,7 +445,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
out:
mutex_unlock(&file->f_mutex);
nlmsvc_release_block(block);
nlm_release_host(host);
dprintk("lockd: nlmsvc_lock returned %u\n", ret);
return ret;
}
......@@ -460,8 +454,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
__be32
nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, struct nlm_lock *conflock,
struct nlm_cookie *cookie)
struct nlm_host *host, struct nlm_lock *lock,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
int error;
......@@ -479,16 +473,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
if (block == NULL) {
struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
struct nlm_host *host;
if (conf == NULL)
return nlm_granted;
/* Create host handle for callback */
host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
if (host == NULL) {
kfree(conf);
return nlm_lck_denied_nolocks;
}
block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
if (block == NULL) {
kfree(conf);
......@@ -897,7 +884,7 @@ nlmsvc_retry_blocked(void)
if (block->b_when == NLM_NEVER)
break;
if (time_after(block->b_when,jiffies)) {
if (time_after(block->b_when, jiffies)) {
timeout = block->b_when - jiffies;
break;
}
......
......@@ -87,7 +87,6 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
if (host)
nlm_release_host(host);
if (error)
return error;
......@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
......@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
......
......@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
}
}
/*
* Remove all locks held for clients
/**
* nlmsvc_invalidate_all - remove all locks held for clients
*
* Release all locks held by NFS clients.
*
*/
void
nlmsvc_invalidate_all(void)
{
/* Release all locks held by NFS clients.
/*
* Previously, the code would call
* nlmsvc_free_host_resources for each client in
* turn, which is about as inefficient as it gets.
......@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
return sb == file->f_file->f_path.mnt->mnt_sb;
}
/**
* nlmsvc_unlock_all_by_sb - release locks held on this file system
* @sb: super block
*
* Release all locks held by clients accessing this file system.
*/
int
nlmsvc_unlock_all_by_sb(struct super_block *sb)
{
......@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
static int
nlmsvc_match_ip(void *datap, struct nlm_host *host)
{
__be32 *server_addr = datap;
return host->h_saddr.sin_addr.s_addr == *server_addr;
return nlm_cmp_addr(&host->h_saddr, datap);
}
/**
* nlmsvc_unlock_all_by_ip - release local locks by IP address
* @server_addr: server's IP address as seen by clients
*
* Release all locks held by clients accessing this host
* via the passed in IP address.
*/
int
nlmsvc_unlock_all_by_ip(__be32 server_addr)
nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
{
int ret;
ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
return ret ? -EIO : 0;
ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
return ret ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
......@@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
fh.fh_export = NULL;
exp_readlock();
nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
fh_put(&fh);
rqstp->rq_client = NULL;
exp_readunlock();
......
......@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
......@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
......@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
}
/*
......
......@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
......@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
......
......@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
......@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
attr = &argp->attrs;
/* Get the directory inode */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
......@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
/* Check special features of the file system. May request
* different read/write sizes for file systems known to have
......@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->p_case_insensitive = 0;
resp->p_case_preserving = 1;
nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
if (nfserr == 0) {
struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
......
......@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
return nfserr_inval;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
accmode |= MAY_READ;
accmode |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
accmode |= (MAY_WRITE | MAY_TRUNC);
accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
accmode |= MAY_WRITE;
accmode |= NFSD_MAY_WRITE;
status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
......@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
if (!created)
status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
status = do_open_permission(rqstp, current_fh, open,
NFSD_MAY_NOP);
out:
fh_put(&resfh);
......@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
(open->op_iattr.ia_size == 0);
status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
status = do_open_permission(rqstp, current_fh, open,
NFSD_MAY_OWNER_OVERRIDE);
return status;
}
......@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
rp->rp_openfh_len);
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
dprintk("nfsd4_open: replay failed"
" restoring previous filehandle\n");
......@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
}
static __be32
......@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_init(&resfh, NFS4_FHSIZE);
status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
NFSD_MAY_CREATE);
if (status == nfserr_symlink)
status = nfserr_notdir;
if (status)
......@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
......@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int count;
__be32 status;
status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
......@@ -843,10 +846,13 @@ struct nfsd4_operation {
#define ALLOWED_WITHOUT_FH 1
/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
#define ALLOWED_ON_ABSENT_FS 2
char *op_name;
};
static struct nfsd4_operation nfsd4_ops[];
static inline char *nfsd4_op_name(unsigned opnum);
/*
* COMPOUND call.
*/
......@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
resp->opcnt, args->opcnt, op->opnum,
nfsd4_op_name(op->opnum));
/*
* The XDR decode routines may have pre-set op->status;
......@@ -952,126 +960,170 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
out:
nfsd4_release_compoundargs(args);
cstate_free(cstate);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
.op_name = "OP_CLOSE",
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
.op_name = "OP_COMMIT",
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
.op_name = "OP_CREATE",
},
[OP_DELEGRETURN] = {
.op_func = (nfsd4op_func)nfsd4_delegreturn,
.op_name = "OP_DELEGRETURN",
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS,
.op_name = "OP_GETATTR",
},
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
.op_name = "OP_LINK",
},
[OP_LOCK] = {
.op_func = (nfsd4op_func)nfsd4_lock,
.op_name = "OP_LOCK",
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
.op_name = "OP_LOCKU",
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_name = "OP_LOOKUP",
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_name = "OP_LOOKUPP",
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
.op_name = "OP_OPEN",
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
.op_name = "OP_OPEN_CONFIRM",
},
[OP_OPEN_DOWNGRADE] = {
.op_func = (nfsd4op_func)nfsd4_open_downgrade,
.op_name = "OP_OPEN_DOWNGRADE",
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTFH",
},
[OP_PUTPUBFH] = {
/* unsupported; just for future reference: */
/* unsupported, just for future reference: */
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTPUBFH",
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_PUTROOTFH",
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
.op_name = "OP_READ",
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
.op_name = "OP_READDIR",
},
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
.op_name = "OP_REMOVE",
},
[OP_RENAME] = {
.op_name = "OP_RENAME",
.op_func = (nfsd4op_func)nfsd4_rename,
},
[OP_RENEW] = {
.op_func = (nfsd4op_func)nfsd4_renew,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RENEW",
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RESTOREFH",
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
.op_name = "OP_SAVEFH",
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_name = "OP_SECINFO",
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
.op_name = "OP_SETATTR",
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID",
},
[OP_SETCLIENTID_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID_CONFIRM",
},
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
.op_name = "OP_WRITE",
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RELEASE_LOCKOWNER",
},
};
static inline char *
nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
return "unknown_operation";
}
#define nfs4svc_decode_voidargs NULL
#define nfs4svc_release_void NULL
#define nfsd4_voidres nfsd4_voidargs
......
......@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
return x <= NFS4_SHARE_DENY_BOTH;
}
/*
* We store the NONE, READ, WRITE, and BOTH bits separately in the
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
* to enforce the recommendation of rfc 3530 14.2.19 that the server
* return an error if the client attempt to downgrade to a combination
* of share bits not explicable by closing some of its previous opens.
*
* XXX: This enforcement is actually incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
* OPEN allow both, deny none
* DOWNGRADE allow read, deny none
*
* which we should reject.
*/
static void
set_access(unsigned int *access, unsigned long bmap) {
int i;
......@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
int err = get_write_access(inode);
if (err)
return nfserrno(err);
err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
if (err)
return nfserrno(err);
file_take_write(filp);
}
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status) {
......@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
}
/* remember the open */
filp->f_mode |= open->op_share_access;
set_bit(open->op_share_access, &stp->st_access_bmap);
set_bit(open->op_share_deny, &stp->st_deny_bmap);
__set_bit(open->op_share_access, &stp->st_access_bmap);
__set_bit(open->op_share_deny, &stp->st_deny_bmap);
return nfs_ok;
}
......@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
/* Stateid was not found, this is a new OPEN */
int flags = 0;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
flags |= MAY_READ;
flags |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
flags |= MAY_WRITE;
flags |= NFSD_MAY_WRITE;
status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
if (status)
goto out;
......@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if ((status = fh_verify(rqstp, &cstate->current_fh,
S_IFREG, MAY_LOCK))) {
S_IFREG, NFSD_MAY_LOCK))) {
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
......@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
nfs4_unlock_state();
}
/*
* user_recovery_dirname is protected by the nfsd_mutex since it's only
* accessed when nfsd is starting.
*/
static void
nfs4_set_recdir(char *recdir)
{
nfs4_lock_state();
strcpy(user_recovery_dirname, recdir);
nfs4_unlock_state();
}
/*
......@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
return status;
}
char *
nfs4_recoverydir(void)
{
return user_recovery_dirname;
}
/*
* Called when leasetime is changed.
*
......@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
* we start to register any changes in lease time. If the administrator
* really wants to change the lease time *now*, they can go ahead and bring
* nfsd down and then back up again after changing the lease time.
*
* user_lease_time is protected by nfsd_mutex since it's only really accessed
* when nfsd is starting
*/
void
nfs4_reset_lease(time_t leasetime)
{
lock_kernel();
user_lease_time = leasetime;
unlock_kernel();
}
......@@ -985,11 +985,75 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
DECODE_TAIL;
}
static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{
return nfs_ok;
}
static __be32
nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
{
return nfserr_opnotsupp;
}
typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
static nfsd4_dec nfsd4_dec_ops[] = {
[OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
[OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
[OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
[OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
[OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
[OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
[OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
[OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
[OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
[OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
[OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
[OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
[OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
[OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
[OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
[OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
[OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_READ] = (nfsd4_dec)nfsd4_decode_read,
[OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
[OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
[OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
[OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
[OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
[OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
[OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
[OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
[OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
[OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
[OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
[OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
};
struct nfsd4_minorversion_ops {
nfsd4_dec *decoders;
int nops;
};
static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
[0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
};
static __be32
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
DECODE_HEAD;
struct nfsd4_op *op;
struct nfsd4_minorversion_ops *ops;
int i;
/*
......@@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
}
}
if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
argp->opcnt = 0;
ops = &nfsd4_minorversion[argp->minorversion];
for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i];
op->replay = NULL;
......@@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
}
op->opnum = ntohl(*argp->p++);
switch (op->opnum) {
case 2: /* Reserved operation */
op->opnum = OP_ILLEGAL;
if (argp->minorversion == 0)
op->status = nfserr_op_illegal;
else
op->status = nfserr_minor_vers_mismatch;
break;
case OP_ACCESS:
op->status = nfsd4_decode_access(argp, &op->u.access);
break;
case OP_CLOSE:
op->status = nfsd4_decode_close(argp, &op->u.close);
break;
case OP_COMMIT:
op->status = nfsd4_decode_commit(argp, &op->u.commit);
break;
case OP_CREATE:
op->status = nfsd4_decode_create(argp, &op->u.create);
break;
case OP_DELEGRETURN:
op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
break;
case OP_GETATTR:
op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
break;
case OP_GETFH:
op->status = nfs_ok;
break;
case OP_LINK:
op->status = nfsd4_decode_link(argp, &op->u.link);
break;
case OP_LOCK:
op->status = nfsd4_decode_lock(argp, &op->u.lock);
break;
case OP_LOCKT:
op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
break;
case OP_LOCKU:
op->status = nfsd4_decode_locku(argp, &op->u.locku);
break;
case OP_LOOKUP:
op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
break;
case OP_LOOKUPP:
op->status = nfs_ok;
break;
case OP_NVERIFY:
op->status = nfsd4_decode_verify(argp, &op->u.nverify);
break;
case OP_OPEN:
op->status = nfsd4_decode_open(argp, &op->u.open);
break;
case OP_OPEN_CONFIRM:
op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
break;
case OP_OPEN_DOWNGRADE:
op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
break;
case OP_PUTFH:
op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
break;
case OP_PUTROOTFH:
op->status = nfs_ok;
break;
case OP_READ:
op->status = nfsd4_decode_read(argp, &op->u.read);
break;
case OP_READDIR:
op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
break;
case OP_READLINK:
op->status = nfs_ok;
break;
case OP_REMOVE:
op->status = nfsd4_decode_remove(argp, &op->u.remove);
break;
case OP_RENAME:
op->status = nfsd4_decode_rename(argp, &op->u.rename);
break;
case OP_RESTOREFH:
op->status = nfs_ok;
break;
case OP_RENEW:
op->status = nfsd4_decode_renew(argp, &op->u.renew);
break;
case OP_SAVEFH:
op->status = nfs_ok;
break;
case OP_SECINFO:
op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
break;
case OP_SETATTR:
op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
break;
case OP_SETCLIENTID:
op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
break;
case OP_SETCLIENTID_CONFIRM:
op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
break;
case OP_VERIFY:
op->status = nfsd4_decode_verify(argp, &op->u.verify);
break;
case OP_WRITE:
op->status = nfsd4_decode_write(argp, &op->u.write);
break;
case OP_RELEASE_LOCKOWNER:
op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
break;
default:
if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
op->status = ops->decoders[op->opnum](argp, &op->u);
else {
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
break;
}
if (op->status) {
......@@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
*p++ = htonl((u32)((n) >> 32)); \
*p++ = htonl((u32)(n)); \
} while (0)
#define WRITEMEM(ptr,nbytes) do { \
#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
*(p + XDR_QUADLEN(nbytes) -1) = 0; \
memcpy(p, ptr, nbytes); \
p += XDR_QUADLEN(nbytes); \
} while (0)
}} while (0)
#define WRITECINFO(c) do { \
*p++ = htonl(c.atomic); \
*p++ = htonl(c.before_ctime_sec); \
......@@ -1991,7 +1950,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return -EINVAL;
}
static void
static __be32
nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
{
ENCODE_HEAD;
......@@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITE32(access->ac_resp_access);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
ADJUST_ARGS();
}
ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
return nfserr;
}
static void
static __be32
nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
{
ENCODE_HEAD;
......@@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITEMEM(commit->co_verf.data, 8);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
{
ENCODE_HEAD;
......@@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITE32(create->cr_bmval[1]);
ADJUST_ARGS();
}
return nfserr;
}
static __be32
......@@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
return nfserr;
}
static void
nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp)
static __be32
nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
{
struct svc_fh *fhp = *fhpp;
unsigned int len;
ENCODE_HEAD;
......@@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
WRITEMEM(&fhp->fh_handle.fh_base, len);
ADJUST_ARGS();
}
return nfserr;
}
/*
......@@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
ADJUST_ARGS();
}
static void
static __be32
nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
nfsd4_encode_lock_denied(resp, &lock->lk_denied);
ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
return nfserr;
}
static void
static __be32
nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
{
if (nfserr == nfserr_denied)
nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
return nfserr;
}
static void
static __be32
nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
}
ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
return nfserr;
}
static void
static __be32
nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
{
ENCODE_HEAD;
......@@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
WRITECINFO(link->li_cinfo);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
/* XXX save filehandle here */
out:
ENCODE_SEQID_OP_TAIL(open->op_stateowner);
return nfserr;
}
static void
static __be32
nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
}
ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
return nfserr;
}
static void
static __be32
nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
{
ENCODE_SEQID_OP_HEAD;
......@@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
}
ENCODE_SEQID_OP_TAIL(od->od_stateowner);
return nfserr;
}
static __be32
......@@ -2443,7 +2415,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
return nfserr;
}
static void
static __be32
nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
{
ENCODE_HEAD;
......@@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITECINFO(remove->rm_cinfo);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
{
ENCODE_HEAD;
......@@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITECINFO(rename->rn_tinfo);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo *secinfo)
{
......@@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
out:
if (exp)
exp_put(exp);
return nfserr;
}
/*
* The SETATTR encode routine is special -- it always encodes a bitmap,
* regardless of the error status.
*/
static void
static __be32
nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
{
ENCODE_HEAD;
......@@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
WRITE32(setattr->sa_bmval[1]);
}
ADJUST_ARGS();
return nfserr;
}
static void
static __be32
nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
{
ENCODE_HEAD;
......@@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
WRITE32(0);
ADJUST_ARGS();
}
return nfserr;
}
static void
static __be32
nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
{
ENCODE_HEAD;
......@@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
WRITEMEM(write->wr_verifier.data, 8);
ADJUST_ARGS();
}
return nfserr;
}
static __be32
nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
{
return nfserr;
}
typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
static nfsd4_enc nfsd4_enc_ops[] = {
[OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
[OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
[OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
[OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
[OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
[OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
[OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
[OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
[OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
[OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
[OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
[OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
[OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
[OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
[OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
[OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
[OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
[OP_READ] = (nfsd4_enc)nfsd4_encode_read,
[OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
[OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
[OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
[OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
[OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
[OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
[OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
[OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
[OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
[OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
[OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
};
void
nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
......@@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
statp = p++; /* to be backfilled at the end */
ADJUST_ARGS();
switch (op->opnum) {
case OP_ACCESS:
nfsd4_encode_access(resp, op->status, &op->u.access);
break;
case OP_CLOSE:
nfsd4_encode_close(resp, op->status, &op->u.close);
break;
case OP_COMMIT:
nfsd4_encode_commit(resp, op->status, &op->u.commit);
break;
case OP_CREATE:
nfsd4_encode_create(resp, op->status, &op->u.create);
break;
case OP_DELEGRETURN:
break;
case OP_GETATTR:
op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
break;
case OP_GETFH:
nfsd4_encode_getfh(resp, op->status, op->u.getfh);
break;
case OP_LINK:
nfsd4_encode_link(resp, op->status, &op->u.link);
break;
case OP_LOCK:
nfsd4_encode_lock(resp, op->status, &op->u.lock);
break;
case OP_LOCKT:
nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
break;
case OP_LOCKU:
nfsd4_encode_locku(resp, op->status, &op->u.locku);
break;
case OP_LOOKUP:
break;
case OP_LOOKUPP:
break;
case OP_NVERIFY:
break;
case OP_OPEN:
nfsd4_encode_open(resp, op->status, &op->u.open);
break;
case OP_OPEN_CONFIRM:
nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
break;
case OP_OPEN_DOWNGRADE:
nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
break;
case OP_PUTFH:
break;
case OP_PUTROOTFH:
break;
case OP_READ:
op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
break;
case OP_READDIR:
op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
break;
case OP_READLINK:
op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
break;
case OP_REMOVE:
nfsd4_encode_remove(resp, op->status, &op->u.remove);
break;
case OP_RENAME:
nfsd4_encode_rename(resp, op->status, &op->u.rename);
break;
case OP_RENEW:
break;
case OP_RESTOREFH:
break;
case OP_SAVEFH:
break;
case OP_SECINFO:
nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
break;
case OP_SETATTR:
nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
break;
case OP_SETCLIENTID:
nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
break;
case OP_SETCLIENTID_CONFIRM:
break;
case OP_VERIFY:
break;
case OP_WRITE:
nfsd4_encode_write(resp, op->status, &op->u.write);
break;
case OP_RELEASE_LOCKOWNER:
break;
default:
break;
}
if (op->opnum == OP_ILLEGAL)
goto status;
BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
status:
/*
* Note: We write the status directly, instead of using WRITE32(),
* since it is already in network byte order.
......
......@@ -310,9 +310,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
{
__be32 server_ip;
char *fo_path, c;
struct sockaddr_in sin = {
.sin_family = AF_INET,
};
int b1, b2, b3, b4;
char c;
char *fo_path;
/* sanity check */
if (size == 0)
......@@ -326,11 +329,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
return -EINVAL;
/* get ipv4 address */
if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
return -EINVAL;
if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
return -EINVAL;
server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
return nlmsvc_unlock_all_by_ip(server_ip);
return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
}
static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
......@@ -450,22 +455,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
int i;
int rv;
int len;
int npools = nfsd_nrpools();
int npools;
int *nthreads;
mutex_lock(&nfsd_mutex);
npools = nfsd_nrpools();
if (npools == 0) {
/*
* NFS is shut down. The admin can start it by
* writing to the threads file but NOT the pool_threads
* file, sorry. Report zero threads.
*/
mutex_unlock(&nfsd_mutex);
strcpy(buf, "0\n");
return strlen(buf);
}
nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
rv = -ENOMEM;
if (nthreads == NULL)
return -ENOMEM;
goto out_free;
if (size > 0) {
for (i = 0; i < npools; i++) {
......@@ -496,14 +505,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
mesg += len;
}
mutex_unlock(&nfsd_mutex);
return (mesg-buf);
out_free:
kfree(nthreads);
mutex_unlock(&nfsd_mutex);
return rv;
}
static ssize_t write_versions(struct file *file, char *buf, size_t size)
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
/*
* Format:
......@@ -566,14 +577,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}
static ssize_t write_ports(struct file *file, char *buf, size_t size)
static ssize_t write_versions(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_versions(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
int len = 0;
lock_kernel();
if (nfsd_serv)
len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel();
return len;
}
/* Either a single 'fd' number is written, in which
......@@ -603,9 +623,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
/* Decrease the count, but don't shutdown the
* the service
*/
lock_kernel();
nfsd_serv->sv_nrthreads--;
unlock_kernel();
}
return err < 0 ? err : 0;
}
......@@ -614,10 +632,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
if (!toclose)
return -ENOMEM;
lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
unlock_kernel();
if (len >= 0)
lockd_down();
kfree(toclose);
......@@ -655,7 +671,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
......@@ -666,13 +681,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
} else
err = -ENOTCONN;
}
unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}
static ssize_t write_ports(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_ports(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
int nfsd_max_blksize;
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
......@@ -691,13 +716,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize &= ~(1024-1);
lock_kernel();
mutex_lock(&nfsd_mutex);
if (nfsd_serv && nfsd_serv->sv_nrthreads) {
unlock_kernel();
mutex_unlock(&nfsd_mutex);
return -EBUSY;
}
nfsd_max_blksize = bsize;
unlock_kernel();
mutex_unlock(&nfsd_mutex);
}
return sprintf(buf, "%d\n", nfsd_max_blksize);
}
......@@ -705,16 +730,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
{
/* if size > 10 seconds, call
* nfs4_reset_lease() then write out the new lease (seconds) as reply
*/
char *mesg = buf;
int rv;
int rv, lease;
if (size > 0) {
int lease;
if (nfsd_serv)
return -EBUSY;
rv = get_int(&mesg, &lease);
if (rv)
return rv;
......@@ -726,13 +752,28 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return strlen(buf);
}
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_leasetime(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
extern char *nfs4_recoverydir(void);
static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
char *recdir;
int len, status;
if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
if (size > 0) {
if (nfsd_serv)
return -EBUSY;
if (size > PATH_MAX || buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
......@@ -742,8 +783,21 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
return -EINVAL;
status = nfs4_reset_recoverydir(recdir);
}
sprintf(buf, "%s\n", nfs4_recoverydir());
return strlen(buf);
}
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
{
ssize_t rv;
mutex_lock(&nfsd_mutex);
rv = __write_recoverydir(file, buf, size);
mutex_unlock(&nfsd_mutex);
return rv;
}
#endif
/*----------------------------------------------------------------------------*/
......
......@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
/* Elevate privileges so that the lack of 'r' or 'x'
* permission on some parent directory will
* not stop exportfs_decode_fh from being able
* to reconnect a directory into the dentry cache.
* The same problem can affect "SUBTREECHECK" exports,
* but as nfsd_acceptable depends on correct
* access control settings being in effect, we cannot
* fix that case easily.
*/
current->cap_effective =
cap_raise_nfsd_set(current->cap_effective,
current->cap_permitted);
} else {
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error)
goto out;
}
/*
* Look up the dentry using the NFS file handle.
......@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
goto out;
}
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
error = nfsd_setuser_and_check_port(rqstp, exp);
if (error) {
dput(dentry);
goto out;
}
}
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
......@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
if (error)
goto out;
if (!(access & MAY_LOCK)) {
if (!(access & NFSD_MAY_LOCK)) {
/*
* pseudoflavor restrictions are not enforced on NLM,
* which clients virtually always use auth_sys for,
......
......@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
return nfsd_return_attrs(nfserr, resp);
}
......@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
SVCFH_fmt(dirfhp), argp->len, argp->name);
/* First verify the parent file handle */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
if (nfserr)
goto done; /* must fh_put dirfhp even on error */
/* Check for MAY_WRITE in nfsd_create if necessary */
/* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
nfserr = nfserr_acces;
if (!argp->len)
......@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfsd_permission(rqstp,
newfhp->fh_export,
newfhp->fh_dentry,
MAY_WRITE|MAY_LOCAL_ACCESS);
NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
if (nfserr && nfserr != nfserr_rofs)
goto out_unlock;
}
......@@ -614,6 +614,7 @@ nfserrno (int errno)
#endif
{ nfserr_stale, -ESTALE },
{ nfserr_jukebox, -ETIMEDOUT },
{ nfserr_jukebox, -ERESTARTSYS },
{ nfserr_dropit, -EAGAIN },
{ nfserr_dropit, -ENOMEM },
{ nfserr_badname, -ESRCH },
......
......@@ -21,6 +21,7 @@
#include <linux/smp_lock.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
......@@ -36,28 +37,38 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
/* these signals will be delivered to an nfsd thread
* when handling a request
*/
#define ALLOWED_SIGS (sigmask(SIGKILL))
/* these signals will be delivered to an nfsd thread
* when not handling a request. i.e. when waiting
*/
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
/* if the last thread dies with SIGHUP, then the exports table is
* left unchanged ( like 2.4-{0-9} ). Any other signal will clear
* the exports table (like 2.2).
*/
#define SIG_NOCLEAN SIGHUP
extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
struct svc_serv *nfsd_serv;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);
/*
* nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
* of the svc_serv struct. In particular, ->sv_nrthreads but also to some
* extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
*
* If (out side the lock) nfsd_serv is non-NULL, then it must point to a
* properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
* of nfsd threads must exist and each must listed in ->sp_all_threads in each
* entry of ->sv_pools[].
*
* Transitions of the thread count between zero and non-zero are of particular
* interest since the svc_serv needs to be created and initialized at that
* point, or freed.
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
* nfsctl.c. In particular:
*
* user_recovery_dirname
* user_lease_time
* nfsd_versions
*/
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv *nfsd_serv;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
......@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
int nfsd_nrthreads(void)
{
if (nfsd_serv == NULL)
return 0;
else
return nfsd_serv->sv_nrthreads;
int rv = 0;
mutex_lock(&nfsd_mutex);
if (nfsd_serv)
rv = nfsd_serv->sv_nrthreads;
mutex_unlock(&nfsd_mutex);
return rv;
}
static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
......@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
nfsd_racache_shutdown();
nfs4_state_shutdown();
printk(KERN_WARNING "nfsd: last server has exited\n");
if (killsig != SIG_NOCLEAN) {
printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
printk(KERN_WARNING "nfsd: last server has exited, flushing export "
"cache\n");
nfsd_export_flush();
}
}
void nfsd_reset_versions(void)
......@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
}
}
int nfsd_create_serv(void)
{
int err = 0;
lock_kernel();
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
unlock_kernel();
return 0;
}
if (nfsd_max_blksize == 0) {
......@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
}
atomic_set(&nfsd_busy, 0);
nfsd_serv = svc_create_pooled(&nfsd_program,
nfsd_max_blksize,
nfsd_last_thread,
nfsd, SIG_NOCLEAN, THIS_MODULE);
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
unlock_kernel();
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
}
......@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
int tot = 0;
int err = 0;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv == NULL || n <= 0)
return 0;
......@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
nthreads[0] = 1;
/* apply the new numbers */
lock_kernel();
svc_get(nfsd_serv);
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
......@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
break;
}
svc_destroy(nfsd_serv);
unlock_kernel();
return err;
}
......@@ -335,7 +344,7 @@ nfsd_svc(unsigned short port, int nrservs)
{
int error;
lock_kernel();
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
......@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
failure:
svc_destroy(nfsd_serv); /* Release server */
out:
unlock_kernel();
mutex_unlock(&nfsd_mutex);
return error;
}
......@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
/*
* This is the NFS server kernel thread
*/
static void
nfsd(struct svc_rqst *rqstp)
static int
nfsd(void *vrqstp)
{
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct fs_struct *fsp;
int err;
sigset_t shutdown_mask, allowed_mask;
int err, preverr = 0;
/* Lock module and set up kernel thread */
lock_kernel();
daemonize("nfsd");
mutex_lock(&nfsd_mutex);
/* After daemonize() this kernel thread shares current->fs
/* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
fsp = copy_fs_struct(current->fs);
......@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
current->fs = fsp;
current->fs->umask = 0;
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);
/*
* thread is spawned with all signals set to SIG_IGN, re-enable
* the ones that will bring down the thread
*/
allow_signal(SIGKILL);
allow_signal(SIGHUP);
allow_signal(SIGINT);
allow_signal(SIGQUIT);
nfsdstats.th_cnt++;
rqstp->rq_task = current;
unlock_kernel();
mutex_unlock(&nfsd_mutex);
/*
* We want less throttling in balance_dirty_pages() so that nfs to
......@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
* The main request loop
*/
for (;;) {
/* Block all but the shutdown signals */
sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
/*
* Find a socket with data available and call its
* recvfrom routine.
*/
while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
if (err < 0)
if (err == -EINTR)
break;
else if (err < 0) {
if (err != preverr) {
printk(KERN_WARNING "%s: unexpected error "
"from svc_recv (%d)\n", __func__, -err);
preverr = err;
}
schedule_timeout_uninterruptible(HZ);
continue;
}
update_thread_usage(atomic_read(&nfsd_busy));
atomic_inc(&nfsd_busy);
/* Lock the export hash tables for reading. */
exp_readlock();
/* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
svc_process(rqstp);
/* Unlock export hash tables */
......@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
atomic_dec(&nfsd_busy);
}
if (err != -EINTR) {
printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
} else {
unsigned int signo;
for (signo = 1; signo <= _NSIG; signo++)
if (sigismember(&current->pending.signal, signo) &&
!sigismember(&current->blocked, signo))
break;
killsig = signo;
}
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
lock_kernel();
mutex_lock(&nfsd_mutex);
nfsdstats.th_cnt --;
out:
......@@ -486,8 +489,9 @@ nfsd(struct svc_rqst *rqstp)
svc_exit_thread(rqstp);
/* Release module */
unlock_kernel();
mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
return 0;
}
static __be32 map_new_errors(u32 vers, __be32 nfserr)
......
......@@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
/* Obtain dentry and export. */
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
......@@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
{
struct dentry *dentry;
struct inode *inode;
int accmode = MAY_SATTR;
int accmode = NFSD_MAY_SATTR;
int ftype = 0;
__be32 err;
int host_err;
int size_change = 0;
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
......@@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*/
if (iap->ia_valid & ATTR_SIZE) {
if (iap->ia_size < inode->i_size) {
err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
err = nfsd_permission(rqstp, fhp->fh_export, dentry,
NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
}
......@@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int flags = 0;
/* Get inode */
error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
if (error)
return error;
......@@ -563,20 +564,20 @@ struct accessmap {
int how;
};
static struct accessmap nfs3_regaccess[] = {
{ NFS3_ACCESS_READ, MAY_READ },
{ NFS3_ACCESS_EXECUTE, MAY_EXEC },
{ NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC },
{ NFS3_ACCESS_EXTEND, MAY_WRITE },
{ NFS3_ACCESS_READ, NFSD_MAY_READ },
{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
{ 0, 0 }
};
static struct accessmap nfs3_diraccess[] = {
{ NFS3_ACCESS_READ, MAY_READ },
{ NFS3_ACCESS_LOOKUP, MAY_EXEC },
{ NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC },
{ NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE },
{ NFS3_ACCESS_DELETE, MAY_REMOVE },
{ NFS3_ACCESS_READ, NFSD_MAY_READ },
{ NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
{ 0, 0 }
};
......@@ -589,10 +590,10 @@ static struct accessmap nfs3_anyaccess[] = {
* mainly at mode bits, and we make sure to ignore read-only
* filesystem checks
*/
{ NFS3_ACCESS_READ, MAY_READ },
{ NFS3_ACCESS_EXECUTE, MAY_EXEC },
{ NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS },
{ NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS },
{ NFS3_ACCESS_READ, NFSD_MAY_READ },
{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
{ 0, 0 }
};
......@@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
u32 query, result = 0, sresult = 0;
__be32 error;
error = fh_verify(rqstp, fhp, 0, MAY_NOP);
error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (error)
goto out;
......@@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
*/
err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
......@@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* or any access when mandatory locking enabled
*/
err = nfserr_perm;
if (IS_APPEND(inode) && (access & MAY_WRITE))
if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
goto out;
/*
* We must ignore files (but only files) which might have mandatory
......@@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* Check to see if there are any leases on this file.
* This may block while leases are broken.
*/
host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
if (host_err == -EWOULDBLOCK)
host_err = -ETIMEDOUT;
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
if (access & MAY_WRITE) {
if (access & MAY_READ)
if (access & NFSD_MAY_WRITE) {
if (access & NFSD_MAY_READ)
flags = O_RDWR|O_LARGEFILE;
else
flags = O_WRONLY|O_LARGEFILE;
......@@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
MAY_READ|MAY_OWNER_OVERRIDE);
NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else {
err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
......@@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
MAY_WRITE|MAY_OWNER_OVERRIDE);
NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
stablep);
} else {
err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
......@@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if ((u64)count > ~(u64)offset)
return nfserr_inval;
if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
return err;
if (EX_ISSYNC(fhp->fh_export)) {
if (file->f_op && file->f_op->fsync) {
......@@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (isdotent(fname, flen))
goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
......@@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
err = nfserr_inval;
if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
type);
goto out;
}
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
/*
* Get the dir op function pointer.
*/
err = 0;
switch (type) {
case S_IFREG:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
break;
case S_IFDIR:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
default:
printk("nfsd: bad file type %o in nfsd_create\n", type);
host_err = -EINVAL;
goto out_nfserr;
}
if (host_err < 0) {
mnt_drop_write(fhp->fh_export->ex_path.mnt);
......@@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
write_inode_now(dchild->d_inode, 1);
}
err2 = nfsd_create_setattr(rqstp, resfhp, iap);
if (err2)
err = err2;
......@@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
......@@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
__be32 err;
int host_err;
err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
if (err)
goto out;
......@@ -1526,7 +1525,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (isdotent(fname, flen))
goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
fh_lock(fhp);
......@@ -1591,10 +1590,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
__be32 err;
int host_err;
err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
if (err)
goto out;
......@@ -1661,10 +1660,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
__be32 err;
int host_err;
err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
......@@ -1768,7 +1767,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
err = nfserr_acces;
if (!flen || isdotent(fname, flen))
goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
......@@ -1834,7 +1833,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
struct file *file;
loff_t offset = *offsetp;
err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
if (err)
goto out;
......@@ -1875,7 +1874,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
__be32
nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
{
__be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP);
__be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (!err && vfs_statfs(fhp->fh_dentry,stat))
err = nfserr_io;
return err;
......@@ -1896,18 +1895,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
struct inode *inode = dentry->d_inode;
int err;
if (acc == MAY_NOP)
if (acc == NFSD_MAY_NOP)
return 0;
#if 0
dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
acc,
(acc & MAY_READ)? " read" : "",
(acc & MAY_WRITE)? " write" : "",
(acc & MAY_EXEC)? " exec" : "",
(acc & MAY_SATTR)? " sattr" : "",
(acc & MAY_TRUNC)? " trunc" : "",
(acc & MAY_LOCK)? " lock" : "",
(acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
(acc & NFSD_MAY_READ)? " read" : "",
(acc & NFSD_MAY_WRITE)? " write" : "",
(acc & NFSD_MAY_EXEC)? " exec" : "",
(acc & NFSD_MAY_SATTR)? " sattr" : "",
(acc & NFSD_MAY_TRUNC)? " trunc" : "",
(acc & NFSD_MAY_LOCK)? " lock" : "",
(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
IS_APPEND(inode)? " append" : "",
......@@ -1920,18 +1919,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
* system. But if it is IRIX doing check on write-access for a
* device special file, we ignore rofs.
*/
if (!(acc & MAY_LOCAL_ACCESS))
if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
if (!(acc & NFSD_MAY_LOCAL_ACCESS))
if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
if (exp_rdonly(rqstp, exp) ||
__mnt_is_readonly(exp->ex_path.mnt))
return nfserr_rofs;
if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
return nfserr_perm;
}
if ((acc & MAY_TRUNC) && IS_APPEND(inode))
if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
if (acc & MAY_LOCK) {
if (acc & NFSD_MAY_LOCK) {
/* If we cannot rely on authentication in NLM requests,
* just allow locks, otherwise require read permission, or
* ownership
......@@ -1939,7 +1938,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
if (exp->ex_flags & NFSEXP_NOAUTHNLM)
return 0;
else
acc = MAY_READ | MAY_OWNER_OVERRIDE;
acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
}
/*
* The file owner always gets access permission for accesses that
......@@ -1955,15 +1954,16 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
* We must trust the client to do permission checking - using "ACCESS"
* with NFSv3.
*/
if ((acc & MAY_OWNER_OVERRIDE) &&
if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
inode->i_uid == current->fsuid)
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
acc == (MAY_READ | MAY_OWNER_OVERRIDE))
acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
err = permission(inode, MAY_EXEC, NULL);
return err? nfserrno(err) : 0;
......
......@@ -918,12 +918,12 @@ struct file_lock {
struct list_head fl_link; /* doubly linked list of all locks */
struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner;
unsigned char fl_flags;
unsigned char fl_type;
unsigned int fl_pid;
struct pid *fl_nspid;
wait_queue_head_t fl_wait;
struct file *fl_file;
unsigned char fl_flags;
unsigned char fl_type;
loff_t fl_start;
loff_t fl_end;
......
......@@ -200,10 +200,12 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
* Server-side lock handling
*/
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, int, struct nlm_cookie *);
struct nlm_host *, struct nlm_lock *, int,
struct nlm_cookie *);
__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *);
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
unsigned long nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
......@@ -224,7 +226,7 @@ void nlmsvc_invalidate_all(void);
* Cluster failover support
*/
int nlmsvc_unlock_all_by_sb(struct super_block *sb);
int nlmsvc_unlock_all_by_ip(__be32 server_addr);
int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
......
......@@ -65,9 +65,6 @@
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
#define NFS4_ACE_OWNER 0x00000080
#define NFS4_ACE_GROUP 0x00000100
#define NFS4_ACE_EVERYONE 0x00000200
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
......
......@@ -28,20 +28,20 @@
#define NFSD_SUPPORTED_MINOR_VERSION 0
/*
* Special flags for nfsd_permission. These must be different from MAY_READ,
* MAY_WRITE, and MAY_EXEC.
* Flags for nfsd_permission
*/
#define MAY_NOP 0
#define MAY_SATTR 8
#define MAY_TRUNC 16
#define MAY_LOCK 32
#define MAY_OWNER_OVERRIDE 64
#define MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
#if (MAY_SATTR | MAY_TRUNC | MAY_LOCK | MAY_OWNER_OVERRIDE | MAY_LOCAL_ACCESS) & (MAY_READ | MAY_WRITE | MAY_EXEC)
# error "please use a different value for MAY_SATTR or MAY_TRUNC or MAY_LOCK or MAY_LOCAL_ACCESS or MAY_OWNER_OVERRIDE."
#endif
#define MAY_CREATE (MAY_EXEC|MAY_WRITE)
#define MAY_REMOVE (MAY_EXEC|MAY_WRITE|MAY_TRUNC)
#define NFSD_MAY_NOP 0
#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
#define NFSD_MAY_READ 4 /* == MAY_READ */
#define NFSD_MAY_SATTR 8
#define NFSD_MAY_TRUNC 16
#define NFSD_MAY_LOCK 32
#define NFSD_MAY_OWNER_OVERRIDE 64
#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
/*
* Callback function for readdir
......@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
extern struct svc_program nfsd_program;
extern struct svc_version nfsd_version2, nfsd_version3,
nfsd_version4;
extern struct mutex nfsd_mutex;
extern struct svc_serv *nfsd_serv;
extern struct seq_operations nfs_exports_op;
......
......@@ -98,8 +98,6 @@ struct nfs4_callback {
u32 cb_ident;
/* RPC client info */
atomic_t cb_set; /* successful CB_NULL call */
struct rpc_program cb_program;
struct rpc_stat cb_stat;
struct rpc_clnt * cb_client;
};
......
......@@ -51,6 +51,9 @@ struct krb5_ctx {
extern spinlock_t krb5_seq_lock;
/* The length of the Kerberos GSS token header */
#define GSS_KRB5_TOK_HDR_LEN (16)
#define KG_TOK_MIC_MSG 0x0101
#define KG_TOK_WRAP_MSG 0x0201
......
......@@ -22,7 +22,7 @@
/*
* This is the RPC server thread function prototype
*/
typedef void (*svc_thread_fn)(struct svc_rqst *);
typedef int (*svc_thread_fn)(void *);
/*
*
......@@ -80,7 +80,6 @@ struct svc_serv {
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
int sv_kill_signal; /* signal to kill threads */
};
/*
......@@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv*),
svc_thread_fn, int sig, struct module *);
void (*shutdown)(struct svc_serv*), svc_thread_fn,
struct module *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
......
......@@ -72,7 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
*/
struct svc_rdma_op_ctxt {
struct svc_rdma_op_ctxt *read_hdr;
struct list_head free_list;
int hdr_count;
struct xdr_buf arg;
struct list_head dto_q;
enum ib_wr_opcode wr_op;
......@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
struct page *pages[RPCSVC_MAXPAGES];
};
/*
* NFS_ requests are mapped on the client side by the chunk lists in
* the RPCRDMA header. During the fetching of the RPC from the client
* and the writing of the reply to the client, the memory in the
* client and the memory in the server must be mapped as contiguous
* vaddr/len for access by the hardware. These data strucures keep
* these mappings.
*
* For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
* 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
* 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
* mapping of the reply.
*/
struct svc_rdma_chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
struct svc_rdma_req_map {
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
};
};
#define RDMACTXT_F_LAST_CTXT 2
struct svcxprt_rdma {
......@@ -93,7 +118,6 @@ struct svcxprt_rdma {
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
wait_queue_head_t sc_read_wait;
int sc_max_sge;
int sc_sq_depth; /* Depth of SQ */
......@@ -104,12 +128,8 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
atomic_t sc_dma_used;
atomic_t sc_ctxt_used;
struct list_head sc_ctxt_free;
int sc_ctxt_cnt;
int sc_ctxt_bump;
int sc_ctxt_max;
spinlock_t sc_ctxt_lock;
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
......@@ -173,6 +193,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
......
......@@ -5,12 +5,12 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
gss_mech_switch.o svcauth_gss.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
......
......@@ -83,8 +83,6 @@ krb5_encrypt(
return ret;
}
EXPORT_SYMBOL(krb5_encrypt);
u32
krb5_decrypt(
struct crypto_blkcipher *tfm,
......@@ -118,8 +116,6 @@ krb5_decrypt(
return ret;
}
EXPORT_SYMBOL(krb5_decrypt);
static int
checksummer(struct scatterlist *sg, void *data)
{
......@@ -161,8 +157,6 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
return err ? GSS_S_FAILURE : 0;
}
EXPORT_SYMBOL(make_checksum);
struct encryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
......@@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return ret;
}
EXPORT_SYMBOL(gss_encrypt_xdr_buf);
struct decryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
......@@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
EXPORT_SYMBOL(gss_decrypt_xdr_buf);
......@@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr, *krb5_hdr, *msg_start;
unsigned char *ptr, *msg_start;
s32 now;
u32 seq_send;
......@@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
now = get_seconds();
token->len = g_token_size(&ctx->mech_used, 24);
token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
ptr = token->data;
g_make_token_header(&ctx->mech_used, 24, &ptr);
g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
*ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
*ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(ptr + 4, 0xff, 4);
if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, krb5_hdr + 16, krb5_hdr + 8))
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
ptr + 8))
return GSS_S_FAILURE;
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
......
......@@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
read_token->len))
return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
(*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
signalg = ptr[0] + (ptr[1] << 8);
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[2] + (ptr[3] << 8);
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_NONE)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data + 8, ptr + 14, 8))
if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
......@@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
/* do sequencing checks */
if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
......
......@@ -87,8 +87,8 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
return 0;
}
static inline void
make_confounder(char *p, int blocksize)
static void
make_confounder(char *p, u32 conflen)
{
static u64 i = 0;
u64 *q = (u64 *)p;
......@@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize)
* uniqueness would mean worrying about atomicity and rollover, and I
* don't care enough. */
BUG_ON(blocksize != 8);
*q = i++;
/* initialize to random value */
if (i == 0) {
i = random32();
i = (i << 32) | random32();
}
switch (conflen) {
case 16:
*q++ = i++;
/* fall through */
case 8:
*q++ = i++;
break;
default:
BUG();
}
}
/* Assumptions: the head and tail of inbuf are ours to play with.
......@@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *krb5_hdr, *msg_start;
unsigned char *ptr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
......@@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
buf->len += headlen;
BUG_ON((buf->len - offset - headlen) % blocksize);
g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr);
g_make_token_header(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
*ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
*ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
msg_start = ptr + 24;
*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
*(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(ptr + 4, 0xff, 4);
*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
make_confounder(msg_start, blocksize);
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
if (make_checksum("md5", krb5_hdr, 8, buf,
if (make_checksum("md5", ptr, 8, buf,
offset + headlen - blocksize, &md5cksum))
return GSS_S_FAILURE;
buf->pages = tmp_pages;
......@@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
......@@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
seq_send, krb5_hdr + 16, krb5_hdr + 8)))
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
return GSS_S_FAILURE;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
......@@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
buf->len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
(*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
signalg = ptr[0] + (ptr[1] << 8);
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[2] + (ptr[3] << 8);
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_DES)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (gss_decrypt_xdr_buf(kctx->enc, buf,
ptr + 22 - (unsigned char *)buf->head[0].iov_base))
ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
return GSS_S_DEFECTIVE_TOKEN;
if (make_checksum("md5", ptr - 2, 8, buf,
ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
if (make_checksum("md5", ptr, 8, buf,
ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data + 8, ptr + 14, 8))
if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
......@@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
/* do sequencing checks */
if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
&seqnum))
if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
&direction, &seqnum))
return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
......@@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
* better to copy and encrypt at the same time. */
blocksize = crypto_blkcipher_blocksize(kctx->enc);
data_start = ptr + 22 + blocksize;
data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
......
......@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
......@@ -291,15 +292,14 @@ svc_pool_map_put(void)
/*
* Set the current thread's cpus_allowed mask so that it
* Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
*
* Returns 1 and fills in oldmask iff a cpumask was applied.
*/
static inline int
svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
static inline void
svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
unsigned int node = m->pool_to[pidx];
/*
* The caller checks for sv_nrpools > 1, which
......@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
*/
BUG_ON(m->count == 0);
switch (m->mode)
{
default:
return 0;
switch (m->mode) {
case SVC_POOL_PERCPU:
{
unsigned int cpu = m->pool_to[pidx];
*oldmask = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
return 1;
set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
break;
}
case SVC_POOL_PERNODE:
{
unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
*oldmask = current->cpus_allowed;
set_cpus_allowed_ptr(current, nodecpumask);
return 1;
set_cpus_allowed_ptr(task, nodecpumask);
break;
}
}
}
......@@ -443,7 +434,7 @@ EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, int sig, struct module *mod)
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
......@@ -452,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
if (serv != NULL) {
serv->sv_function = func;
serv->sv_kill_signal = sig;
serv->sv_module = mod;
}
......@@ -461,7 +451,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
EXPORT_SYMBOL(svc_create_pooled);
/*
* Destroy an RPC service. Should be called with the BKL held
* Destroy an RPC service. Should be called with appropriate locking to
* protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
*/
void
svc_destroy(struct svc_serv *serv)
......@@ -577,46 +568,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
}
EXPORT_SYMBOL(svc_prepare_thread);
/*
* Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread
* will be restricted to run on the cpus belonging to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
int have_oldmask = 0;
cpumask_t uninitialized_var(oldmask);
rqstp = svc_prepare_thread(serv, pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
goto out;
}
if (serv->sv_nrpools > 1)
have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
error = kernel_thread((int (*)(void *)) func, rqstp, 0);
if (have_oldmask)
set_cpus_allowed(current, oldmask);
if (error < 0)
goto out_thread;
svc_sock_update_bufs(serv);
error = 0;
out:
return error;
out_thread:
svc_exit_thread(rqstp);
goto out;
}
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
......@@ -674,7 +625,7 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Must be called with a svc_get() reference and
* the BKL held.
* the BKL or another lock to protect access to svc_serv fields.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
......@@ -686,7 +637,9 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
struct task_struct *victim;
struct svc_rqst *rqstp;
struct task_struct *task;
struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;
......@@ -702,18 +655,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* create new threads */
while (nrservs > 0) {
nrservs--;
chosen_pool = choose_pool(serv, pool, &state);
rqstp = svc_prepare_thread(serv, chosen_pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
break;
}
__module_get(serv->sv_module);
error = __svc_create_thread(serv->sv_function, serv,
choose_pool(serv, pool, &state));
if (error < 0) {
task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
if (IS_ERR(task)) {
error = PTR_ERR(task);
module_put(serv->sv_module);
svc_exit_thread(rqstp);
break;
}
rqstp->rq_task = task;
if (serv->sv_nrpools > 1)
svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
svc_sock_update_bufs(serv);
wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
(victim = choose_victim(serv, pool, &state)) != NULL) {
send_sig(serv->sv_kill_signal, victim, 1);
(task = choose_victim(serv, pool, &state)) != NULL) {
send_sig(SIGINT, task, 1);
nrservs++;
}
......@@ -722,7 +691,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL(svc_set_num_threads);
/*
* Called from a server thread as it's exiting. Caller must hold BKL.
* Called from a server thread as it's exiting. Caller must hold the BKL or
* the "service mutex", whichever is appropriate for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
......
......@@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/* Temporary NFS request map and context caches */
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
......@@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
flush_scheduled_work();
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
kmem_cache_destroy(svc_rdma_map_cachep);
kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
......@@ -255,9 +262,37 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Create the temporary map cache */
svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
sizeof(struct svc_rdma_req_map),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_map_cachep) {
printk(KERN_INFO "Could not allocate map cache.\n");
goto err0;
}
/* Create the temporary context cache */
svc_rdma_ctxt_cachep =
kmem_cache_create("svc_rdma_ctxt_cache",
sizeof(struct svc_rdma_op_ctxt),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_ctxt_cachep) {
printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
goto err1;
}
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
err1:
kmem_cache_destroy(svc_rdma_map_cachep);
err0:
unregister_sysctl_table(svcrdma_table_header);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
......
......@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
struct chunk_sge {
int start; /* sge no for this chunk */
int count; /* sge count for this chunk */
};
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
......@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
struct ib_sge *sge,
struct chunk_sge *ch_sge_ary,
struct svc_rdma_req_map *rpl_map,
struct svc_rdma_req_map *chl_map,
int ch_count,
int byte_count)
{
......@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->sge[0].length = head->count; /* save count of hdr pages */
head->hdr_count = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
ch_sge_ary[0].start = 0;
chl_map->ch[0].start = 0;
while (byte_count) {
rpl_map->sge[sge_no].iov_base =
page_address(rqstp->rq_arg.pages[page_no]) + page_off;
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
rqstp->rq_arg.pages[page_no],
page_off, sge_bytes,
DMA_FROM_DEVICE);
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
rpl_map->sge[sge_no].iov_len = sge_bytes;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
......@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
* SGE, move to the next SGE
*/
if (ch_bytes == 0) {
ch_sge_ary[ch_no].count =
sge_no - ch_sge_ary[ch_no].start;
chl_map->ch[ch_no].count =
sge_no - chl_map->ch[ch_no].start;
ch_no++;
ch++;
ch_sge_ary[ch_no].start = sge_no;
chl_map->ch[ch_no].start = sge_no;
ch_bytes = ch->rc_target.rs_length;
/* If bytes remaining account for next chunk */
if (byte_count) {
......@@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
return sge_no;
}
static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
struct ib_sge *sge,
static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt,
struct kvec *vec,
u64 *sgl_offset,
int count)
{
int i;
ctxt->count = count;
ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
ctxt->sge[i].addr = sge[i].addr;
ctxt->sge[i].length = sge[i].length;
*sgl_offset = *sgl_offset + sge[i].length;
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[i].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
vec[i].iov_base, vec[i].iov_len,
DMA_FROM_DEVICE);
ctxt->sge[i].length = vec[i].iov_len;
ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
*sgl_offset = *sgl_offset + vec[i].iov_len;
}
}
......@@ -282,34 +280,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct ib_send_wr read_wr;
int err = 0;
int ch_no;
struct ib_sge *sge;
int ch_count;
int byte_count;
int sge_count;
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
struct svc_rdma_req_map *rpl_map;
struct svc_rdma_req_map *chl_map;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
/* Allocate temporary contexts to keep SGE */
BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
tmp_ch_ctxt = svc_rdma_get_context(xprt);
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
/* Allocate temporary reply and chunk maps */
rpl_map = svc_rdma_get_req_map();
chl_map = svc_rdma_get_req_map();
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
rpl_map, chl_map,
ch_count, byte_count);
sgl_offset = 0;
ch_no = 0;
......@@ -331,14 +324,15 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
read_wr.wr.rdma.remote_addr =
get_unaligned(&(ch->rc_target.rs_offset)) +
sgl_offset;
read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
read_wr.sg_list = ctxt->sge;
read_wr.num_sge =
rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
rdma_set_ctxt_sge(xprt, ctxt,
&rpl_map->sge[chl_map->ch[ch_no].start],
&sgl_offset,
read_wr.num_sge);
if (((ch+1)->rc_discrim == 0) &&
(read_wr.num_sge == ch_sge_ary[ch_no].count)) {
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
/*
* Mark the last RDMA_READ with a bit to
* indicate all RPC data has been fetched from
......@@ -358,9 +352,9 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
}
atomic_inc(&rdma_stat_read);
if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
ch_sge_ary[ch_no].count -= read_wr.num_sge;
ch_sge_ary[ch_no].start += read_wr.num_sge;
if (read_wr.num_sge < chl_map->ch[ch_no].count) {
chl_map->ch[ch_no].count -= read_wr.num_sge;
chl_map->ch[ch_no].start += read_wr.num_sge;
goto next_sge;
}
sgl_offset = 0;
......@@ -368,8 +362,8 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
}
out:
svc_rdma_put_context(tmp_sge_ctxt, 0);
svc_rdma_put_context(tmp_ch_ctxt, 0);
svc_rdma_put_req_map(rpl_map);
svc_rdma_put_req_map(chl_map);
/* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
......@@ -399,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
......
......@@ -63,52 +63,44 @@
* SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail.
*
* The max SGE we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
* reserves a page for both the request and the reply header, and this
* array is only concerned with the reply we are assured that we have
* on extra page for the RPCRMDA header.
*/
static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
static void xdr_to_sge(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct ib_sge *sge,
int *sge_count)
struct svc_rdma_req_map *vec)
{
/* Max we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header
*/
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 byte_count = xdr->len;
u32 sge_bytes;
u32 page_bytes;
int page_off;
u32 page_off;
int page_no;
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
xdr->head[0].iov_base,
xdr->head[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
while (byte_count && page_bytes) {
sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
sge[sge_no].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
xdr->pages[page_no], page_off,
sge_bytes, DMA_TO_DEVICE);
sge_bytes = min(sge_bytes, page_bytes);
byte_count -= sge_bytes;
while (page_bytes) {
vec->sge[sge_no].iov_base =
page_address(xdr->pages[page_no]) + page_off;
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
page_bytes -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
vec->sge[sge_no].iov_len = sge_bytes;
sge_no++;
page_no++;
......@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
}
/* Tail SGE */
if (byte_count && xdr->tail[0].iov_len) {
sge[sge_no].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
xdr->tail[0].iov_base,
xdr->tail[0].iov_len,
DMA_TO_DEVICE);
sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
byte_count -= sge_bytes;
sge[sge_no].length = sge_bytes;
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
if (xdr->tail[0].iov_len) {
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
sge_no++;
}
BUG_ON(sge_no > sge_max);
BUG_ON(byte_count != 0);
*sge_count = sge_no;
return sge;
vec->count = sge_no;
}
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to,
u32 xdr_off, int write_len,
struct ib_sge *xdr_sge, int sge_count)
struct svc_rdma_req_map *vec)
{
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct ib_send_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
......@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
int sge_off;
int bc;
struct svc_rdma_op_ctxt *ctxt;
int ret = 0;
BUG_ON(sge_count > RPCSVC_MAXPAGES);
BUG_ON(vec->count > RPCSVC_MAXPAGES);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, xdr_sge=%p, sge_count=%d\n",
"write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
write_len, xdr_sge, sge_count);
write_len, vec->sge, vec->count);
ctxt = svc_rdma_get_context(xprt);
ctxt->count = 0;
tmp_sge_ctxt = svc_rdma_get_context(xprt);
sge = tmp_sge_ctxt->sge;
ctxt->direction = DMA_TO_DEVICE;
sge = ctxt->sge;
/* Find the SGE associated with xdr_off */
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
xdr_sge_no++) {
if (xdr_sge[xdr_sge_no].length > bc)
if (vec->sge[xdr_sge_no].iov_len > bc)
break;
bc -= xdr_sge[xdr_sge_no].length;
bc -= vec->sge[xdr_sge_no].iov_len;
}
sge_off = bc;
......@@ -180,21 +158,28 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_no = 0;
/* Copy the remaining SGE */
while (bc != 0 && xdr_sge_no < sge_count) {
sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
while (bc != 0 && xdr_sge_no < vec->count) {
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_bytes = min((size_t)bc,
(size_t)(xdr_sge[xdr_sge_no].length-sge_off));
(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
sge[sge_no].length = sge_bytes;
atomic_inc(&xprt->sc_dma_used);
sge[sge_no].addr =
ib_dma_map_single(xprt->sc_cm_id->device,
(void *)
vec->sge[xdr_sge_no].iov_base + sge_off,
sge_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(sge[sge_no].addr))
goto err;
sge_off = 0;
sge_no++;
ctxt->count++;
xdr_sge_no++;
bc -= sge_bytes;
}
BUG_ON(bc != 0);
BUG_ON(xdr_sge_no > sge_count);
BUG_ON(xdr_sge_no > vec->count);
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
......@@ -209,21 +194,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Post It */
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr)) {
svc_rdma_put_context(ctxt, 1);
if (svc_rdma_send(xprt, &write_wr))
goto err;
return 0;
err:
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
ret = -EIO;
}
svc_rdma_put_context(tmp_sge_ctxt, 0);
return ret;
return -EIO;
}
static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct ib_sge *sge,
int sge_count)
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
......@@ -269,8 +253,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off,
xdr_off,
this_write,
sge,
sge_count);
vec);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
......@@ -292,8 +275,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct ib_sge *sge,
int sge_count)
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
......@@ -341,8 +323,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off,
xdr_off,
this_write,
sge,
sge_count);
vec);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
......@@ -380,7 +361,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt,
int sge_count,
struct svc_rdma_req_map *vec,
int byte_count)
{
struct ib_send_wr send_wr;
......@@ -405,6 +386,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device,
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
......@@ -413,10 +395,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
/* Determine how many of our SGE are to be transmitted */
for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
sge_bytes = min((size_t)ctxt->sge[sge_no].length,
(size_t)byte_count);
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].addr =
ib_dma_map_single(rdma->sc_cm_id->device,
vec->sge[sge_no].iov_base,
sge_bytes, DMA_TO_DEVICE);
ctxt->sge[sge_no].length = sge_bytes;
ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
}
BUG_ON(byte_count != 0);
......@@ -428,8 +416,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
/* If there are more pages than SGE, terminate SGE list */
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
......@@ -473,20 +463,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct ib_sge *sge;
int sge_count = 0;
struct page *res_page;
struct svc_rdma_op_ctxt *ctxt;
struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg);
/* Build an SGE for the XDR */
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);
vec = svc_rdma_get_req_map();
xdr_to_sge(rdma, &rqstp->rq_res, vec);
inline_bytes = rqstp->rq_res.len;
......@@ -503,7 +493,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count);
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
......@@ -513,7 +503,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
rqstp, sge, sge_count);
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
......@@ -521,11 +511,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
inline_bytes -= ret;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
error:
svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0);
put_page(res_page);
return ret;
......
......@@ -84,72 +84,39 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
{
int target;
int at_least_one = 0;
struct svc_rdma_op_ctxt *ctxt;
target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
xprt->sc_ctxt_max);
spin_lock_bh(&xprt->sc_ctxt_lock);
while (xprt->sc_ctxt_cnt < target) {
xprt->sc_ctxt_cnt++;
spin_unlock_bh(&xprt->sc_ctxt_lock);
ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
spin_lock_bh(&xprt->sc_ctxt_lock);
if (ctxt) {
at_least_one = 1;
INIT_LIST_HEAD(&ctxt->free_list);
list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
} else {
/* kmalloc failed...give up for now */
xprt->sc_ctxt_cnt--;
break;
}
}
spin_unlock_bh(&xprt->sc_ctxt_lock);
dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
return at_least_one;
}
/* WR context cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_ctxt_cachep;
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
while (1) {
spin_lock_bh(&xprt->sc_ctxt_lock);
if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
/* Try to bump my cache. */
spin_unlock_bh(&xprt->sc_ctxt_lock);
if (rdma_bump_context_cache(xprt))
continue;
printk(KERN_INFO "svcrdma: sleeping waiting for "
"context memory on xprt=%p\n",
xprt);
ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
if (ctxt)
break;
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
continue;
}
ctxt = list_entry(xprt->sc_ctxt_free.next,
struct svc_rdma_op_ctxt,
free_list);
list_del_init(&ctxt->free_list);
spin_unlock_bh(&xprt->sc_ctxt_lock);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
atomic_inc(&xprt->sc_ctxt_used);
break;
}
return ctxt;
}
static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
atomic_dec(&xprt->sc_dma_used);
ib_dma_unmap_single(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
}
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
struct svcxprt_rdma *xprt;
......@@ -161,18 +128,36 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
for (i = 0; i < ctxt->count; i++)
ib_dma_unmap_single(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
spin_lock_bh(&xprt->sc_ctxt_lock);
list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
spin_unlock_bh(&xprt->sc_ctxt_lock);
kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
atomic_dec(&xprt->sc_ctxt_used);
}
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/*
* Temporary NFS req mappings are shared across all transport
* instances. These are short lived and should be bounded by the number
* of concurrent server threads * depth of the SQ.
*/
struct svc_rdma_req_map *svc_rdma_get_req_map(void)
{
struct svc_rdma_req_map *map;
while (1) {
map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
if (map)
break;
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
return map;
}
void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
{
kmem_cache_free(svc_rdma_map_cachep, map);
}
/* ib_cq event handler */
static void cq_event_handler(struct ib_event *event, void *context)
{
......@@ -302,6 +287,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
ctxt->wc_status = wc.status;
ctxt->byte_len = wc.byte_len;
svc_rdma_unmap_dma(ctxt);
if (wc.status != IB_WC_SUCCESS) {
/* Close the transport */
dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
......@@ -351,6 +337,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
xprt = ctxt->xprt;
svc_rdma_unmap_dma(ctxt);
if (wc.status != IB_WC_SUCCESS)
/* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
......@@ -361,10 +348,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
switch (ctxt->wr_op) {
case IB_WR_SEND:
case IB_WR_RDMA_WRITE:
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
......@@ -423,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
tasklet_schedule(&dto_tasklet);
}
static void create_context_cache(struct svcxprt_rdma *xprt,
int ctxt_count, int ctxt_bump, int ctxt_max)
{
struct svc_rdma_op_ctxt *ctxt;
int i;
xprt->sc_ctxt_max = ctxt_max;
xprt->sc_ctxt_bump = ctxt_bump;
xprt->sc_ctxt_cnt = 0;
atomic_set(&xprt->sc_ctxt_used, 0);
INIT_LIST_HEAD(&xprt->sc_ctxt_free);
for (i = 0; i < ctxt_count; i++) {
ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt) {
INIT_LIST_HEAD(&ctxt->free_list);
list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
xprt->sc_ctxt_cnt++;
}
}
}
static void destroy_context_cache(struct svcxprt_rdma *xprt)
{
while (!list_empty(&xprt->sc_ctxt_free)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(xprt->sc_ctxt_free.next,
struct svc_rdma_op_ctxt,
free_list);
list_del_init(&ctxt->free_list);
kfree(ctxt);
}
}
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
int listener)
{
......@@ -473,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_read_complete_lock);
spin_lock_init(&cma_xprt->sc_ctxt_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
cma_xprt->sc_ord = svcrdma_ord;
......@@ -482,21 +437,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
cma_xprt->sc_max_requests = svcrdma_max_requests;
cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
atomic_set(&cma_xprt->sc_sq_count, 0);
atomic_set(&cma_xprt->sc_ctxt_used, 0);
if (!listener) {
int reqs = cma_xprt->sc_max_requests;
create_context_cache(cma_xprt,
reqs << 1, /* starting size */
reqs, /* bump amount */
reqs +
cma_xprt->sc_sq_depth +
RPCRDMA_MAX_THREADS + 1); /* max */
if (list_empty(&cma_xprt->sc_ctxt_free)) {
kfree(cma_xprt);
return NULL;
}
clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
} else
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
return cma_xprt;
......@@ -532,6 +475,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
BUG_ON(sge_no >= xprt->sc_max_sge);
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
atomic_inc(&xprt->sc_dma_used);
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
DMA_FROM_DEVICE);
......@@ -566,7 +510,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
* will call the recvfrom method on the listen xprt which will accept the new
* connection.
*/
static void handle_connect_req(struct rdma_cm_id *new_cma_id)
static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
......@@ -583,6 +527,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
/* Save client advertised inbound read limit for use later in accept. */
newxprt->sc_ord = client_ird;
/* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
......@@ -619,7 +566,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_CONNECT_REQUEST:
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
"event=%d\n", cma_id, cma_id->context, event->event);
handle_connect_req(cma_id);
handle_connect_req(cma_id,
event->param.conn.responder_resources);
break;
case RDMA_CM_EVENT_ESTABLISHED:
......@@ -793,8 +741,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
(size_t)svcrdma_max_requests);
newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom,
(size_t)svcrdma_ord);
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
if (IS_ERR(newxprt->sc_pd)) {
......@@ -987,7 +939,6 @@ static void __svc_rdma_free(struct work_struct *work)
* cm_id because the device ptr is needed to unmap the dma in
* svc_rdma_put_context.
*/
spin_lock_bh(&rdma->sc_read_complete_lock);
while (!list_empty(&rdma->sc_read_complete_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_read_complete_q.next,
......@@ -996,10 +947,8 @@ static void __svc_rdma_free(struct work_struct *work)
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1);
}
spin_unlock_bh(&rdma->sc_read_complete_lock);
/* Destroy queued, but not processed recv completions */
spin_lock_bh(&rdma->sc_rq_dto_lock);
while (!list_empty(&rdma->sc_rq_dto_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_rq_dto_q.next,
......@@ -1008,10 +957,10 @@ static void __svc_rdma_free(struct work_struct *work)
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1);
}
spin_unlock_bh(&rdma->sc_rq_dto_lock);
/* Warn if we leaked a resource or under-referenced */
WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
......@@ -1032,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
/* Destroy the CM ID */
rdma_destroy_id(rdma->sc_cm_id);
destroy_context_cache(rdma);
kfree(rdma);
}
......@@ -1132,6 +1080,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
/* Prepare SGE for local address */
atomic_inc(&xprt->sc_dma_used);
sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
sge.lkey = xprt->sc_phys_mr->lkey;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment