Commit a0e7d495 authored by Neil Brown's avatar Neil Brown Committed by Linus Torvalds

[PATCH] kNFSd: Convert nfsd to use a list of pages instead of one big buffer

This means:
  1/ We don't need an order-4 allocation for each nfsd that starts
  2/ We don't need an order-4 allocation in skb_linearize when
     we receive a 32K write request
  3/ It will be easier to incorporate the zero-copy read changes

The pages are handed around using an xdr_buf (instead of svc_buf)
much like the NFS client so future crypto code can use the same
data structure for both client and server.

The code assumes that most requests and replies fit in a single page.
The exceptions are assumed to have some largish 'data' bit, and the
rest must fit in a single page.
The 'data' bits are file data, readdir data, and symlinks.
There must be only one 'data' bit per request.
This is all fine for nfs/nlm.

This isn't complete:
  1/ NFSv4 hasn't been converted yet (it won't compile)
  2/ NFSv3 allows symlinks up to 4096, but the code will only support
     up to about 3800 at the moment
  3/ readdir responses are limited to about 3800.

but I thought that patch was big enough, and the rest can come
later.


This patch introduces vfs_readv and vfs_writev as parallels to
vfs_read and vfs_write.  This means there is a fair bit of
duplication in read_write.c that should probably be tidied up...
parent 335c5fc7
......@@ -216,25 +216,6 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
return p;
}
/*
* Check buffer bounds after decoding arguments
*/
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_argbuf;
return p - buf->base <= buf->buflen;
}
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_resbuf;
buf->len = p - buf->base;
return (buf->len <= buf->buflen);
}
/*
* First, the server side XDR functions
......
......@@ -222,26 +222,6 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
}
/*
* Check buffer bounds after decoding arguments
*/
static int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_argbuf;
return p - buf->base <= buf->buflen;
}
static int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_resbuf;
buf->len = p - buf->base;
return (buf->len <= buf->buflen);
}
/*
* First, the server side XDR functions
*/
......
......@@ -43,11 +43,11 @@ static int nfs3_ftypes[] = {
/*
* Reserve room in the send buffer
*/
static void
svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr)
static inline void
svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{
*ptr = buf->buf + nr;
*len = buf->buflen - buf->len - nr;
*ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
*len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
}
/*
......@@ -150,7 +150,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status, post_op_attr, and path length */
svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy,
svcbuf_reserve(&rqstp->rq_res, &path, &dummy,
1 + NFS3_POST_OP_ATTR_WORDS + 1);
/* Read the symlink. */
......@@ -167,8 +167,7 @@ static int
nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp)
{
u32 * buffer;
int nfserr, avail;
int nfserr;
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh),
......@@ -179,18 +178,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail,
1 + NFS3_POST_OP_ATTR_WORDS + 3);
resp->count = argp->count;
if ((avail << 2) < resp->count)
resp->count = avail << 2;
if (NFSSVC_MAXBLKSIZE < resp->count)
resp->count = NFSSVC_MAXBLKSIZE;
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + argp->count +4);
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
(char *) buffer,
argp->vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
......@@ -220,7 +218,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh,
argp->offset,
argp->data,
argp->vec, argp->vlen,
argp->len,
&resp->committed);
resp->count = argp->count;
......@@ -447,7 +445,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count,
svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
......@@ -490,7 +488,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count,
svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
......
......@@ -13,6 +13,7 @@
#include <linux/spinlock.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svc.h>
......@@ -269,27 +270,6 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
return encode_post_op_attr(rqstp, p, fhp);
}
/*
* Check buffer bounds after decoding arguments
*/
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_argbuf;
return p - buf->base <= buf->buflen;
}
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_resbuf;
buf->len = p - buf->base;
dprintk("nfsd: ressize_check p %p base %p len %d\n",
p, buf->base, buf->buflen);
return (buf->len <= buf->buflen);
}
/*
* XDR decode functions
......@@ -342,11 +322,29 @@ int
nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readargs *args)
{
int len;
int v,pn;
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
return 0;
args->count = ntohl(*p++);
len = args->count = ntohl(*p++);
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
/* set up the iovec */
v=0;
while (len > 0) {
pn = rqstp->rq_resused;
take_page(rqstp);
args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
args->vlen = v;
return xdr_argsize_check(rqstp, p);
}
......@@ -354,17 +352,33 @@ int
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args)
{
int len, v;
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
return 0;
args->count = ntohl(*p++);
args->stable = ntohl(*p++);
args->len = ntohl(*p++);
args->data = (char *) p;
p += XDR_QUADLEN(args->len);
len = args->len = ntohl(*p++);
args->vec[0].iov_base = (void*)p;
args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
v= 0;
while (len > args->vec[v].iov_len) {
len -= args->vec[v].iov_len;
v++;
args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
args->vec[v].iov_len = PAGE_SIZE;
}
args->vec[v].iov_len = len;
args->vlen = v+1;
return xdr_argsize_check(rqstp, p);
return args->count == args->len && args->vec[0].iov_len > 0;
}
int
......@@ -584,9 +598,23 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
*p++ = htonl(resp->count);
*p++ = htonl(resp->eof);
*p++ = htonl(resp->count); /* xdr opaque count */
p += XDR_QUADLEN(resp->count);
}
return xdr_ressize_check(rqstp, p);
xdr_ressize_check(rqstp, p);
/* now update rqstp->rq_res to reflect data as well */
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad with tail */
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
}
rqstp->rq_res.len =
rqstp->rq_res.head[0].iov_len+
rqstp->rq_res.page_len+
rqstp->rq_res.tail[0].iov_len;
return 1;
} else
return xdr_ressize_check(rqstp, p);
}
/* WRITE */
......
......@@ -41,7 +41,7 @@ static struct svc_cacherep * lru_tail;
static struct svc_cacherep * nfscache;
static int cache_disabled = 1;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data);
static int nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *vec);
/*
* locking for the reply cache:
......@@ -107,7 +107,7 @@ nfsd_cache_shutdown(void)
for (rp = lru_head; rp; rp = rp->c_lru_next) {
if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
kfree(rp->c_replbuf.buf);
kfree(rp->c_replvec.iov_base);
}
cache_disabled = 1;
......@@ -242,8 +242,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
/* release any buffer */
if (rp->c_type == RC_REPLBUFF) {
kfree(rp->c_replbuf.buf);
rp->c_replbuf.buf = NULL;
kfree(rp->c_replvec.iov_base);
rp->c_replvec.iov_base = NULL;
}
rp->c_type = RC_NOCACHE;
out:
......@@ -272,11 +272,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
case RC_NOCACHE:
break;
case RC_REPLSTAT:
svc_putu32(&rqstp->rq_resbuf, rp->c_replstat);
svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
rtn = RC_REPLY;
break;
case RC_REPLBUFF:
if (!nfsd_cache_append(rqstp, &rp->c_replbuf))
if (!nfsd_cache_append(rqstp, &rp->c_replvec))
goto out; /* should not happen */
rtn = RC_REPLY;
break;
......@@ -308,13 +308,14 @@ void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
{
struct svc_cacherep *rp;
struct svc_buf *resp = &rqstp->rq_resbuf, *cachp;
struct iovec *resv = &rqstp->rq_res.head[0], *cachv;
int len;
if (!(rp = rqstp->rq_cacherep) || cache_disabled)
return;
len = resp->len - (statp - resp->base);
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
......@@ -329,16 +330,16 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
rp->c_replstat = *statp;
break;
case RC_REPLBUFF:
cachp = &rp->c_replbuf;
cachp->buf = (u32 *) kmalloc(len << 2, GFP_KERNEL);
if (!cachp->buf) {
cachv = &rp->c_replvec;
cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
if (!cachv->iov_base) {
spin_lock(&cache_lock);
rp->c_state = RC_UNUSED;
spin_unlock(&cache_lock);
return;
}
cachp->len = len;
memcpy(cachp->buf, statp, len << 2);
cachv->iov_len = len << 2;
memcpy(cachv->iov_base, statp, len << 2);
break;
}
spin_lock(&cache_lock);
......@@ -353,19 +354,20 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
/*
* Copy cached reply to current reply buffer. Should always fit.
* FIXME as reply is in a page, we should just attach the page, and
* keep a refcount....
*/
static int
nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data)
nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *data)
{
struct svc_buf *resp = &rqstp->rq_resbuf;
struct iovec *vec = &rqstp->rq_res.head[0];
if (resp->len + data->len > resp->buflen) {
if (vec->iov_len + data->iov_len > PAGE_SIZE) {
printk(KERN_WARNING "nfsd: cached reply too large (%d).\n",
data->len);
data->iov_len);
return 0;
}
memcpy(resp->buf, data->buf, data->len << 2);
resp->buf += data->len;
resp->len += data->len;
memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
vec->iov_len += data->iov_len;
return 1;
}
......@@ -30,11 +30,11 @@ typedef struct svc_buf svc_buf;
#define NFSDDBG_FACILITY NFSDDBG_PROC
static void
svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr)
static inline void
svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{
*ptr = buf->buf + nr;
*len = buf->buflen - buf->len - nr;
*ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
*len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
}
static int
......@@ -109,7 +109,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status and path length */
svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2);
svcbuf_reserve(&rqstp->rq_res, &path, &dummy, 2);
/* Read the symlink. */
resp->len = NFS_MAXPATHLEN;
......@@ -127,8 +127,7 @@ static int
nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
struct nfsd_readres *resp)
{
u32 * buffer;
int nfserr, avail;
int nfserr;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
......@@ -137,22 +136,21 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
*/
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19);
if ((avail << 2) < argp->count) {
if (NFSSVC_MAXBLKSIZE < argp->count) {
printk(KERN_NOTICE
"oversized read request from %08x:%d (%d bytes)\n",
ntohl(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port),
argp->count);
argp->count = avail << 2;
argp->count = NFSSVC_MAXBLKSIZE;
}
svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
(char *) buffer,
argp->vec, argp->vlen,
&resp->count);
return nfserr;
......@@ -175,7 +173,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
argp->data,
argp->vec, argp->vlen,
argp->len,
&stable);
return nfserr;
......@@ -478,7 +476,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
argp->count, argp->cookie);
/* Reserve buffer space for status */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1);
svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1);
/* Shrink to the client read size */
if (count > (argp->count >> 2))
......
......@@ -277,7 +277,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
/* Decode arguments */
xdr = proc->pc_decode;
if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) {
if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
rqstp->rq_argp)) {
dprintk("nfsd: failed to decode arguments!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
*statp = rpc_garbage_args;
......@@ -293,14 +294,15 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
}
if (rqstp->rq_proc != 0)
svc_putu32(&rqstp->rq_resbuf, nfserr);
svc_putu32(&rqstp->rq_res.head[0], nfserr);
/* Encode result.
* For NFSv2, additional info is never returned in case of an error.
*/
if (!(nfserr && rqstp->rq_vers == 2)) {
xdr = proc->pc_encode;
if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
if (xdr && !xdr(rqstp, (u32*)(rqstp->rq_res.head[0].iov_base+rqstp->rq_res.head[0].iov_len),
rqstp->rq_resp)) {
/* Failed to encode result. Release cache entry */
dprintk("nfsd: failed to encode result!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
......
......@@ -14,6 +14,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h>
#include <linux/mm.h>
#define NFSDDBG_FACILITY NFSDDBG_XDR
......@@ -176,27 +177,6 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
return p;
}
/*
* Check buffer bounds after decoding arguments
*/
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_argbuf;
return p - buf->base <= buf->buflen;
}
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
struct svc_buf *buf = &rqstp->rq_resbuf;
buf->len = p - buf->base;
dprintk("nfsd: ressize_check p %p base %p len %d\n",
p, buf->base, buf->buflen);
return (buf->len <= buf->buflen);
}
/*
* XDR decode functions
......@@ -241,13 +221,31 @@ int
nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_readargs *args)
{
int len;
int v,pn;
if (!(p = decode_fh(p, &args->fh)))
return 0;
args->offset = ntohl(*p++);
args->count = ntohl(*p++);
args->totalsize = ntohl(*p++);
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
*/
v=0;
while (len > 0) {
pn=rqstp->rq_resused;
take_page(rqstp);
args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
args->vlen = v;
return xdr_argsize_check(rqstp, p);
}
......@@ -255,17 +253,30 @@ int
nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_writeargs *args)
{
int len;
int v;
if (!(p = decode_fh(p, &args->fh)))
return 0;
p++; /* beginoffset */
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
args->len = ntohl(*p++);
args->data = (char *) p;
p += XDR_QUADLEN(args->len);
return xdr_argsize_check(rqstp, p);
len = args->len = ntohl(*p++);
args->vec[0].iov_base = (void*)p;
args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
v = 0;
while (len > args->vec[v].iov_len) {
len -= args->vec[v].iov_len;
v++;
args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
args->vec[v].iov_len = PAGE_SIZE;
}
args->vec[v].iov_len = len;
args->vlen = v+1;
return args->vec[0].iov_len > 0;
}
int
......@@ -371,9 +382,22 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
{
p = encode_fattr(rqstp, p, &resp->fh);
*p++ = htonl(resp->count);
p += XDR_QUADLEN(resp->count);
return xdr_ressize_check(rqstp, p);
xdr_ressize_check(rqstp, p);
/* now update rqstp->rq_res to reflect data as well */
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad with tail */
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
}
rqstp->rq_res.len =
rqstp->rq_res.head[0].iov_len+
rqstp->rq_res.page_len+
rqstp->rq_res.tail[0].iov_len;
return 1;
}
int
......
......@@ -577,7 +577,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
*/
int
nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
char *buf, unsigned long *count)
struct iovec *vec, int vlen, unsigned long *count)
{
struct raparms *ra;
mm_segment_t oldfs;
......@@ -603,7 +603,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
oldfs = get_fs();
set_fs(KERNEL_DS);
err = vfs_read(&file, buf, *count, &offset);
err = vfs_readv(&file, vec, vlen, *count, &offset);
set_fs(oldfs);
/* Write back readahead params */
......@@ -629,7 +629,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
*/
int
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
char *buf, unsigned long cnt, int *stablep)
struct iovec *vec, int vlen,
unsigned long cnt, int *stablep)
{
struct svc_export *exp;
struct file file;
......@@ -677,7 +678,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
err = vfs_write(&file, buf, cnt, &offset);
err = vfs_writev(&file, vec, vlen, cnt, &offset);
if (err >= 0)
nfsdstats.io_write += cnt;
set_fs(oldfs);
......
......@@ -207,6 +207,53 @@ ssize_t vfs_read(struct file *file, char *buf, size_t count, loff_t *pos)
return ret;
}
/*
 * vfs_readv - kernel-side scatter read, parallel to vfs_read().
 *
 * Reads up to @count bytes from @file at offset *@pos into the @vlen
 * buffers described by @vec, advancing *@pos.  Returns the number of
 * bytes read, or a negative errno.  If the file_operations lacks a
 * ->readv method, the iovec is walked by hand with single-buffer reads.
 */
ssize_t vfs_readv(struct file *file, struct iovec *vec, int vlen, size_t count, loff_t *pos)
{
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
/* file must support some form of read before we go further */
if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
return -EINVAL;
/* mandatory-locking check over the whole requested range */
ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, *pos, count);
if (!ret) {
ret = security_ops->file_permission (file, MAY_READ);
if (!ret) {
if (file->f_op->readv)
ret = file->f_op->readv(file, vec, vlen, pos);
else {
/* do it by hand */
struct iovec *vector = vec;
ret = 0;
while (vlen > 0) {
void * base = vector->iov_base;
size_t len = vector->iov_len;
ssize_t nr;
vector++;
vlen--;
if (file->f_op->read)
nr = file->f_op->read(file, base, len, pos);
else
nr = do_sync_read(file, base, len, pos);
if (nr < 0) {
/* report the error only if nothing was read yet */
if (!ret) ret = nr;
break;
}
ret += nr;
/* short read ends the loop; partial total is returned */
if (nr != len)
break;
}
}
if (ret > 0)
dnotify_parent(file->f_dentry, DN_ACCESS);
}
}
return ret;
}
ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos)
{
struct kiocb kiocb;
......@@ -247,6 +294,53 @@ ssize_t vfs_write(struct file *file, const char *buf, size_t count, loff_t *pos)
return ret;
}
/*
 * vfs_writev - kernel-side gather write, parallel to vfs_write().
 *
 * Writes up to @count bytes to @file at offset *@pos from the @vlen
 * buffers described by @vec, advancing *@pos.  Returns the number of
 * bytes written, or a negative errno.  If the file_operations lacks a
 * ->writev method, the iovec is walked by hand with single-buffer writes.
 */
ssize_t vfs_writev(struct file *file, const struct iovec *vec, int vlen, size_t count, loff_t *pos)
{
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/* file must support some form of write before we go further */
if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
return -EINVAL;
/* mandatory-locking check over the whole requested range */
ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, *pos, count);
if (!ret) {
ret = security_ops->file_permission (file, MAY_WRITE);
if (!ret) {
if (file->f_op->writev)
ret = file->f_op->writev(file, vec, vlen, pos);
else {
/* do it by hand */
const struct iovec *vector = vec;
ret = 0;
while (vlen > 0) {
void * base = vector->iov_base;
size_t len = vector->iov_len;
ssize_t nr;
vector++;
vlen--;
if (file->f_op->write)
nr = file->f_op->write(file, base, len, pos);
else
nr = do_sync_write(file, base, len, pos);
if (nr < 0) {
/* report the error only if nothing was written yet */
if (!ret) ret = nr;
break;
}
ret += nr;
/* short write ends the loop; partial total is returned */
if (nr != len)
break;
}
}
if (ret > 0)
dnotify_parent(file->f_dentry, DN_MODIFY);
}
}
return ret;
}
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
struct file *file;
......
......@@ -795,6 +795,8 @@ struct seq_file;
extern ssize_t vfs_read(struct file *, char *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, struct iovec *, int, size_t, loff_t *);
extern ssize_t vfs_writev(struct file *, const struct iovec *, int, size_t, loff_t *);
/*
* NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
......
......@@ -32,12 +32,12 @@ struct svc_cacherep {
u32 c_vers;
unsigned long c_timestamp;
union {
struct svc_buf u_buffer;
struct iovec u_vec;
u32 u_status;
} c_u;
};
#define c_replbuf c_u.u_buffer
#define c_replvec c_u.u_vec
#define c_replstat c_u.u_status
/* cache entry states */
......
......@@ -88,9 +88,9 @@ int nfsd_open(struct svc_rqst *, struct svc_fh *, int,
int, struct file *);
void nfsd_close(struct file *);
int nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, char *, unsigned long *);
loff_t, struct iovec *,int, unsigned long *);
int nfsd_write(struct svc_rqst *, struct svc_fh *,
loff_t, char *, unsigned long, int *);
loff_t, struct iovec *,int, unsigned long, int *);
int nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
int nfsd_symlink(struct svc_rqst *, struct svc_fh *,
......
......@@ -29,16 +29,16 @@ struct nfsd_readargs {
struct svc_fh fh;
__u32 offset;
__u32 count;
__u32 totalsize;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
};
struct nfsd_writeargs {
svc_fh fh;
__u32 beginoffset;
__u32 offset;
__u32 totalcount;
__u8 * data;
int len;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
};
struct nfsd_createargs {
......
......@@ -33,6 +33,8 @@ struct nfsd3_readargs {
struct svc_fh fh;
__u64 offset;
__u32 count;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
};
struct nfsd3_writeargs {
......@@ -40,8 +42,9 @@ struct nfsd3_writeargs {
__u64 offset;
__u32 count;
int stable;
__u8 * data;
int len;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
};
struct nfsd3_createargs {
......
......@@ -48,43 +48,49 @@ struct svc_serv {
* This is used to determine the max number of pages nfsd is
* willing to return in a single READ operation.
*/
#define RPCSVC_MAXPAYLOAD 16384u
#define RPCSVC_MAXPAYLOAD (64*1024u)
/*
* Buffer to store RPC requests or replies in.
* Each server thread has one of these beasts.
* RPC Requests and replies are stored in one or more pages.
* We maintain an array of pages for each server thread.
* Requests are copied into these pages as they arrive. Remaining
* pages are available to write the reply into.
*
* Area points to the allocated memory chunk currently owned by the
* buffer. Base points to the buffer containing the request, which is
* different from area when directly reading from an sk_buff. buf is
* the current read/write position while processing an RPC request.
* Currently pages are all re-used by the same server. Later we
* will use ->sendpage to transmit pages with reduced copying. In
* that case we will need to give away the page and allocate new ones.
* In preparation for this, we explicitly move pages off the recv
* list onto the transmit list, and back.
*
* The array of iovecs can hold additional data that the server process
* may not want to copy into the RPC reply buffer, but pass to the
* network sendmsg routines directly. The prime candidate for this
* will of course be NFS READ operations, but one might also want to
* do something about READLINK and READDIR. It might be worthwhile
* to implement some generic readdir cache in the VFS layer...
* We use xdr_buf for holding responses as it fits well with NFS
* read responses (that have a header, and some data pages, and possibly
* a tail) and means we can share some client side routines.
*
* On the receiving end of the RPC server, the iovec may be used to hold
* the list of IP fragments once we get to process fragmented UDP
* datagrams directly.
* The xdr_buf.head iovec always points to the first page in the rq_*pages
* list. The xdr_buf.pages pointer points to the second page on that
* list. xdr_buf.tail points to the end of the first page.
* This assumes that the non-page part of an rpc reply will fit
* in a page - NFSd ensures this. lockd also has no trouble.
*/
#define RPCSVC_MAXIOV ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
struct svc_buf {
u32 * area; /* allocated memory */
u32 * base; /* base of RPC datagram */
int buflen; /* total length of buffer */
u32 * buf; /* read/write pointer */
int len; /* current end of buffer */
/* iovec for zero-copy NFS READs */
struct iovec iov[RPCSVC_MAXIOV];
int nriov;
};
#define svc_getu32(argp, val) { (val) = *(argp)->buf++; (argp)->len--; }
#define svc_putu32(resp, val) { *(resp)->buf++ = (val); (resp)->len++; }
#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
/*
 * Pull one 32-bit word off the front of @iov, advancing iov_base and
 * shrinking iov_len.  Caller must ensure at least 4 bytes remain.
 */
static inline u32 svc_getu32(struct iovec *iov)
{
u32 val, *vp;
vp = iov->iov_base;
val = *vp++;
iov->iov_base = (void*)vp;
iov->iov_len -= sizeof(u32);
return val;
}
/*
 * Append one 32-bit word at the current end of @iov and grow iov_len.
 * Caller must ensure the underlying buffer has room (NFSd keeps the
 * non-data part of a reply within one page).
 * NOTE(review): arithmetic on the void* iov_base relies on the GCC
 * byte-sized-void* extension, as is usual in kernel code.
 */
static inline void svc_putu32(struct iovec *iov, u32 val)
{
u32 *vp = iov->iov_base + iov->iov_len;
*vp = val;
iov->iov_len += sizeof(u32);
}
/*
* The context of a single thread, including the request currently being
* processed.
......@@ -102,9 +108,15 @@ struct svc_rqst {
struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
struct svc_buf rq_defbuf; /* default buffer */
struct svc_buf rq_argbuf; /* argument buffer */
struct svc_buf rq_resbuf; /* result buffer */
struct xdr_buf rq_arg;
struct xdr_buf rq_res;
struct page * rq_argpages[RPCSVC_MAXPAGES];
struct page * rq_respages[RPCSVC_MAXPAGES];
short rq_argused; /* pages used for argument */
short rq_arghi; /* pages available in argument page list */
short rq_resused; /* pages used for result */
u32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
u32 rq_vers; /* program version */
......@@ -136,6 +148,38 @@ struct svc_rqst {
wait_queue_head_t rq_wait; /* synchronization */
};
/*
* Check buffer bounds after decoding arguments
*/
/*
 * Check that the decode pointer @p did not run past the end of the
 * request head iovec; returns non-zero when the decoded arguments fit.
 * NOTE(review): the pointer difference is signed while iov_len is
 * unsigned, so @p must not point before iov_base — true for all callers,
 * which only advance p while decoding.
 */
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
char *cp = (char *)p;
struct iovec *vec = &rqstp->rq_arg.head[0];
return cp - (char*)vec->iov_base <= vec->iov_len;
}
/*
 * Finalize the reply head: set its iov_len from the encode pointer @p,
 * mirror that into rq_res.len, and return non-zero when the encoded
 * reply head still fits in a single page.
 */
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
struct iovec *vec = &rqstp->rq_res.head[0];
char *cp = (char*)p;
vec->iov_len = cp - (char*)vec->iov_base;
rqstp->rq_res.len = vec->iov_len;
return vec->iov_len <= PAGE_SIZE;
}
/*
 * Move one spare page from the argument page list onto the result page
 * list, so the reply can be built into it.  Returns 0 on success or
 * -ENOMEM when no unused argument pages remain.
 */
static inline int take_page(struct svc_rqst *rqstp)
{
if (rqstp->rq_arghi <= rqstp->rq_argused)
return -ENOMEM;
rqstp->rq_respages[rqstp->rq_resused++] =
rqstp->rq_argpages[--rqstp->rq_arghi];
return 0;
}
struct svc_deferred_req {
struct svc_serv *serv;
u32 prot; /* protocol (UDP or TCP) */
......
......@@ -253,7 +253,9 @@ EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(get_unused_fd);
EXPORT_SYMBOL(vfs_read);
EXPORT_SYMBOL(vfs_readv);
EXPORT_SYMBOL(vfs_write);
EXPORT_SYMBOL(vfs_writev);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
......
......@@ -13,6 +13,7 @@
#include <linux/net.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <linux/mm.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
......@@ -35,7 +36,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
memset(serv, 0, sizeof(*serv));
serv->sv_program = prog;
serv->sv_nrthreads = 1;
......@@ -105,35 +105,42 @@ svc_destroy(struct svc_serv *serv)
}
/*
* Allocate an RPC server buffer
* Later versions may do nifty things by allocating multiple pages
* of memory directly and putting them into the bufp->iov.
* Allocate an RPC server's buffer space.
* We allocate pages and place them in rq_argpages.
*/
int
svc_init_buffer(struct svc_buf *bufp, unsigned int size)
static int
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{
if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL)))
return 0;
bufp->base = bufp->area;
bufp->buf = bufp->area;
bufp->len = 0;
bufp->buflen = size >> 2;
bufp->iov[0].iov_base = bufp->area;
bufp->iov[0].iov_len = size;
bufp->nriov = 1;
return 1;
int pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
int arghi;
rqstp->rq_argused = 0;
rqstp->rq_resused = 0;
arghi = 0;
if (pages > RPCSVC_MAXPAGES)
BUG();
while (pages) {
struct page *p = alloc_page(GFP_KERNEL);
if (!p)
break;
rqstp->rq_argpages[arghi++] = p;
pages--;
}
rqstp->rq_arghi = arghi;
return ! pages;
}
/*
* Release an RPC server buffer
*/
void
svc_release_buffer(struct svc_buf *bufp)
static void
svc_release_buffer(struct svc_rqst *rqstp)
{
kfree(bufp->area);
bufp->area = 0;
while (rqstp->rq_arghi)
put_page(rqstp->rq_argpages[--rqstp->rq_arghi]);
while (rqstp->rq_resused)
put_page(rqstp->rq_respages[--rqstp->rq_resused]);
rqstp->rq_argused = 0;
}
/*
......@@ -154,7 +161,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz))
|| !svc_init_buffer(rqstp, serv->sv_bufsz))
goto out_thread;
serv->sv_nrthreads++;
......@@ -180,7 +187,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
svc_release_buffer(&rqstp->rq_defbuf);
svc_release_buffer(rqstp);
if (rqstp->rq_resp)
kfree(rqstp->rq_resp);
if (rqstp->rq_argp)
......@@ -242,37 +249,51 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
struct svc_program *progp;
struct svc_version *versp = NULL; /* compiler food */
struct svc_procedure *procp = NULL;
struct svc_buf * argp = &rqstp->rq_argbuf;
struct svc_buf * resp = &rqstp->rq_resbuf;
struct iovec * argv = &rqstp->rq_arg.head[0];
struct iovec * resv = &rqstp->rq_res.head[0];
kxdrproc_t xdr;
u32 *bufp, *statp;
u32 *statp;
u32 dir, prog, vers, proc,
auth_stat, rpc_stat;
rpc_stat = rpc_success;
bufp = argp->buf;
if (argp->len < 5)
if (argv->iov_len < 6*4)
goto err_short_len;
dir = ntohl(*bufp++);
vers = ntohl(*bufp++);
/* setup response xdr_buf.
* Initially it has just one page
*/
take_page(rqstp); /* must succeed */
resv->iov_base = page_address(rqstp->rq_respages[0]);
resv->iov_len = 0;
rqstp->rq_res.pages = rqstp->rq_respages+1;
rqstp->rq_res.len = 0;
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = 0;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putu32(resv, 0);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
dir = ntohl(svc_getu32(argv));
vers = ntohl(svc_getu32(argv));
/* First words of reply: */
svc_putu32(resp, xdr_one); /* REPLY */
svc_putu32(resp, xdr_zero); /* ACCEPT */
svc_putu32(resv, xdr_one); /* REPLY */
if (dir != 0) /* direction != CALL */
goto err_bad_dir;
if (vers != 2) /* RPC version number */
goto err_bad_rpc;
rqstp->rq_prog = prog = ntohl(*bufp++); /* program number */
rqstp->rq_vers = vers = ntohl(*bufp++); /* version number */
rqstp->rq_proc = proc = ntohl(*bufp++); /* procedure number */
svc_putu32(resv, xdr_zero); /* ACCEPT */
argp->buf += 5;
argp->len -= 5;
rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */
rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */
/*
* Decode auth data, and add verifier to reply buffer.
......@@ -307,8 +328,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
serv->sv_stats->rpccnt++;
/* Build the reply header. */
statp = resp->buf;
svc_putu32(resp, rpc_success); /* RPC_SUCCESS */
statp = resv->iov_base +resv->iov_len;
svc_putu32(resv, rpc_success); /* RPC_SUCCESS */
/* Bump per-procedure stats counter */
procp->pc_count++;
......@@ -327,14 +348,14 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
if (!versp->vs_dispatch) {
/* Decode arguments */
xdr = procp->pc_decode;
if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp))
if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
goto err_garbage;
*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
/* Encode reply */
if (*statp == rpc_success && (xdr = procp->pc_encode)
&& !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
&& !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
dprintk("svc: failed to encode reply\n");
/* serv->sv_stats->rpcsystemerr++; */
*statp = rpc_system_err;
......@@ -347,7 +368,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* Check RPC status result */
if (*statp != rpc_success)
resp->len = statp + 1 - resp->base;
resv->iov_len = ((void*)statp) - resv->iov_base + 4;
/* Release reply info */
if (procp->pc_release)
......@@ -369,7 +390,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
err_short_len:
#ifdef RPC_PARANOIA
printk("svc: short len %d, dropping request\n", argp->len);
printk("svc: short len %d, dropping request\n", argv->iov_len);
#endif
goto dropit; /* drop request */
......@@ -382,18 +403,19 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
resp->buf[-1] = xdr_one; /* REJECT */
svc_putu32(resp, xdr_zero); /* RPC_MISMATCH */
svc_putu32(resp, xdr_two); /* Only RPCv2 supported */
svc_putu32(resp, xdr_two);
svc_putu32(resv, xdr_one); /* REJECT */
svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */
svc_putu32(resv, xdr_two); /* Only RPCv2 supported */
svc_putu32(resv, xdr_two);
goto sendit;
err_bad_auth:
dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
serv->sv_stats->rpcbadauth++;
resp->buf[-1] = xdr_one; /* REJECT */
svc_putu32(resp, xdr_one); /* AUTH_ERROR */
svc_putu32(resp, auth_stat); /* status */
resv->iov_len -= 4;
svc_putu32(resv, xdr_one); /* REJECT */
svc_putu32(resv, xdr_one); /* AUTH_ERROR */
svc_putu32(resv, auth_stat); /* status */
goto sendit;
err_bad_prog:
......@@ -403,7 +425,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* else it is just a Solaris client seeing if ACLs are supported */
#endif
serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_prog_unavail);
svc_putu32(resv, rpc_prog_unavail);
goto sendit;
err_bad_vers:
......@@ -411,9 +433,9 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: unknown version (%d)\n", vers);
#endif
serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_prog_mismatch);
svc_putu32(resp, htonl(progp->pg_lovers));
svc_putu32(resp, htonl(progp->pg_hivers));
svc_putu32(resv, rpc_prog_mismatch);
svc_putu32(resv, htonl(progp->pg_lovers));
svc_putu32(resv, htonl(progp->pg_hivers));
goto sendit;
err_bad_proc:
......@@ -421,7 +443,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: unknown procedure (%d)\n", proc);
#endif
serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_proc_unavail);
svc_putu32(resv, rpc_proc_unavail);
goto sendit;
err_garbage:
......@@ -429,6 +451,6 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: failed to decode args\n");
#endif
serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_garbage_args);
svc_putu32(resv, rpc_garbage_args);
goto sendit;
}
......@@ -40,8 +40,7 @@ svc_authenticate(struct svc_rqst *rqstp, u32 *statp, u32 *authp, int proc)
*statp = rpc_success;
*authp = rpc_auth_ok;
svc_getu32(&rqstp->rq_argbuf, flavor);
flavor = ntohl(flavor);
flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
dprintk("svc: svc_authenticate (%d)\n", flavor);
if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])) {
......
......@@ -295,20 +295,20 @@ void svcauth_unix_purge(void)
static int
svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{
struct svc_buf *argp = &rqstp->rq_argbuf;
struct svc_buf *resp = &rqstp->rq_resbuf;
struct iovec *argv = &rqstp->rq_arg.head[0];
struct iovec *resv = &rqstp->rq_res.head[0];
int rv=0;
struct ip_map key, *ipm;
if ((argp->len -= 3) < 0) {
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
}
if (*(argp->buf)++ != 0) { /* we already skipped the flavor */
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
if (*(argp->buf)++ != RPC_AUTH_NULL || *(argp->buf)++ != 0) {
if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return SVC_DENIED;
......@@ -320,8 +320,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
rqstp->rq_cred.cr_groups[0] = NOGROUP;
/* Put NULL verifier */
svc_putu32(resp, RPC_AUTH_NULL);
svc_putu32(resp, 0);
svc_putu32(resv, RPC_AUTH_NULL);
svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr;
......@@ -376,55 +376,54 @@ struct auth_ops svcauth_null = {
int
svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{
struct svc_buf *argp = &rqstp->rq_argbuf;
struct svc_buf *resp = &rqstp->rq_resbuf;
struct iovec *argv = &rqstp->rq_arg.head[0];
struct iovec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
u32 *bufp = argp->buf, slen, i;
int len = argp->len;
u32 slen, i;
int len = argv->iov_len;
int rv=0;
struct ip_map key, *ipm;
if ((len -= 3) < 0)
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
bufp++; /* length */
bufp++; /* time stamp */
slen = XDR_QUADLEN(ntohl(*bufp++)); /* machname length */
if (slen > 64 || (len -= slen + 3) < 0)
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
bufp += slen; /* skip machname */
cred->cr_uid = ntohl(*bufp++); /* uid */
cred->cr_gid = ntohl(*bufp++); /* gid */
argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
slen = ntohl(*bufp++); /* gids length */
if (slen > 16 || (len -= slen + 2) < 0)
cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */
cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */
slen = ntohl(svc_getu32(argv)); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
for (i = 0; i < NGROUPS && i < slen; i++)
cred->cr_groups[i] = ntohl(*bufp++);
for (i = 0; i < slen; i++)
if (i < NGROUPS)
cred->cr_groups[i] = ntohl(svc_getu32(argv));
else
svc_getu32(argv);
if (i < NGROUPS)
cred->cr_groups[i] = NOGROUP;
bufp += (slen - i);
if (*bufp++ != RPC_AUTH_NULL || *bufp++ != 0) {
if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
argp->buf = bufp;
argp->len = len;
/* Put NULL verifier */
svc_putu32(resp, RPC_AUTH_NULL);
svc_putu32(resp, 0);
svc_putu32(resv, RPC_AUTH_NULL);
svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr;
ipm = ip_map_lookup(&key, 0);
rqstp->rq_client = NULL;
if (ipm)
switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
case -EAGAIN:
......
......@@ -234,7 +234,7 @@ svc_sock_received(struct svc_sock *svsk)
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
space += rqstp->rq_resbuf.len<<2;
space += rqstp->rq_res.head[0].iov_len;
if (space < rqstp->rq_reserved) {
struct svc_sock *svsk = rqstp->rq_sock;
......@@ -278,13 +278,12 @@ svc_sock_release(struct svc_rqst *rqstp)
* But first, check that enough space was reserved
* for the reply, otherwise we have a bug!
*/
if ((rqstp->rq_resbuf.len<<2) > rqstp->rq_reserved)
if ((rqstp->rq_res.len) > rqstp->rq_reserved)
printk(KERN_ERR "RPC request reserved %d but used %d\n",
rqstp->rq_reserved,
rqstp->rq_resbuf.len<<2);
rqstp->rq_res.len);
rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base;
rqstp->rq_resbuf.len = 0;
rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0);
rqstp->rq_sock = NULL;
......@@ -348,8 +347,9 @@ svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
len = sock_sendmsg(sock, &msg, buflen);
set_fs(oldfs);
dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d (addr %x)\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len,
rqstp->rq_addr.sin_addr.s_addr);
return len;
}
......@@ -480,13 +480,15 @@ svc_write_space(struct sock *sk)
/*
* Receive a datagram from a UDP socket.
*/
extern int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
u32 *data;
int err, len;
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
......@@ -512,33 +514,19 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
}
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
/* Sorry. */
if (skb_is_nonlinear(skb)) {
if (skb_linearize(skb, GFP_KERNEL) != 0) {
kfree_skb(skb);
svc_sock_received(svsk);
return 0;
}
}
len = skb->len - sizeof(struct udphdr);
if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
skb_free_datagram(svsk->sk_sk, skb);
svc_sock_received(svsk);
return 0;
}
if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
/* checksum error */
skb_free_datagram(svsk->sk_sk, skb);
svc_sock_received(svsk);
return 0;
}
len = skb->len - sizeof(struct udphdr);
data = (u32 *) (skb->data + sizeof(struct udphdr));
rqstp->rq_skbuff = skb;
rqstp->rq_argbuf.base = data;
rqstp->rq_argbuf.buf = data;
rqstp->rq_argbuf.len = (len >> 2);
rqstp->rq_argbuf.buflen = (len >> 2);
/* rqstp->rq_resbuf = rqstp->rq_defbuf; */
rqstp->rq_arg.len = len;
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
rqstp->rq_prot = IPPROTO_UDP;
/* Get sender address */
......@@ -546,6 +534,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_addr.sin_port = skb->h.uh->source;
rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
skb_free_datagram(svsk->sk_sk, skb);
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
......@@ -559,21 +549,36 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
struct svc_buf *bufp = &rqstp->rq_resbuf;
int error;
struct iovec vec[RPCSVC_MAXPAGES];
int v;
int base, len;
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
/* bufp->base = bufp->area; */
bufp->iov[0].iov_base = bufp->base;
bufp->iov[0].iov_len = bufp->len << 2;
error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
vec[0] = rqstp->rq_res.head[0];
v=1;
base=rqstp->rq_res.page_base;
len = rqstp->rq_res.page_len;
while (len) {
vec[v].iov_base = page_address(rqstp->rq_res.pages[v-1]) + base;
vec[v].iov_len = PAGE_SIZE-base;
if (len <= vec[v].iov_len)
vec[v].iov_len = len;
len -= vec[v].iov_len;
base = 0;
v++;
}
if (rqstp->rq_res.tail[0].iov_len) {
vec[v] = rqstp->rq_res.tail[0];
v++;
}
error = svc_sendto(rqstp, vec, v);
if (error == -ECONNREFUSED)
/* ICMP error on earlier request. */
error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
error = svc_sendto(rqstp, vec, v);
return error;
}
......@@ -785,8 +790,9 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct svc_buf *bufp = &rqstp->rq_argbuf;
int len;
struct iovec vec[RPCSVC_MAXPAGES];
int pnum, vlen;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(SK_DATA, &svsk->sk_flags),
......@@ -851,7 +857,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
}
svsk->sk_reclen &= 0x7fffffff;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
if (svsk->sk_reclen > (bufp->buflen<<2)) {
if (svsk->sk_reclen > serv->sv_bufsz) {
printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
(unsigned long) svsk->sk_reclen);
goto err_delete;
......@@ -869,30 +875,35 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
svc_sock_received(svsk);
return -EAGAIN; /* record not complete */
}
len = svsk->sk_reclen;
set_bit(SK_DATA, &svsk->sk_flags);
/* Frob argbuf */
bufp->iov[0].iov_base += 4;
bufp->iov[0].iov_len -= 4;
vec[0] = rqstp->rq_arg.head[0];
vlen = PAGE_SIZE;
pnum = 1;
while (vlen < len) {
vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
vec[pnum].iov_len = PAGE_SIZE;
pnum++;
vlen += PAGE_SIZE;
}
/* Now receive data */
len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen);
len = svc_recvfrom(rqstp, vec, pnum, len);
if (len < 0)
goto error;
dprintk("svc: TCP complete record (%d bytes)\n", len);
/* Position reply write pointer immediately after args,
* allowing for record length */
rqstp->rq_resbuf.base = rqstp->rq_argbuf.base + 1 + (len>>2);
rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base + 1;
rqstp->rq_resbuf.len = 1;
rqstp->rq_resbuf.buflen= rqstp->rq_argbuf.buflen - (len>>2) - 1;
rqstp->rq_arg.len = len;
rqstp->rq_arg.page_base = 0;
if (len <= rqstp->rq_arg.head[0].iov_len) {
rqstp->rq_arg.head[0].iov_len = len;
rqstp->rq_arg.page_len = 0;
} else {
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
}
rqstp->rq_skbuff = 0;
rqstp->rq_argbuf.buf += 1;
rqstp->rq_argbuf.len = (len >> 2);
rqstp->rq_argbuf.buflen = (len >> 2) +1;
rqstp->rq_prot = IPPROTO_TCP;
/* Reset TCP read info */
......@@ -928,23 +939,44 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
struct svc_buf *bufp = &rqstp->rq_resbuf;
struct xdr_buf *xbufp = &rqstp->rq_res;
struct iovec vec[RPCSVC_MAXPAGES];
int v;
int base, len;
int sent;
u32 reclen;
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
bufp->iov[0].iov_base = bufp->base;
bufp->iov[0].iov_len = bufp->len << 2;
bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
reclen = htonl(0x80000000|((xbufp->len ) - 4));
memcpy(xbufp->head[0].iov_base, &reclen, 4);
vec[0] = rqstp->rq_res.head[0];
v=1;
base= xbufp->page_base;
len = xbufp->page_len;
while (len) {
vec[v].iov_base = page_address(xbufp->pages[v-1]) + base;
vec[v].iov_len = PAGE_SIZE-base;
if (len <= vec[v].iov_len)
vec[v].iov_len = len;
len -= vec[v].iov_len;
base = 0;
v++;
}
if (xbufp->tail[0].iov_len) {
vec[v] = xbufp->tail[0];
v++;
}
sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
if (sent != bufp->len<<2) {
sent = svc_sendto(rqstp, vec, v);
if (sent != xbufp->len) {
printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only",
sent, bufp->len << 2);
sent, xbufp->len);
svc_delete_socket(rqstp->rq_sock);
sent = -EAGAIN;
}
......@@ -1016,6 +1048,8 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
{
struct svc_sock *svsk =NULL;
int len;
int pages;
struct xdr_buf *arg;
DECLARE_WAITQUEUE(wait, current);
dprintk("svc: server %p waiting for data (to = %ld)\n",
......@@ -1031,9 +1065,35 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
rqstp);
/* Initialize the buffers */
rqstp->rq_argbuf = rqstp->rq_defbuf;
rqstp->rq_resbuf = rqstp->rq_defbuf;
/* first reclaim pages that were moved to response list */
while (rqstp->rq_resused)
rqstp->rq_argpages[rqstp->rq_arghi++] =
rqstp->rq_respages[--rqstp->rq_resused];
/* now allocate needed pages. If we get a failure, sleep briefly */
pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
while (rqstp->rq_arghi < pages) {
struct page *p = alloc_page(GFP_KERNEL);
if (!p) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/2);
current->state = TASK_RUNNING;
continue;
}
rqstp->rq_argpages[rqstp->rq_arghi++] = p;
}
/* Make arg->head point to first page and arg->pages point to rest */
arg = &rqstp->rq_arg;
arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
arg->head[0].iov_len = PAGE_SIZE;
rqstp->rq_argused = 1;
arg->pages = rqstp->rq_argpages + 1;
arg->page_base = 0;
/* save at least one page for response */
arg->page_len = (pages-2)*PAGE_SIZE;
arg->len = (pages-1)*PAGE_SIZE;
arg->tail[0].iov_len = 0;
if (signalled())
return -EINTR;
......@@ -1109,12 +1169,6 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
rqstp->rq_userset = 0;
rqstp->rq_chandle.defer = svc_defer;
svc_getu32(&rqstp->rq_argbuf, rqstp->rq_xid);
svc_putu32(&rqstp->rq_resbuf, rqstp->rq_xid);
/* Assume that the reply consists of a single buffer. */
rqstp->rq_resbuf.nriov = 1;
if (serv->sv_stats)
serv->sv_stats->netcnt++;
return len;
......@@ -1354,23 +1408,25 @@ static struct cache_deferred_req *
svc_defer(struct cache_req *req)
{
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
int size = sizeof(struct svc_deferred_req) + (rqstp->rq_argbuf.buflen << 2);
int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.head[0].iov_len);
struct svc_deferred_req *dr;
if (rqstp->rq_arg.page_len)
return NULL; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred;
rqstp->rq_deferred = NULL;
} else {
/* FIXME maybe discard if size too large */
dr = kmalloc(size<<2, GFP_KERNEL);
dr = kmalloc(size, GFP_KERNEL);
if (dr == NULL)
return NULL;
dr->serv = rqstp->rq_server;
dr->prot = rqstp->rq_prot;
dr->addr = rqstp->rq_addr;
dr->argslen = rqstp->rq_argbuf.buflen;
memcpy(dr->args, rqstp->rq_argbuf.base, dr->argslen<<2);
dr->argslen = rqstp->rq_arg.head[0].iov_len >> 2;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base, dr->argslen<<2);
}
spin_lock(&rqstp->rq_server->sv_lock);
rqstp->rq_sock->sk_inuse++;
......@@ -1388,10 +1444,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
{
struct svc_deferred_req *dr = rqstp->rq_deferred;
rqstp->rq_argbuf.base = dr->args;
rqstp->rq_argbuf.buf = dr->args;
rqstp->rq_argbuf.len = dr->argslen;
rqstp->rq_argbuf.buflen = dr->argslen;
rqstp->rq_arg.head[0].iov_base = dr->args;
rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
rqstp->rq_arg.page_len = 0;
rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_prot = dr->prot;
rqstp->rq_addr = dr->addr;
return dr->argslen<<2;
......
......@@ -655,7 +655,7 @@ skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
static int
int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
skb_reader_t desc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment