Commit ec186a65 authored by Trond Myklebust

UDP and TCP zero copy code for the NFS client. The main interest
of this patch is to eliminate the use of xdr_kmap() and xdr_kunmap()
by sending the buffer piece by piece, using MSG_MORE to mark that more
data follows. xdr_kmap() is deadlock-prone because it has to kmap()
several pages at the same time.
parent 15b425e3
...@@ -157,6 +157,11 @@ typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); ...@@ -157,6 +157,11 @@ typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
skb_reader_t *, skb_read_actor_t); skb_reader_t *, skb_read_actor_t);
/* Forward declarations: only pointers to these types appear in the prototype below. */
struct socket;
struct sockaddr;
/*
 * Send the contents of an xdr_buf over a socket.
 *
 * Arguments, in order: the socket, an optional destination address and its
 * length, the buffer to send, the byte offset within the buffer at which to
 * start, and the message flags to pass to each send.
 */
extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
struct xdr_buf *, unsigned int, int);
/* /*
* Provide some simple tools for XDR buffer overflow-checking etc. * Provide some simple tools for XDR buffer overflow-checking etc.
*/ */
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/in.h> #include <linux/in.h>
#include <linux/net.h>
#include <net/sock.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/msg_prot.h>
...@@ -314,8 +316,113 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, ...@@ -314,8 +316,113 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
} while ((pglen -= len) != 0); } while ((pglen -= len) != 0);
copy_tail: copy_tail:
len = xdr->tail[0].iov_len; len = xdr->tail[0].iov_len;
if (len) if (base < len)
copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len); copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
}
/*
 * xdr_sendpages - send an xdr_buf over a socket, starting 'base' bytes in.
 *
 * The buffer is sent in up to three stages: head[0] via sock_sendmsg(),
 * the page array one page fragment at a time via the socket's sendpage
 * operation, and tail[0] via sock_sendmsg().
 *
 * sock:     socket to send on
 * addr:     destination address, or NULL; used as msg_name for the head
 *           send only (the tail send does not set msg_name)
 * addrlen:  length of addr, used as msg_namelen
 * xdr:      buffer to send
 * base:     number of bytes at the start of the buffer to skip
 * msgflags: flags passed to every send; MSG_MORE is OR-ed in where the
 *           code sees that more data follows
 *
 * Returns the accumulated byte count of the sends. While that count is
 * still zero, the return value of the latest send (which may be negative)
 * is stored instead. A head or page send that returns anything other than
 * the requested length stops the transfer.
 */
int
xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
struct xdr_buf *xdr, unsigned int base, int msgflags)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
int err, ret = 0;
ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
mm_segment_t oldfs;
len = xdr->head[0].iov_len;
/*
 * Send the head if part of it is still unsent, or if an address was
 * supplied and nothing has been skipped (in which case the send is
 * issued even when the head is empty).
 */
if (base < len || (addr != NULL && base == 0)) {
struct iovec iov = {
.iov_base = xdr->head[0].iov_base + base,
.iov_len = len - base,
};
struct msghdr msg = {
.msg_name = addr,
.msg_namelen = addrlen,
.msg_flags = msgflags,
};
/* Attach the iovec only when there is head data to send. */
if (iov.iov_len != 0) {
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
}
/* The buffer is longer than the head, so more data follows. */
if (xdr->len > len)
msg.msg_flags |= MSG_MORE;
/* Save the current segment, switch to get_ds() for the call, then restore it. */
oldfs = get_fs(); set_fs(get_ds());
err = sock_sendmsg(sock, &msg, iov.iov_len);
set_fs(oldfs);
/* Nothing counted so far: record this result as-is; otherwise add bytes sent. */
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
/* Short send or error: stop here. */
if (err != iov.iov_len)
goto out;
base = 0;
} else
/* Head is skipped entirely: make base relative to the page data. */
base -= len;
if (pglen == 0)
goto copy_tail;
/* All page data is skipped as well: make base relative to the tail. */
if (base >= pglen) {
base -= pglen;
goto copy_tail;
}
/*
 * Combine the remaining skip with page_base, then split the result into
 * a starting page (ppage) and a byte offset within that page (base).
 */
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
/* Use the socket's sendpage operation if it has one, else sock_no_sendpage. */
sendpage = sock->ops->sendpage ? : sock_no_sendpage;
do {
int flags = msgflags;
/* Send at most up to the end of the current page, and no more than remains. */
len = PAGE_CACHE_SIZE;
if (base)
len -= base;
if (pglen < len)
len = pglen;
/* More follows unless this is the last page fragment and the tail is empty. */
if (pglen != len || xdr->tail[0].iov_len != 0)
flags |= MSG_MORE;
/* Hmm... We might be dealing with highmem pages */
/* Once switched, sock_no_sendpage stays selected for the remaining pages. */
if (PageHighMem(*ppage))
sendpage = sock_no_sendpage;
err = sendpage(sock, *ppage, base, len, flags);
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
/* Short send or error: stop here. */
if (err != len)
goto out;
/* Every page after the first is sent from offset 0. */
base = 0;
ppage++;
} while ((pglen -= len) != 0);
copy_tail:
len = xdr->tail[0].iov_len;
/* Send whatever part of the tail lies beyond the remaining skip. */
if (base < len) {
struct iovec iov = {
.iov_base = xdr->tail[0].iov_base + base,
.iov_len = len - base,
};
struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_flags = msgflags,
};
/* Same segment save/switch/restore as for the head send. */
oldfs = get_fs(); set_fs(get_ds());
err = sock_sendmsg(sock, &msg, iov.iov_len);
set_fs(oldfs);
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
}
out:
return ret;
} }
......
...@@ -213,11 +213,10 @@ static inline int ...@@ -213,11 +213,10 @@ static inline int
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{ {
struct socket *sock = xprt->sock; struct socket *sock = xprt->sock;
struct msghdr msg;
struct xdr_buf *xdr = &req->rq_snd_buf; struct xdr_buf *xdr = &req->rq_snd_buf;
struct iovec niv[MAX_IOVEC]; struct sockaddr *addr = NULL;
unsigned int niov, slen, skip; int addrlen = 0;
mm_segment_t oldfs; unsigned int skip;
int result; int result;
if (!sock) if (!sock)
...@@ -227,27 +226,18 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) ...@@ -227,27 +226,18 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
req->rq_svec->iov_base, req->rq_svec->iov_base,
req->rq_svec->iov_len); req->rq_svec->iov_len);
/* For UDP, we need to provide an address */
if (!xprt->stream) {
addr = (struct sockaddr *) &xprt->addr;
addrlen = sizeof(xprt->addr);
}
/* Dont repeat bytes */ /* Dont repeat bytes */
skip = req->rq_bytes_sent; skip = req->rq_bytes_sent;
slen = xdr->len - skip;
niov = xdr_kmap(niv, xdr, skip);
msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL;
msg.msg_iov = niv;
msg.msg_iovlen = niov;
msg.msg_name = (struct sockaddr *) &xprt->addr;
msg.msg_namelen = sizeof(xprt->addr);
msg.msg_control = NULL;
msg.msg_controllen = 0;
oldfs = get_fs(); set_fs(get_ds());
clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
result = sock_sendmsg(sock, &msg, slen);
set_fs(oldfs);
xdr_kunmap(xdr, skip); clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result); dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
if (result >= 0) if (result >= 0)
return result; return result;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment