Commit 67435598 authored by Trond Myklebust

Merge bk://linux.bkbits.net/linux-2.5

into hostme.bitkeeper.com:/ua/repos/n/nfsclient/linux-2.5
parents d498eb57 ec186a65
...@@ -367,7 +367,8 @@ void sync_inodes_sb(struct super_block *sb, int wait) ...@@ -367,7 +367,8 @@ void sync_inodes_sb(struct super_block *sb, int wait)
}; };
get_page_state(&ps); get_page_state(&ps);
wbc.nr_to_write = ps.nr_dirty + ps.nr_dirty / 4; wbc.nr_to_write = ps.nr_dirty + ps.nr_unstable +
(ps.nr_dirty + ps.nr_unstable) / 4;
spin_lock(&inode_lock); spin_lock(&inode_lock);
sync_sb_inodes(sb, &wbc); sync_sb_inodes(sb, &wbc);
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
......
...@@ -187,15 +187,7 @@ nlm_bind_host(struct nlm_host *host) ...@@ -187,15 +187,7 @@ nlm_bind_host(struct nlm_host *host)
host->h_nextrebind - jiffies); host->h_nextrebind - jiffies);
} }
} else { } else {
uid_t saved_fsuid = current->fsuid;
kernel_cap_t saved_cap = current->cap_effective;
/* Create RPC socket as root user so we get a priv port */
current->fsuid = 0;
cap_raise (current->cap_effective, CAP_NET_BIND_SERVICE);
xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
current->fsuid = saved_fsuid;
current->cap_effective = saved_cap;
if (xprt == NULL) if (xprt == NULL)
goto forgetit; goto forgetit;
...@@ -209,6 +201,7 @@ nlm_bind_host(struct nlm_host *host) ...@@ -209,6 +201,7 @@ nlm_bind_host(struct nlm_host *host)
} }
clnt->cl_autobind = 1; /* turn on pmap queries */ clnt->cl_autobind = 1; /* turn on pmap queries */
xprt->nocong = 1; /* No congestion control for NLM */ xprt->nocong = 1; /* No congestion control for NLM */
xprt->resvport = 1; /* NLM requires a reserved port */
host->h_rpcclnt = clnt; host->h_rpcclnt = clnt;
} }
...@@ -276,7 +269,7 @@ nlm_shutdown_hosts(void) ...@@ -276,7 +269,7 @@ nlm_shutdown_hosts(void)
dprintk("lockd: nuking all hosts...\n"); dprintk("lockd: nuking all hosts...\n");
for (i = 0; i < NLM_HOST_NRHASH; i++) { for (i = 0; i < NLM_HOST_NRHASH; i++) {
for (host = nlm_hosts[i]; host; host = host->h_next) for (host = nlm_hosts[i]; host; host = host->h_next)
host->h_expires = 0; host->h_expires = jiffies - 1;
} }
/* Then, perform a garbage collection pass */ /* Then, perform a garbage collection pass */
...@@ -323,6 +316,9 @@ nlm_gc_hosts(void) ...@@ -323,6 +316,9 @@ nlm_gc_hosts(void)
while ((host = *q) != NULL) { while ((host = *q) != NULL) {
if (host->h_count || host->h_inuse if (host->h_count || host->h_inuse
|| time_before(jiffies, host->h_expires)) { || time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
host->h_name, host->h_count,
host->h_inuse, host->h_expires);
q = &host->h_next; q = &host->h_next;
continue; continue;
} }
......
...@@ -123,6 +123,7 @@ nsm_create(void) ...@@ -123,6 +123,7 @@ nsm_create(void)
clnt->cl_softrtry = 1; clnt->cl_softrtry = 1;
clnt->cl_chatty = 1; clnt->cl_chatty = 1;
clnt->cl_oneshot = 1; clnt->cl_oneshot = 1;
xprt->resvport = 1; /* NSM requires a reserved port */
out: out:
return clnt; return clnt;
......
...@@ -83,7 +83,7 @@ nfs_opendir(struct inode *inode, struct file *filp) ...@@ -83,7 +83,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
lock_kernel(); lock_kernel();
/* Do cto revalidation */ /* Do cto revalidation */
if (server->flags & NFS_MOUNT_NOCTO) if (!(server->flags & NFS_MOUNT_NOCTO))
res = __nfs_revalidate_inode(server, inode); res = __nfs_revalidate_inode(server, inode);
/* Call generic open code in order to cache credentials */ /* Call generic open code in order to cache credentials */
if (!res) if (!res)
......
...@@ -83,7 +83,7 @@ nfs_file_open(struct inode *inode, struct file *filp) ...@@ -83,7 +83,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
if ((open = server->rpc_ops->file_open) != NULL) if ((open = server->rpc_ops->file_open) != NULL)
res = open(inode, filp); res = open(inode, filp);
/* Do cto revalidation */ /* Do cto revalidation */
else if (server->flags & NFS_MOUNT_NOCTO) else if (!(server->flags & NFS_MOUNT_NOCTO))
res = __nfs_revalidate_inode(server, inode); res = __nfs_revalidate_inode(server, inode);
/* Call generic open code in order to cache credentials */ /* Call generic open code in order to cache credentials */
if (!res) if (!res)
......
...@@ -280,8 +280,6 @@ nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) ...@@ -280,8 +280,6 @@ nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
err = nfs_wb_all(inode); err = nfs_wb_all(inode);
} else } else
nfs_commit_file(inode, NULL, 0, 0, 0); nfs_commit_file(inode, NULL, 0, 0, 0);
/* Avoid races. Tell upstream we've done all we were told to do */
wbc->nr_to_write = 0;
out: out:
return err; return err;
} }
...@@ -490,7 +488,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, u ...@@ -490,7 +488,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, u
int res; int res;
res = nfs_scan_list(&nfsi->commit, dst, file, idx_start, npages); res = nfs_scan_list(&nfsi->commit, dst, file, idx_start, npages);
nfsi->ncommit -= res; nfsi->ncommit -= res;
sub_page_state(nr_unstable,res);
if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
return res; return res;
...@@ -1009,6 +1006,7 @@ nfs_commit_done(struct rpc_task *task) ...@@ -1009,6 +1006,7 @@ nfs_commit_done(struct rpc_task *task)
{ {
struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
struct nfs_page *req; struct nfs_page *req;
int res = 0;
dprintk("NFS: %4d nfs_commit_done (status %d)\n", dprintk("NFS: %4d nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status); task->tk_pid, task->tk_status);
...@@ -1043,7 +1041,9 @@ nfs_commit_done(struct rpc_task *task) ...@@ -1043,7 +1041,9 @@ nfs_commit_done(struct rpc_task *task)
nfs_mark_request_dirty(req); nfs_mark_request_dirty(req);
next: next:
nfs_unlock_request(req); nfs_unlock_request(req);
res++;
} }
sub_page_state(nr_unstable,res);
} }
#endif #endif
......
...@@ -157,6 +157,11 @@ typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); ...@@ -157,6 +157,11 @@ typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
skb_reader_t *, skb_read_actor_t); skb_reader_t *, skb_read_actor_t);
struct socket;
struct sockaddr;
extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
struct xdr_buf *, unsigned int, int);
/* /*
* Provide some simple tools for XDR buffer overflow-checking etc. * Provide some simple tools for XDR buffer overflow-checking etc.
*/ */
......
...@@ -198,7 +198,7 @@ void xprt_sock_setbufsize(struct rpc_xprt *); ...@@ -198,7 +198,7 @@ void xprt_sock_setbufsize(struct rpc_xprt *);
#define XPRT_CONNECT 0 #define XPRT_CONNECT 0
#define xprt_connected(xp) (!(xp)->stream || test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate))
#define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate))
#define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate))
#define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate))
......
...@@ -270,7 +270,7 @@ int wakeup_bdflush(long nr_pages) ...@@ -270,7 +270,7 @@ int wakeup_bdflush(long nr_pages)
struct page_state ps; struct page_state ps;
get_page_state(&ps); get_page_state(&ps);
nr_pages = ps.nr_dirty; nr_pages = ps.nr_dirty + ps.nr_unstable;
} }
return pdflush_operation(background_writeout, nr_pages); return pdflush_operation(background_writeout, nr_pages);
} }
......
...@@ -57,8 +57,7 @@ static void call_refresh(struct rpc_task *task); ...@@ -57,8 +57,7 @@ static void call_refresh(struct rpc_task *task);
static void call_refreshresult(struct rpc_task *task); static void call_refreshresult(struct rpc_task *task);
static void call_timeout(struct rpc_task *task); static void call_timeout(struct rpc_task *task);
static void call_connect(struct rpc_task *task); static void call_connect(struct rpc_task *task);
static void child_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task);
static void child_connect_status(struct rpc_task *task);
static u32 * call_header(struct rpc_task *task); static u32 * call_header(struct rpc_task *task);
static u32 * call_verify(struct rpc_task *task); static u32 * call_verify(struct rpc_task *task);
...@@ -602,40 +601,48 @@ static void ...@@ -602,40 +601,48 @@ static void
call_connect(struct rpc_task *task) call_connect(struct rpc_task *task)
{ {
struct rpc_clnt *clnt = task->tk_client; struct rpc_clnt *clnt = task->tk_client;
struct rpc_task *child;
dprintk("RPC: %4d call_connect status %d\n", dprintk("RPC: %4d call_connect status %d\n",
task->tk_pid, task->tk_status); task->tk_pid, task->tk_status);
if (xprt_connected(clnt->cl_xprt)) {
task->tk_action = call_transmit; task->tk_action = call_transmit;
if (task->tk_status < 0 || !clnt->cl_xprt->stream)
return; return;
/* Run as a child to ensure it runs as an rpciod task. Rpciod
* guarantees we have the correct capabilities for socket bind
* to succeed. */
child = rpc_new_child(clnt, task);
if (child) {
child->tk_action = child_connect;
rpc_run_child(task, child, NULL);
} }
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
xprt_connect(task);
} }
/*
* 4b. Sort out connect result
*/
static void static void
child_connect(struct rpc_task *task) call_connect_status(struct rpc_task *task)
{ {
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
task->tk_status = 0; task->tk_status = 0;
task->tk_action = child_connect_status; if (status >= 0) {
xprt_connect(task); clnt->cl_stats->netreconn++;
} task->tk_action = call_transmit;
return;
}
static void /* Something failed: we may have to rebind */
child_connect_status(struct rpc_task *task) if (clnt->cl_autobind)
{ clnt->cl_port = 0;
if (task->tk_status == -EAGAIN) switch (status) {
task->tk_action = child_connect; case -ENOTCONN:
else case -ETIMEDOUT:
task->tk_action = NULL; case -EAGAIN:
task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect;
break;
default:
rpc_exit(task, -EIO);
}
} }
/* /*
...@@ -696,6 +703,7 @@ call_status(struct rpc_task *task) ...@@ -696,6 +703,7 @@ call_status(struct rpc_task *task)
break; break;
case -ECONNREFUSED: case -ECONNREFUSED:
case -ENOTCONN: case -ENOTCONN:
req->rq_bytes_sent = 0;
if (clnt->cl_autobind) if (clnt->cl_autobind)
clnt->cl_port = 0; clnt->cl_port = 0;
task->tk_action = call_bind; task->tk_action = call_bind;
......
...@@ -1110,9 +1110,10 @@ void rpc_show_tasks(void) ...@@ -1110,9 +1110,10 @@ void rpc_show_tasks(void)
alltask_for_each(t, le, &all_tasks) alltask_for_each(t, le, &all_tasks)
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
t->tk_pid, t->tk_pid,
(t->tk_msg.rpc_proc->p_proc ? t->tk_msg.rpc_proc->p_proc : -1), (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
t->tk_flags, t->tk_status, t->tk_flags, t->tk_status,
t->tk_client, t->tk_client->cl_prog, t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0),
t->tk_rqstp, t->tk_timeout, t->tk_rqstp, t->tk_timeout,
rpc_qname(t->tk_rpcwait), rpc_qname(t->tk_rpcwait),
t->tk_action, t->tk_exit); t->tk_action, t->tk_exit);
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/in.h> #include <linux/in.h>
#include <linux/net.h>
#include <net/sock.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/msg_prot.h>
...@@ -314,8 +316,113 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, ...@@ -314,8 +316,113 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
} while ((pglen -= len) != 0); } while ((pglen -= len) != 0);
copy_tail: copy_tail:
len = xdr->tail[0].iov_len; len = xdr->tail[0].iov_len;
if (len) if (base < len)
copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len); copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
}
/*
 * xdr_sendpages - transmit the contents of an xdr_buf on a socket.
 *
 * @sock:     socket to send on
 * @addr:     optional destination address; passed as msg_name on the
 *            head send only (may be NULL)
 * @addrlen:  length of @addr, passed as msg_namelen
 * @xdr:      buffer to send: head[0], then page_len bytes from the pages
 *            array, then tail[0]
 * @base:     number of bytes at the start of the buffer to skip
 * @msgflags: flags applied to every send; MSG_MORE is ORed in whenever
 *            more data follows the current piece
 *
 * The head and tail are sent with sock_sendmsg(); the page data is sent
 * one page at a time through the socket's sendpage operation.
 *
 * Sending stops at the first piece that is not sent in full.
 *
 * Return value: 'ret' starts at 0. While it is still 0 it takes the
 * result of the current send (byte count or negative error); after that,
 * positive results are added to it. So the caller gets the number of
 * bytes sent, or the error code when the failure happened before any
 * bytes had been counted.
 */
int
xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
struct xdr_buf *xdr, unsigned int base, int msgflags)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
int err, ret = 0;
ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
mm_segment_t oldfs;
/* Stage 1: the head. */
len = xdr->head[0].iov_len;
/*
 * Send the head if 'base' falls inside it. Also issue a sendmsg when an
 * address was supplied and nothing is being skipped, even if the head
 * is empty, so that the address is handed to the socket.
 */
if (base < len || (addr != NULL && base == 0)) {
struct iovec iov = {
.iov_base = xdr->head[0].iov_base + base,
.iov_len = len - base,
};
struct msghdr msg = {
.msg_name = addr,
.msg_namelen = addrlen,
.msg_flags = msgflags,
};
/* Only attach the iovec when there is actually head data to send */
if (iov.iov_len != 0) {
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
}
/* Page and/or tail data follows the head */
if (xdr->len > len)
msg.msg_flags |= MSG_MORE;
/* NOTE(review): presumably widens the address limit so sock_sendmsg
 * accepts the kernel-space iovec - confirm */
oldfs = get_fs(); set_fs(get_ds());
err = sock_sendmsg(sock, &msg, iov.iov_len);
set_fs(oldfs);
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
/* Error or short send: give up here */
if (err != iov.iov_len)
goto out;
base = 0;
} else
/* Head skipped entirely: make 'base' relative to the end of the head */
base -= len;
/* Stage 2: the page data. */
if (pglen == 0)
goto copy_tail;
/* 'base' lies beyond the page data: make it relative to the tail */
if (base >= pglen) {
base -= pglen;
goto copy_tail;
}
/*
 * Account for the skipped bytes and for the offset of the data within
 * the first page: step over whole pages and keep the remainder as the
 * offset into the first page to be sent.
 */
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
/* Use sock_no_sendpage when the socket provides no sendpage operation */
sendpage = sock->ops->sendpage ? : sock_no_sendpage;
do {
int flags = msgflags;
/* Bytes to send from this page: the rest of the page, capped at pglen */
len = PAGE_CACHE_SIZE;
if (base)
len -= base;
if (pglen < len)
len = pglen;
/* More pages or a non-empty tail still to come */
if (pglen != len || xdr->tail[0].iov_len != 0)
flags |= MSG_MORE;
/* Hmm... We might be dealing with highmem pages */
/* Note: 'sendpage' is not reset afterwards, so sock_no_sendpage is
 * used for this page and every page after it */
if (PageHighMem(*ppage))
sendpage = sock_no_sendpage;
err = sendpage(sock, *ppage, base, len, flags);
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
/* Error or short send: give up here */
if (err != len)
goto out;
/* Subsequent pages are sent from their start */
base = 0;
ppage++;
} while ((pglen -= len) != 0);
/* Stage 3: the tail, from offset 'base'. No msg_name is set here. */
copy_tail:
len = xdr->tail[0].iov_len;
if (base < len) {
struct iovec iov = {
.iov_base = xdr->tail[0].iov_base + base,
.iov_len = len - base,
};
struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_flags = msgflags,
};
oldfs = get_fs(); set_fs(get_ds());
err = sock_sendmsg(sock, &msg, iov.iov_len);
set_fs(oldfs);
if (ret == 0)
ret = err;
else if (err > 0)
ret += err;
}
out:
return ret;
} }
......
...@@ -85,7 +85,7 @@ ...@@ -85,7 +85,7 @@
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static inline void do_xprt_reserve(struct rpc_task *); static inline void do_xprt_reserve(struct rpc_task *);
static void xprt_disconnect(struct rpc_xprt *); static void xprt_disconnect(struct rpc_xprt *);
static void xprt_conn_status(struct rpc_task *task); static void xprt_connect_status(struct rpc_task *task);
static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
struct rpc_timeout *to); struct rpc_timeout *to);
static struct socket *xprt_create_socket(int, struct rpc_timeout *, int); static struct socket *xprt_create_socket(int, struct rpc_timeout *, int);
...@@ -213,11 +213,10 @@ static inline int ...@@ -213,11 +213,10 @@ static inline int
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{ {
struct socket *sock = xprt->sock; struct socket *sock = xprt->sock;
struct msghdr msg;
struct xdr_buf *xdr = &req->rq_snd_buf; struct xdr_buf *xdr = &req->rq_snd_buf;
struct iovec niv[MAX_IOVEC]; struct sockaddr *addr = NULL;
unsigned int niov, slen, skip; int addrlen = 0;
mm_segment_t oldfs; unsigned int skip;
int result; int result;
if (!sock) if (!sock)
...@@ -227,27 +226,18 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) ...@@ -227,27 +226,18 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
req->rq_svec->iov_base, req->rq_svec->iov_base,
req->rq_svec->iov_len); req->rq_svec->iov_len);
/* For UDP, we need to provide an address */
if (!xprt->stream) {
addr = (struct sockaddr *) &xprt->addr;
addrlen = sizeof(xprt->addr);
}
/* Dont repeat bytes */ /* Dont repeat bytes */
skip = req->rq_bytes_sent; skip = req->rq_bytes_sent;
slen = xdr->len - skip;
niov = xdr_kmap(niv, xdr, skip);
msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL;
msg.msg_iov = niv;
msg.msg_iovlen = niov;
msg.msg_name = (struct sockaddr *) &xprt->addr;
msg.msg_namelen = sizeof(xprt->addr);
msg.msg_control = NULL;
msg.msg_controllen = 0;
oldfs = get_fs(); set_fs(get_ds());
clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
result = sock_sendmsg(sock, &msg, slen);
set_fs(oldfs);
xdr_kunmap(xdr, skip); clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result); dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
if (result >= 0) if (result >= 0)
return result; return result;
...@@ -259,6 +249,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) ...@@ -259,6 +249,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
*/ */
case -EAGAIN: case -EAGAIN:
break; break;
case -ECONNRESET:
case -ENOTCONN: case -ENOTCONN:
case -EPIPE: case -EPIPE:
/* connection broken */ /* connection broken */
...@@ -376,6 +367,7 @@ xprt_close(struct rpc_xprt *xprt) ...@@ -376,6 +367,7 @@ xprt_close(struct rpc_xprt *xprt)
if (!sk) if (!sk)
return; return;
write_lock_bh(&sk->callback_lock);
xprt->inet = NULL; xprt->inet = NULL;
xprt->sock = NULL; xprt->sock = NULL;
...@@ -383,6 +375,7 @@ xprt_close(struct rpc_xprt *xprt) ...@@ -383,6 +375,7 @@ xprt_close(struct rpc_xprt *xprt)
sk->data_ready = xprt->old_data_ready; sk->data_ready = xprt->old_data_ready;
sk->state_change = xprt->old_state_change; sk->state_change = xprt->old_state_change;
sk->write_space = xprt->old_write_space; sk->write_space = xprt->old_write_space;
write_unlock_bh(&sk->callback_lock);
xprt_disconnect(xprt); xprt_disconnect(xprt);
sk->no_check = 0; sk->no_check = 0;
...@@ -397,14 +390,15 @@ static void ...@@ -397,14 +390,15 @@ static void
xprt_disconnect(struct rpc_xprt *xprt) xprt_disconnect(struct rpc_xprt *xprt)
{ {
dprintk("RPC: disconnected transport %p\n", xprt); dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->sock_lock);
xprt_clear_connected(xprt); xprt_clear_connected(xprt);
rpc_wake_up_status(&xprt->pending, -ENOTCONN); rpc_wake_up_status(&xprt->pending, -ENOTCONN);
spin_unlock_bh(&xprt->sock_lock);
} }
/* /*
* Attempt to connect a TCP socket. * Attempt to connect a TCP socket.
* *
* NB: This never collides with TCP reads, as both run from rpciod
*/ */
void void
xprt_connect(struct rpc_task *task) xprt_connect(struct rpc_task *task)
...@@ -442,6 +436,10 @@ xprt_connect(struct rpc_task *task) ...@@ -442,6 +436,10 @@ xprt_connect(struct rpc_task *task)
goto out_write; goto out_write;
} }
xprt_bind_socket(xprt, sock); xprt_bind_socket(xprt, sock);
if (!xprt->stream)
goto out_write;
inet = sock->sk; inet = sock->sk;
/* /*
...@@ -452,6 +450,9 @@ xprt_connect(struct rpc_task *task) ...@@ -452,6 +450,9 @@ xprt_connect(struct rpc_task *task)
dprintk("RPC: %4d connect status %d connected %d sock state %d\n", dprintk("RPC: %4d connect status %d connected %d sock state %d\n",
task->tk_pid, -status, xprt_connected(xprt), inet->state); task->tk_pid, -status, xprt_connected(xprt), inet->state);
if (status >= 0)
return;
switch (status) { switch (status) {
case -EINPROGRESS: case -EINPROGRESS:
case -EALREADY: case -EALREADY:
...@@ -464,53 +465,37 @@ xprt_connect(struct rpc_task *task) ...@@ -464,53 +465,37 @@ xprt_connect(struct rpc_task *task)
/* if the socket is already closing, delay briefly */ /* if the socket is already closing, delay briefly */
if ((1 << inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) if ((1 << inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
task->tk_timeout = RPC_REESTABLISH_TIMEOUT; task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->pending, task, xprt_conn_status, rpc_sleep_on(&xprt->pending, task, xprt_connect_status,
NULL); NULL);
release_sock(inet);
/* task status set when task wakes up again */
return;
} }
release_sock(inet); release_sock(inet);
task->tk_status = 0;
break;
case 0:
case -EISCONN: /* not likely, but just in case */
/* Half closed state. No race -- this socket is dead. */
if (inet->state != TCP_ESTABLISHED) {
xprt_close(xprt);
task->tk_status = -EAGAIN;
goto out_write;
}
/* Otherwise, the connection is already established. */
task->tk_status = 0;
break; break;
case -ECONNREFUSED:
case -EPIPE: case -ECONNRESET:
xprt_close(xprt); case -ENOTCONN:
if (!task->tk_client->cl_softrtry) {
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
task->tk_status = -ENOTCONN; task->tk_status = -ENOTCONN;
goto out_write; break;
}
default: default:
/* Report myriad other possible returns. If this file /* Report myriad other possible returns. If this file
* system is soft mounted, just error out, like Solaris. */ * system is soft mounted, just error out, like Solaris. */
xprt_close(xprt);
if (task->tk_client->cl_softrtry) { if (task->tk_client->cl_softrtry) {
printk(KERN_WARNING printk(KERN_WARNING
"RPC: error %d connecting to server %s, exiting\n", "RPC: error %d connecting to server %s, exiting\n",
-status, task->tk_client->cl_server); -status, task->tk_client->cl_server);
task->tk_status = -EIO; task->tk_status = -EIO;
} else { goto out_write;
printk(KERN_WARNING }
"RPC: error %d connecting to server %s\n", printk(KERN_WARNING "RPC: error %d connecting to server %s\n",
-status, task->tk_client->cl_server); -status, task->tk_client->cl_server);
/* This will prevent anybody else from reconnecting */
rpc_delay(task, RPC_REESTABLISH_TIMEOUT); rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
task->tk_status = status; task->tk_status = status;
}
break; break;
} }
return;
out_write: out_write:
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
} }
...@@ -519,33 +504,32 @@ xprt_connect(struct rpc_task *task) ...@@ -519,33 +504,32 @@ xprt_connect(struct rpc_task *task)
* We arrive here when awoken from waiting on connection establishment. * We arrive here when awoken from waiting on connection establishment.
*/ */
static void static void
xprt_conn_status(struct rpc_task *task) xprt_connect_status(struct rpc_task *task)
{ {
struct rpc_xprt *xprt = task->tk_xprt; struct rpc_xprt *xprt = task->tk_xprt;
switch (task->tk_status) { if (task->tk_status >= 0) {
case 0: dprintk("RPC: %4d xprt_connect_status: connection established\n",
dprintk("RPC: %4d xprt_conn_status: connection established\n",
task->tk_pid); task->tk_pid);
goto out; return;
}
/* if soft mounted, just cause this RPC to fail */
if (task->tk_client->cl_softrtry)
task->tk_status = -EIO;
switch (task->tk_status) {
case -ENOTCONN:
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
return;
case -ETIMEDOUT: case -ETIMEDOUT:
dprintk("RPC: %4d xprt_conn_status: timed out\n", dprintk("RPC: %4d xprt_connect_status: timed out\n",
task->tk_pid); task->tk_pid);
/* prevent TCP from continuing to retry SYNs */
xprt_close(xprt);
break; break;
default: default:
printk(KERN_ERR "RPC: error %d connecting to server %s\n", printk(KERN_ERR "RPC: error %d connecting to server %s\n",
-task->tk_status, task->tk_client->cl_server); -task->tk_status, task->tk_client->cl_server);
xprt_close(xprt);
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
break;
} }
/* if soft mounted, cause this RPC to fail */
if (task->tk_client->cl_softrtry)
task->tk_status = -EIO;
out:
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
} }
...@@ -695,6 +679,7 @@ udp_data_ready(struct sock *sk, int len) ...@@ -695,6 +679,7 @@ udp_data_ready(struct sock *sk, int len)
struct sk_buff *skb; struct sk_buff *skb;
int err, repsize, copied; int err, repsize, copied;
read_lock(&sk->callback_lock);
dprintk("RPC: udp_data_ready...\n"); dprintk("RPC: udp_data_ready...\n");
if (!(xprt = xprt_from_sock(sk))) { if (!(xprt = xprt_from_sock(sk))) {
printk("RPC: udp_data_ready request not found!\n"); printk("RPC: udp_data_ready request not found!\n");
...@@ -745,6 +730,7 @@ udp_data_ready(struct sock *sk, int len) ...@@ -745,6 +730,7 @@ udp_data_ready(struct sock *sk, int len)
out: out:
if (sk->sleep && waitqueue_active(sk->sleep)) if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep); wake_up_interruptible(sk->sleep);
read_unlock(&sk->callback_lock);
} }
/* /*
...@@ -939,7 +925,7 @@ tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, ...@@ -939,7 +925,7 @@ tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
} }
/* Skip over any trailing bytes on short reads */ /* Skip over any trailing bytes on short reads */
tcp_read_discard(xprt, &desc); tcp_read_discard(xprt, &desc);
} while (desc.count && xprt_connected(xprt)); } while (desc.count);
dprintk("RPC: tcp_data_recv done\n"); dprintk("RPC: tcp_data_recv done\n");
return len - desc.count; return len - desc.count;
} }
...@@ -949,18 +935,21 @@ static void tcp_data_ready(struct sock *sk, int bytes) ...@@ -949,18 +935,21 @@ static void tcp_data_ready(struct sock *sk, int bytes)
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
read_descriptor_t rd_desc; read_descriptor_t rd_desc;
read_lock(&sk->callback_lock);
dprintk("RPC: tcp_data_ready...\n"); dprintk("RPC: tcp_data_ready...\n");
if (!(xprt = xprt_from_sock(sk))) { if (!(xprt = xprt_from_sock(sk))) {
printk("RPC: tcp_data_ready socket info not found!\n"); printk("RPC: tcp_data_ready socket info not found!\n");
return; goto out;
} }
if (xprt->shutdown) if (xprt->shutdown)
return; goto out;
/* We use rd_desc to pass struct xprt to tcp_data_recv */ /* We use rd_desc to pass struct xprt to tcp_data_recv */
rd_desc.buf = (char *)xprt; rd_desc.buf = (char *)xprt;
rd_desc.count = 65536; rd_desc.count = 65536;
tcp_read_sock(sk, &rd_desc, tcp_data_recv); tcp_read_sock(sk, &rd_desc, tcp_data_recv);
out:
read_unlock(&sk->callback_lock);
} }
static void static void
...@@ -968,6 +957,7 @@ tcp_state_change(struct sock *sk) ...@@ -968,6 +957,7 @@ tcp_state_change(struct sock *sk)
{ {
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
read_lock(&sk->callback_lock);
if (!(xprt = xprt_from_sock(sk))) if (!(xprt = xprt_from_sock(sk)))
goto out; goto out;
dprintk("RPC: tcp_state_change client %p...\n", xprt); dprintk("RPC: tcp_state_change client %p...\n", xprt);
...@@ -977,19 +967,19 @@ tcp_state_change(struct sock *sk) ...@@ -977,19 +967,19 @@ tcp_state_change(struct sock *sk)
switch (sk->state) { switch (sk->state) {
case TCP_ESTABLISHED: case TCP_ESTABLISHED:
if (xprt_test_and_set_connected(xprt)) spin_lock_bh(&xprt->sock_lock);
break; if (!xprt_test_and_set_connected(xprt)) {
/* Reset TCP record info */ /* Reset TCP record info */
xprt->tcp_offset = 0; xprt->tcp_offset = 0;
xprt->tcp_reclen = 0; xprt->tcp_reclen = 0;
xprt->tcp_copied = 0; xprt->tcp_copied = 0;
xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
spin_lock(&xprt->sock_lock); if (xprt->snd_task)
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
rpc_wake_up_task(xprt->snd_task); rpc_wake_up_task(xprt->snd_task);
spin_unlock(&xprt->sock_lock); rpc_wake_up(&xprt->pending);
}
spin_unlock_bh(&xprt->sock_lock);
break; break;
case TCP_SYN_SENT: case TCP_SYN_SENT:
case TCP_SYN_RECV: case TCP_SYN_RECV:
...@@ -1001,6 +991,7 @@ tcp_state_change(struct sock *sk) ...@@ -1001,6 +991,7 @@ tcp_state_change(struct sock *sk)
out: out:
if (sk->sleep && waitqueue_active(sk->sleep)) if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep); wake_up_interruptible_all(sk->sleep);
read_unlock(&sk->callback_lock);
} }
/* /*
...@@ -1015,24 +1006,25 @@ xprt_write_space(struct sock *sk) ...@@ -1015,24 +1006,25 @@ xprt_write_space(struct sock *sk)
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
struct socket *sock; struct socket *sock;
read_lock(&sk->callback_lock);
if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket)) if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket))
return; goto out;
if (xprt->shutdown) if (xprt->shutdown)
return; goto out;
/* Wait until we have enough socket memory */ /* Wait until we have enough socket memory */
if (xprt->stream) { if (xprt->stream) {
/* from net/ipv4/tcp.c:tcp_write_space */ /* from net/ipv4/tcp.c:tcp_write_space */
if (tcp_wspace(sk) < tcp_min_write_space(sk)) if (tcp_wspace(sk) < tcp_min_write_space(sk))
return; goto out;
} else { } else {
/* from net/core/sock.c:sock_def_write_space */ /* from net/core/sock.c:sock_def_write_space */
if (!sock_writeable(sk)) if (!sock_writeable(sk))
return; goto out;
} }
if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
return; goto out;
spin_lock_bh(&xprt->sock_lock); spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
...@@ -1040,6 +1032,8 @@ xprt_write_space(struct sock *sk) ...@@ -1040,6 +1032,8 @@ xprt_write_space(struct sock *sk)
spin_unlock_bh(&xprt->sock_lock); spin_unlock_bh(&xprt->sock_lock);
if (sk->sleep && waitqueue_active(sk->sleep)) if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep); wake_up_interruptible(sk->sleep);
out:
read_unlock(&sk->callback_lock);
} }
/* /*
...@@ -1107,9 +1101,6 @@ xprt_prepare_transmit(struct rpc_task *task) ...@@ -1107,9 +1101,6 @@ xprt_prepare_transmit(struct rpc_task *task)
if (xprt->shutdown) if (xprt->shutdown)
return -EIO; return -EIO;
if (!xprt_connected(xprt))
return -ENOTCONN;
if (task->tk_rpcwait) if (task->tk_rpcwait)
rpc_remove_wait_queue(task); rpc_remove_wait_queue(task);
...@@ -1118,6 +1109,12 @@ xprt_prepare_transmit(struct rpc_task *task) ...@@ -1118,6 +1109,12 @@ xprt_prepare_transmit(struct rpc_task *task)
err = -EAGAIN; err = -EAGAIN;
goto out_unlock; goto out_unlock;
} }
if (!xprt_connected(xprt)) {
err = -ENOTCONN;
goto out_unlock;
}
if (list_empty(&req->rq_list)) { if (list_empty(&req->rq_list)) {
list_add_tail(&req->rq_list, &xprt->recv); list_add_tail(&req->rq_list, &xprt->recv);
req->rq_received = 0; req->rq_received = 0;
...@@ -1192,7 +1189,10 @@ xprt_transmit(struct rpc_task *task) ...@@ -1192,7 +1189,10 @@ xprt_transmit(struct rpc_task *task)
if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
/* Protect against races with xprt_write_space */ /* Protect against races with xprt_write_space */
spin_lock_bh(&xprt->sock_lock); spin_lock_bh(&xprt->sock_lock);
if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { /* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
task->tk_timeout = req->rq_timeout.to_current; task->tk_timeout = req->rq_timeout.to_current;
rpc_sleep_on(&xprt->pending, task, NULL, NULL); rpc_sleep_on(&xprt->pending, task, NULL, NULL);
} }
...@@ -1203,20 +1203,17 @@ xprt_transmit(struct rpc_task *task) ...@@ -1203,20 +1203,17 @@ xprt_transmit(struct rpc_task *task)
rpc_delay(task, HZ>>4); rpc_delay(task, HZ>>4);
return; return;
case -ECONNREFUSED: case -ECONNREFUSED:
case -ENOTCONN:
if (!xprt->stream) {
task->tk_timeout = RPC_REESTABLISH_TIMEOUT; task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->sending, task, NULL, NULL); rpc_sleep_on(&xprt->sending, task, NULL, NULL);
case -ENOTCONN:
return; return;
}
/* fall through */
default: default:
if (xprt->stream) if (xprt->stream)
xprt_disconnect(xprt); xprt_disconnect(xprt);
req->rq_bytes_sent = 0;
} }
out_release: out_release:
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
req->rq_bytes_sent = 0;
return; return;
out_receive: out_receive:
dprintk("RPC: %4d xmit complete\n", task->tk_pid); dprintk("RPC: %4d xmit complete\n", task->tk_pid);
...@@ -1230,10 +1227,14 @@ xprt_transmit(struct rpc_task *task) ...@@ -1230,10 +1227,14 @@ xprt_transmit(struct rpc_task *task)
} else } else
task->tk_timeout = req->rq_timeout.to_current; task->tk_timeout = req->rq_timeout.to_current;
spin_lock_bh(&xprt->sock_lock); spin_lock_bh(&xprt->sock_lock);
if (!req->rq_received) /* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
__xprt_release_write(xprt, task); __xprt_release_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock); spin_unlock_bh(&xprt->sock_lock);
req->rq_bytes_sent = 0;
} }
/* /*
...@@ -1417,6 +1418,9 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) ...@@ -1417,6 +1418,9 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
req->rq_next = NULL; req->rq_next = NULL;
xprt->free = xprt->slot; xprt->free = xprt->slot;
/* Check whether we want to use a reserved port */
xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
dprintk("RPC: created transport %p\n", xprt); dprintk("RPC: created transport %p\n", xprt);
return xprt; return xprt;
...@@ -1430,6 +1434,12 @@ xprt_bindresvport(struct socket *sock) ...@@ -1430,6 +1434,12 @@ xprt_bindresvport(struct socket *sock)
{ {
struct sockaddr_in myaddr; struct sockaddr_in myaddr;
int err, port; int err, port;
kernel_cap_t saved_cap = current->cap_effective;
/* Override capabilities.
* They were checked in xprt_create_proto i.e. at mount time
*/
cap_raise(current->cap_effective, CAP_NET_BIND_SERVICE);
memset(&myaddr, 0, sizeof(myaddr)); memset(&myaddr, 0, sizeof(myaddr));
myaddr.sin_family = AF_INET; myaddr.sin_family = AF_INET;
...@@ -1439,6 +1449,7 @@ xprt_bindresvport(struct socket *sock) ...@@ -1439,6 +1449,7 @@ xprt_bindresvport(struct socket *sock)
err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
sizeof(myaddr)); sizeof(myaddr));
} while (err == -EADDRINUSE && --port > 0); } while (err == -EADDRINUSE && --port > 0);
current->cap_effective = saved_cap;
if (err < 0) if (err < 0)
printk("RPC: Can't bind to reserved port (%d).\n", -err); printk("RPC: Can't bind to reserved port (%d).\n", -err);
...@@ -1454,6 +1465,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) ...@@ -1454,6 +1465,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
if (xprt->inet) if (xprt->inet)
return; return;
write_lock_bh(&sk->callback_lock);
sk->user_data = xprt; sk->user_data = xprt;
xprt->old_data_ready = sk->data_ready; xprt->old_data_ready = sk->data_ready;
xprt->old_state_change = sk->state_change; xprt->old_state_change = sk->state_change;
...@@ -1474,6 +1486,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) ...@@ -1474,6 +1486,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
/* Reset to new socket */ /* Reset to new socket */
xprt->sock = sock; xprt->sock = sock;
xprt->inet = sk; xprt->inet = sk;
write_unlock_bh(&sk->callback_lock);
return; return;
} }
...@@ -1544,16 +1557,6 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) ...@@ -1544,16 +1557,6 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
if (!xprt) if (!xprt)
goto out_bad; goto out_bad;
xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
if (!xprt->stream) {
struct socket *sock;
sock = xprt_create_socket(proto, to, xprt->resvport);
if (!sock)
goto out_bad;
xprt_bind_socket(xprt, sock);
}
dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
return xprt; return xprt;
out_bad: out_bad:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment