Commit 4dda9c8a authored by Trond Myklebust

SUNRPC: Set SO_REUSEPORT socket option for TCP connections

When using TCP, we need the ability to reuse port numbers after
a disconnection, so that the NFSv3 server knows that we're the same
client. Currently we use a hack to work around the TCP socket's
TIME_WAIT: we send an RST instead of closing, which doesn't
always work...
The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple
TCP connections to the same source address+port combination, and thus
to use ordinary TCP close() instead of the current hack.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
parent bc3203cd
...@@ -1666,6 +1666,39 @@ static unsigned short xs_get_random_port(void) ...@@ -1666,6 +1666,39 @@ static unsigned short xs_get_random_port(void)
return rand + xprt_min_resvport; return rand + xprt_min_resvport;
} }
/**
 * xs_sock_set_reuseport - set the socket's port reuse option
 * @sock: socket
 *
 * Note that this function has to be called on all sockets that share the
 * same port, and it must be called before binding.
 *
 * The result of kernel_setsockopt() is not checked; this function returns
 * nothing to its caller.
 */
static void xs_sock_set_reuseport(struct socket *sock)
{
	/*
	 * SO_REUSEPORT takes an int-sized flag. Passing a single char makes
	 * the option length smaller than sizeof(int), which the socket layer
	 * rejects with -EINVAL, so the option would silently never be set.
	 */
	int opt = 1;

	kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
			(char *)&opt, sizeof(opt));
}
/*
 * xs_sock_getport - read the port number from a socket's own address
 * @sock: socket to query
 *
 * Asks kernel_getsockname() for the socket's address and extracts the port
 * in host byte order for AF_INET and AF_INET6 addresses.
 *
 * Returns 0 if the lookup fails or the address family is neither of those.
 */
static unsigned short xs_sock_getport(struct socket *sock)
{
	struct sockaddr_storage addr;
	int addrlen;

	if (kernel_getsockname(sock, (struct sockaddr *)&addr, &addrlen) < 0)
		return 0;

	if (addr.ss_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)&addr)->sin6_port);

	if (addr.ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)&addr)->sin_port);

	/* Any other address family: no port to report. */
	return 0;
}
/** /**
* xs_set_port - reset the port number in the remote endpoint address * xs_set_port - reset the port number in the remote endpoint address
* @xprt: generic transport * @xprt: generic transport
...@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) ...@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
xs_update_peer_port(xprt); xs_update_peer_port(xprt);
} }
/*
 * xs_set_srcport - record the socket's port as the transport's source port
 * @transport: socket transport whose srcport field may be filled in
 * @sock: socket to read the port from
 *
 * Only acts when transport->srcport is still 0; a non-zero value that is
 * already stored is left untouched.
 */
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
{
	if (transport->srcport != 0)
		return;

	transport->srcport = xs_sock_getport(sock);
}
static unsigned short xs_get_srcport(struct sock_xprt *transport) static unsigned short xs_get_srcport(struct sock_xprt *transport)
{ {
unsigned short port = transport->srcport; unsigned short port = transport->srcport;
...@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work) ...@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work)
} }
static struct socket *xs_create_sock(struct rpc_xprt *xprt, static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type, int protocol) struct sock_xprt *transport, int family, int type,
int protocol, bool reuseport)
{ {
struct socket *sock; struct socket *sock;
int err; int err;
...@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, ...@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
} }
xs_reclassify_socket(family, sock); xs_reclassify_socket(family, sock);
if (reuseport)
xs_sock_set_reuseport(sock);
err = xs_bind(transport, sock); err = xs_bind(transport, sock);
if (err) { if (err) {
sock_release(sock); sock_release(sock);
...@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work) ...@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
/* Start by resetting any existing state */ /* Start by resetting any existing state */
xs_reset_transport(transport); xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport, sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); xs_addr(xprt)->sa_family, SOCK_DGRAM,
IPPROTO_UDP, false);
if (IS_ERR(sock)) if (IS_ERR(sock))
goto out; goto out;
...@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ...@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_allocation = GFP_ATOMIC; sk->sk_allocation = GFP_ATOMIC;
/* socket options */ /* socket options */
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
sock_reset_flag(sk, SOCK_LINGER); sock_reset_flag(sk, SOCK_LINGER);
tcp_sk(sk)->linger2 = 0; tcp_sk(sk)->linger2 = 0;
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
...@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ...@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
switch (ret) { switch (ret) {
case 0: case 0:
xs_set_srcport(transport, sock);
case -EINPROGRESS: case -EINPROGRESS:
/* SYN_SENT! */ /* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
...@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) ...@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
if (!sock) { if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport, sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); xs_addr(xprt)->sa_family, SOCK_STREAM,
IPPROTO_TCP, true);
if (IS_ERR(sock)) { if (IS_ERR(sock)) {
status = PTR_ERR(sock); status = PTR_ERR(sock);
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment