Commit 0445f92c authored by Trond Myklebust

SUNRPC: Fix disconnection races

When the socket is closed, we need to call xprt_disconnect_done() in order
to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.

However, we also want to ensure that we don't wake them up before the socket
is closed, since that would cause thundering herd issues with everyone
piling up to retransmit before the TCP shutdown dance has completed.
Only the task that holds XPRT_LOCKED needs to wake up early in order to
allow the close to complete.
Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Reported-by: Scott Mayhew <smayhew@redhat.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
parent 7566ec39
...@@ -1952,6 +1952,7 @@ call_connect_status(struct rpc_task *task) ...@@ -1952,6 +1952,7 @@ call_connect_status(struct rpc_task *task)
/* retry with existing socket, after a delay */ /* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
/* fall through */ /* fall through */
case -ENOTCONN:
case -EAGAIN: case -EAGAIN:
/* Check for timeouts before looping back to call_bind */ /* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT: case -ETIMEDOUT:
......
...@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) ...@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */ /* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(xprtiod_workqueue, &xprt->task_cleanup); queue_work(xprtiod_workqueue, &xprt->task_cleanup);
xprt_wake_pending_tasks(xprt, -EAGAIN); else if (xprt->snd_task)
rpc_wake_up_queued_task_set_status(&xprt->pending,
xprt->snd_task, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
} }
EXPORT_SYMBOL_GPL(xprt_force_disconnect); EXPORT_SYMBOL_GPL(xprt_force_disconnect);
...@@ -852,6 +854,7 @@ static void xprt_connect_status(struct rpc_task *task) ...@@ -852,6 +854,7 @@ static void xprt_connect_status(struct rpc_task *task)
case -ENETUNREACH: case -ENETUNREACH:
case -EHOSTUNREACH: case -EHOSTUNREACH:
case -EPIPE: case -EPIPE:
case -ENOTCONN:
case -EAGAIN: case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break; break;
......
...@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport) ...@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
trace_rpc_socket_close(xprt, sock); trace_rpc_socket_close(xprt, sock);
sock_release(sock); sock_release(sock);
xprt_disconnect_done(xprt);
} }
/** /**
...@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt) ...@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
xs_reset_transport(transport); xs_reset_transport(transport);
xprt->reestablish_timeout = 0; xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
} }
static void xs_inject_disconnect(struct rpc_xprt *xprt) static void xs_inject_disconnect(struct rpc_xprt *xprt)
...@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
&transport->sock_state)) &transport->sock_state))
xprt_clear_connecting(xprt); xprt_clear_connecting(xprt);
clear_bit(XPRT_CLOSING, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state);
if (sk->sk_err)
xprt_wake_pending_tasks(xprt, -sk->sk_err);
/* Trigger the socket release */ /* Trigger the socket release */
xs_tcp_force_close(xprt); xs_tcp_force_close(xprt);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment