Commit 5675add3, authored by Tom Talpey, committed by Trond Myklebust

RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls.

Add defensive timeouts to wait_for_completion() calls in RDMA
address resolution, and make them interruptible. Fix the timeout
units to milliseconds (formerly jiffies) and move to private header.
Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
parent 1a954051
...@@ -66,9 +66,6 @@ ...@@ -66,9 +66,6 @@
#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/* memory registration strategies */ /* memory registration strategies */
#define RPCRDMA_PERSISTENT_REGISTRATION (1) #define RPCRDMA_PERSISTENT_REGISTRATION (1)
......
...@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch (event->event) { switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED:
ia->ri_async_rc = 0;
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ADDR_ERROR:
...@@ -363,26 +364,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -363,26 +364,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return id; return id;
} }
ia->ri_async_rc = 0; ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
if (rc) { if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc); __func__, rc);
goto out; goto out;
} }
wait_for_completion(&ia->ri_done); wait_for_completion_interruptible_timeout(&ia->ri_done,
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc; rc = ia->ri_async_rc;
if (rc) if (rc)
goto out; goto out;
ia->ri_async_rc = 0; ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) { if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n", dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc); __func__, rc);
goto out; goto out;
} }
wait_for_completion(&ia->ri_done); wait_for_completion_interruptible_timeout(&ia->ri_done,
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc; rc = ia->ri_async_rc;
if (rc) if (rc)
goto out; goto out;
......
...@@ -51,6 +51,9 @@ ...@@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/* /*
* Interface Adapter -- one per transport instance * Interface Adapter -- one per transport instance
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment