Commit 3f4eb9ff authored by Chuck Lever, committed by Anna Schumaker

xprtrdma: Handle device removal outside of the CM event handler

Wait for all disconnects to complete to ensure the transport has
divested all of its hardware resources before the underlying RDMA
device can be removed.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent 7e86845a
...@@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh, ...@@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh,
DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect); DEFINE_CONN_EVENT(disconnect);
/*
 * xprtrdma_device_removal - trace point for an RDMA device removal
 * @id: connection manager ID of the endpoint being disconnected
 *
 * Records the name of the device behind @id and the destination
 * address stored in @id's route. The address is captured as
 * sizeof(struct sockaddr_in6) raw bytes and printed with %pISpc.
 *
 * The format string deliberately has no trailing "\n": TP_printk()
 * already terminates each event line, so an explicit newline would
 * emit an empty line after every event in the trace output.
 */
TRACE_EVENT(xprtrdma_device_removal,
	TP_PROTO(
		const struct rdma_cm_id *id
	),

	TP_ARGS(id),

	TP_STRUCT__entry(
		__string(name, id->device->name)
		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
	),

	TP_fast_assign(
		__assign_str(name);
		memcpy(__entry->addr, &id->route.addr.dst_addr,
		       sizeof(struct sockaddr_in6));
	),

	TP_printk("device %s to be removed, disconnecting %pISpc",
		__get_str(name), __entry->addr
	)
);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
TRACE_EVENT(xprtrdma_op_connect, TRACE_EVENT(xprtrdma_op_connect,
......
...@@ -222,7 +222,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, ...@@ -222,7 +222,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
static int static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{ {
struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context; struct rpcrdma_ep *ep = id->context;
might_sleep(); might_sleep();
...@@ -241,14 +240,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -241,14 +240,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_async_rc = -ENETUNREACH; ep->re_async_rc = -ENETUNREACH;
complete(&ep->re_done); complete(&ep->re_done);
return 0; return 0;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
pr_info("rpcrdma: removing device %s for %pISpc\n",
ep->re_id->device->name, sap);
switch (xchg(&ep->re_connect_status, -ENODEV)) {
case 0: goto wake_connect_worker;
case 1: goto disconnected;
}
return 0;
case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV; ep->re_connect_status = -ENODEV;
goto disconnected; goto disconnected;
...@@ -284,6 +275,14 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -284,6 +275,14 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
return 0; return 0;
} }
/*
 * rpcrdma_ep_removal_done - removal-notification callback for an endpoint
 * @rn: notification object embedded in a struct rpcrdma_ep as re_rn
 *
 * Recovers the enclosing endpoint from @rn, emits the
 * xprtrdma_device_removal trace event for the endpoint's CM ID, and
 * then forces the endpoint's transport to disconnect.
 */
static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
{
	struct rpcrdma_ep *endpoint;

	endpoint = container_of(rn, struct rpcrdma_ep, re_rn);

	/* Trace first, then start the disconnect. */
	trace_xprtrdma_device_removal(endpoint->re_id);
	xprt_force_disconnect(endpoint->re_xprt);
}
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep) struct rpcrdma_ep *ep)
{ {
...@@ -323,6 +322,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, ...@@ -323,6 +322,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
if (rc) if (rc)
goto out; goto out;
rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
if (rc)
goto out;
return id; return id;
out: out:
...@@ -350,6 +353,8 @@ static void rpcrdma_ep_destroy(struct kref *kref) ...@@ -350,6 +353,8 @@ static void rpcrdma_ep_destroy(struct kref *kref)
ib_dealloc_pd(ep->re_pd); ib_dealloc_pd(ep->re_pd);
ep->re_pd = NULL; ep->re_pd = NULL;
rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn);
kfree(ep); kfree(ep);
module_put(THIS_MODULE); module_put(THIS_MODULE);
} }
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */ #include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/rdma_rn.h> /* removal notifications */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
...@@ -92,6 +93,7 @@ struct rpcrdma_ep { ...@@ -92,6 +93,7 @@ struct rpcrdma_ep {
struct rpcrdma_connect_private struct rpcrdma_connect_private
re_cm_private; re_cm_private;
struct rdma_conn_param re_remote_cma; struct rdma_conn_param re_remote_cma;
struct rpcrdma_notification re_rn;
int re_receive_count; int re_receive_count;
unsigned int re_max_requests; /* depends on device */ unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */ unsigned int re_inline_send; /* negotiated */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment