Commit a396d43a authored by Sean Hefty's avatar Sean Hefty Committed by Roland Dreier

RDMA/cma: Replace global lock in rdma_destroy_id() with id-specific one

rdma_destroy_id currently uses the global rdma cm 'lock' to test if an
rdma_cm_id has been bound to a device.  This prevents an active
address resolution callback handler from assigning a device to the
rdma_cm_id after rdma_destroy_id checks for one.

Instead, we can replace the use of the global lock around the check to
the rdma_cm_id device pointer by setting the id state to destroying,
then flushing all active callbacks.  The latter is accomplished by
acquiring and releasing the handler_mutex.  Any active handler will
complete first, and any newly scheduled handlers will find the
rdma_cm_id in an invalid state.

In addition to optimizing the current locking scheme, the use of the
rdma_cm_id mutex is a more intuitive synchronization mechanism than
that of the global lock.  These changes are based on feedback from
Doug Ledford <dledford@redhat.com> while he was trying to debug a
crash in the rdma cm destroy path.
Signed-off-by: default avatarSean Hefty <sean.hefty@intel.com>
Signed-off-by: default avatarRoland Dreier <roland@purestorage.com>
parent 8d8ac865
...@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref) ...@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref)
kfree(mc); kfree(mc);
} }
static void cma_detach_from_dev(struct rdma_id_private *id_priv) static void cma_release_dev(struct rdma_id_private *id_priv)
{ {
mutex_lock(&lock);
list_del(&id_priv->list); list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev); cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL; id_priv->cma_dev = NULL;
mutex_unlock(&lock);
} }
static int cma_set_qkey(struct rdma_id_private *id_priv) static int cma_set_qkey(struct rdma_id_private *id_priv)
...@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) ...@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
mutex_lock(&lock);
iboe_addr_get_sgid(dev_addr, &iboe_gid); iboe_addr_get_sgid(dev_addr, &iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr + memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid); rdma_addr_gid_offset(dev_addr), sizeof gid);
...@@ -398,6 +401,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) ...@@ -398,6 +401,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
if (!ret) if (!ret)
cma_attach_to_dev(id_priv, cma_dev); cma_attach_to_dev(id_priv, cma_dev);
mutex_unlock(&lock);
return ret; return ret;
} }
...@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id) ...@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id)
state = cma_exch(id_priv, CMA_DESTROYING); state = cma_exch(id_priv, CMA_DESTROYING);
cma_cancel_operation(id_priv, state); cma_cancel_operation(id_priv, state);
mutex_lock(&lock); /*
* Wait for any active callback to finish. New callbacks will find
* the id_priv state set to destroying and abort.
*/
mutex_lock(&id_priv->handler_mutex);
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) { if (id_priv->cma_dev) {
mutex_unlock(&lock);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) { switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB: case RDMA_TRANSPORT_IB:
if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
...@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id) ...@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
break; break;
} }
cma_leave_mc_groups(id_priv); cma_leave_mc_groups(id_priv);
mutex_lock(&lock); cma_release_dev(id_priv);
cma_detach_from_dev(id_priv);
} }
mutex_unlock(&lock);
cma_release_port(id_priv); cma_release_port(id_priv);
cma_deref_id(id_priv); cma_deref_id(id_priv);
...@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ...@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
} }
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
mutex_lock(&lock);
ret = cma_acquire_dev(conn_id); ret = cma_acquire_dev(conn_id);
mutex_unlock(&lock);
if (ret) if (ret)
goto release_conn_id; goto release_conn_id;
...@@ -1401,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ...@@ -1401,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out; goto out;
} }
mutex_lock(&lock);
ret = cma_acquire_dev(conn_id); ret = cma_acquire_dev(conn_id);
mutex_unlock(&lock);
if (ret) { if (ret) {
mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id); rdma_destroy_id(new_cm_id);
...@@ -1966,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr, ...@@ -1966,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memset(&event, 0, sizeof event); memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex); mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
/*
* Grab mutex to block rdma_destroy_id() from removing the device while
* we're trying to acquire it.
*/
mutex_lock(&lock);
if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
mutex_unlock(&lock);
goto out; goto out;
}
if (!status && !id_priv->cma_dev) if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv); status = cma_acquire_dev(id_priv);
mutex_unlock(&lock);
if (status) { if (status) {
if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
...@@ -2280,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ...@@ -2280,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret) if (ret)
goto err1; goto err1;
mutex_lock(&lock);
ret = cma_acquire_dev(id_priv); ret = cma_acquire_dev(id_priv);
mutex_unlock(&lock);
if (ret) if (ret)
goto err1; goto err1;
} }
...@@ -2294,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ...@@ -2294,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0; return 0;
err2: err2:
if (id_priv->cma_dev) { if (id_priv->cma_dev)
mutex_lock(&lock); cma_release_dev(id_priv);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
}
err1: err1:
cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment