Commit 745cbcca authored by Andy Grover, committed by David S. Miller

RDS: Rewrite connection cleanup, fixing oops on rmmod

This fixes a bug where a connection was unexpectedly
not on *any* list while being destroyed. It also
cleans up some code duplication and regularizes some
function names.

* Grab appropriate lock in conn_free() and explain in comment
* Ensure via locking that a conn is always on either
  a dev's list or the nodev list
* Add rds_xx_remove_conn() to match rds_xx_add_conn()
* Make rds_xx_add_conn() return void
* Rename remove_{,nodev_}conns() to
  destroy_{,nodev_}conns() and unify their implementation
  in a helper function
* Document lock ordering as nodev conn_lock before
  dev_conn_lock
Reported-by: Yosef Etigin <yosefe@voltaire.com>
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f1cffcbf
...@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); ...@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
struct list_head rds_ib_devices; struct list_head rds_ib_devices;
/* NOTE: if also grabbing ibdev lock, grab this first */
DEFINE_SPINLOCK(ib_nodev_conns_lock); DEFINE_SPINLOCK(ib_nodev_conns_lock);
LIST_HEAD(ib_nodev_conns); LIST_HEAD(ib_nodev_conns);
...@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device) ...@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
kfree(i_ipaddr); kfree(i_ipaddr);
} }
rds_ib_remove_conns(rds_ibdev); rds_ib_destroy_conns(rds_ibdev);
if (rds_ibdev->mr_pool) if (rds_ibdev->mr_pool)
rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
...@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr) ...@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
void rds_ib_exit(void) void rds_ib_exit(void)
{ {
rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
rds_ib_remove_nodev_conns(); rds_ib_destroy_nodev_conns();
ib_unregister_client(&rds_ib_client); ib_unregister_client(&rds_ib_client);
rds_ib_sysctl_exit(); rds_ib_sysctl_exit();
rds_ib_recv_exit(); rds_ib_recv_exit();
......
...@@ -267,9 +267,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, ...@@ -267,9 +267,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
/* ib_rdma.c */ /* ib_rdma.c */
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_nodev_conns(void); void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
/*
 * Destroy the connections held on the global nodev list.
 *
 * Thin wrapper: hands the ib_nodev_conns list and the lock that guards it
 * (ib_nodev_conns_lock) to the shared helper __rds_ib_destroy_conns().
 */
static inline void rds_ib_destroy_nodev_conns(void)
{
__rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
}
/*
 * Destroy the connections held on one IB device's connection list.
 *
 * Thin wrapper: hands rds_ibdev->conn_list and rds_ibdev->spinlock to the
 * shared helper __rds_ib_destroy_conns().
 *
 * @rds_ibdev: device whose conn_list is to be torn down; dereferenced
 *             unconditionally, so it must not be NULL.
 */
static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
{
__rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
}
struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
......
...@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even ...@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
if (err) if (err)
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
err = rds_ib_add_conn(rds_ibdev, conn); rds_ib_add_conn(rds_ibdev, conn);
if (err)
printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
/* If the peer gave us the last packet it saw, process this as if /* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */ * we had received a regular ACK. */
...@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) ...@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
/* /*
* Move connection back to the nodev list. * Move connection back to the nodev list.
*/ */
if (ic->rds_ibdev) { if (ic->rds_ibdev)
rds_ib_remove_conn(ic->rds_ibdev, conn);
spin_lock_irq(&ic->rds_ibdev->spinlock);
BUG_ON(list_empty(&ic->ib_node));
list_del(&ic->ib_node);
spin_unlock_irq(&ic->rds_ibdev->spinlock);
spin_lock_irq(&ib_nodev_conns_lock);
list_add_tail(&ic->ib_node, &ib_nodev_conns);
spin_unlock_irq(&ib_nodev_conns_lock);
ic->rds_ibdev = NULL;
}
ic->i_cm_id = NULL; ic->i_cm_id = NULL;
ic->i_pd = NULL; ic->i_pd = NULL;
...@@ -701,11 +689,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) ...@@ -701,11 +689,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
return 0; return 0;
} }
/*
* Free a connection. Connection must be shut down and not set for reconnect.
*/
void rds_ib_conn_free(void *arg) void rds_ib_conn_free(void *arg)
{ {
struct rds_ib_connection *ic = arg; struct rds_ib_connection *ic = arg;
spinlock_t *lock_ptr;
rdsdebug("ic %p\n", ic); rdsdebug("ic %p\n", ic);
/*
* Conn is either on a dev's list or on the nodev list.
* A race with shutdown() or connect() would cause problems
* (since rds_ibdev would change) but that should never happen.
*/
lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
spin_lock_irq(lock_ptr);
list_del(&ic->ib_node); list_del(&ic->ib_node);
spin_unlock_irq(lock_ptr);
kfree(ic); kfree(ic);
} }
......
...@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) ...@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
return rds_ib_add_ipaddr(rds_ibdev, ipaddr); return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
} }
int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
{ {
struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_connection *ic = conn->c_transport_data;
...@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn ...@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
BUG_ON(list_empty(&ib_nodev_conns)); BUG_ON(list_empty(&ib_nodev_conns));
BUG_ON(list_empty(&ic->ib_node)); BUG_ON(list_empty(&ic->ib_node));
list_del(&ic->ib_node); list_del(&ic->ib_node);
spin_unlock_irq(&ib_nodev_conns_lock);
spin_lock_irq(&rds_ibdev->spinlock); spin_lock_irq(&rds_ibdev->spinlock);
list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
spin_unlock_irq(&rds_ibdev->spinlock); spin_unlock_irq(&rds_ibdev->spinlock);
spin_unlock_irq(&ib_nodev_conns_lock);
ic->rds_ibdev = rds_ibdev; ic->rds_ibdev = rds_ibdev;
return 0;
} }
void rds_ib_remove_nodev_conns(void) void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
{ {
struct rds_ib_connection *ic, *_ic; struct rds_ib_connection *ic = conn->c_transport_data;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */ /* place conn on nodev_conns_list */
spin_lock_irq(&ib_nodev_conns_lock); spin_lock(&ib_nodev_conns_lock);
list_splice(&ib_nodev_conns, &tmp_list);
INIT_LIST_HEAD(&ib_nodev_conns);
spin_unlock_irq(&ib_nodev_conns_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { spin_lock_irq(&rds_ibdev->spinlock);
if (ic->conn->c_passive) BUG_ON(list_empty(&ic->ib_node));
rds_conn_destroy(ic->conn->c_passive); list_del(&ic->ib_node);
rds_conn_destroy(ic->conn); spin_unlock_irq(&rds_ibdev->spinlock);
}
list_add_tail(&ic->ib_node, &ib_nodev_conns);
spin_unlock(&ib_nodev_conns_lock);
ic->rds_ibdev = NULL;
} }
void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
{ {
struct rds_ib_connection *ic, *_ic; struct rds_ib_connection *ic, *_ic;
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */ /* avoid calling conn_destroy with irqs off */
spin_lock_irq(&rds_ibdev->spinlock); spin_lock_irq(list_lock);
list_splice(&rds_ibdev->conn_list, &tmp_list); list_splice(list, &tmp_list);
INIT_LIST_HEAD(&rds_ibdev->conn_list); INIT_LIST_HEAD(list);
spin_unlock_irq(&rds_ibdev->spinlock); spin_unlock_irq(list_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
if (ic->conn->c_passive) if (ic->conn->c_passive)
......
...@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR ...@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
struct list_head rds_iw_devices; struct list_head rds_iw_devices;
/* NOTE: if also grabbing iwdev lock, grab this first */
DEFINE_SPINLOCK(iw_nodev_conns_lock); DEFINE_SPINLOCK(iw_nodev_conns_lock);
LIST_HEAD(iw_nodev_conns); LIST_HEAD(iw_nodev_conns);
...@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device) ...@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
} }
spin_unlock_irq(&rds_iwdev->spinlock); spin_unlock_irq(&rds_iwdev->spinlock);
rds_iw_remove_conns(rds_iwdev); rds_iw_destroy_conns(rds_iwdev);
if (rds_iwdev->mr_pool) if (rds_iwdev->mr_pool)
rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
...@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr) ...@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
void rds_iw_exit(void) void rds_iw_exit(void)
{ {
rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
rds_iw_remove_nodev_conns(); rds_iw_destroy_nodev_conns();
ib_unregister_client(&rds_iw_client); ib_unregister_client(&rds_iw_client);
rds_iw_sysctl_exit(); rds_iw_sysctl_exit();
rds_iw_recv_exit(); rds_iw_recv_exit();
......
...@@ -294,9 +294,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, ...@@ -294,9 +294,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
/* ib_rdma.c */ /* ib_rdma.c */
int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
void rds_iw_remove_nodev_conns(void); void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
/*
 * Destroy the connections held on the global iWARP nodev list.
 *
 * Thin wrapper: hands the iw_nodev_conns list and the lock that guards it
 * (iw_nodev_conns_lock) to the shared helper __rds_iw_destroy_conns().
 */
static inline void rds_iw_destroy_nodev_conns(void)
{
__rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
}
/*
 * Destroy the connections held on one iWARP device's connection list.
 *
 * Thin wrapper: hands rds_iwdev->conn_list and rds_iwdev->spinlock to the
 * shared helper __rds_iw_destroy_conns().
 *
 * @rds_iwdev: device whose conn_list is to be torn down; dereferenced
 *             unconditionally, so it must not be NULL.
 */
static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
{
__rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
}
struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
......
...@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even ...@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
if (err) if (err)
printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
err = rds_iw_add_conn(rds_iwdev, conn); rds_iw_add_conn(rds_iwdev, conn);
if (err)
printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
/* If the peer gave us the last packet it saw, process this as if /* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */ * we had received a regular ACK. */
...@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) ...@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
* Move connection back to the nodev list. * Move connection back to the nodev list.
* Remove cm_id from the device cm_id list. * Remove cm_id from the device cm_id list.
*/ */
if (ic->rds_iwdev) { if (ic->rds_iwdev)
rds_iw_remove_conn(ic->rds_iwdev, conn);
spin_lock_irq(&ic->rds_iwdev->spinlock);
BUG_ON(list_empty(&ic->iw_node));
list_del(&ic->iw_node);
spin_unlock_irq(&ic->rds_iwdev->spinlock);
spin_lock_irq(&iw_nodev_conns_lock);
list_add_tail(&ic->iw_node, &iw_nodev_conns);
spin_unlock_irq(&iw_nodev_conns_lock);
rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
ic->rds_iwdev = NULL;
}
rdma_destroy_id(ic->i_cm_id); rdma_destroy_id(ic->i_cm_id);
...@@ -726,11 +713,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) ...@@ -726,11 +713,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
return 0; return 0;
} }
/*
* Free a connection. Connection must be shut down and not set for reconnect.
*/
void rds_iw_conn_free(void *arg) void rds_iw_conn_free(void *arg)
{ {
struct rds_iw_connection *ic = arg; struct rds_iw_connection *ic = arg;
spinlock_t *lock_ptr;
rdsdebug("ic %p\n", ic); rdsdebug("ic %p\n", ic);
/*
* Conn is either on a dev's list or on the nodev list.
* A race with shutdown() or connect() would cause problems
* (since rds_iwdev would change) but that should never happen.
*/
lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
spin_lock_irq(lock_ptr);
list_del(&ic->iw_node); list_del(&ic->iw_node);
spin_unlock_irq(lock_ptr);
kfree(ic); kfree(ic);
} }
......
...@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i ...@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
return rds_iw_add_cm_id(rds_iwdev, cm_id); return rds_iw_add_cm_id(rds_iwdev, cm_id);
} }
int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
{ {
struct rds_iw_connection *ic = conn->c_transport_data; struct rds_iw_connection *ic = conn->c_transport_data;
...@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn ...@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
BUG_ON(list_empty(&iw_nodev_conns)); BUG_ON(list_empty(&iw_nodev_conns));
BUG_ON(list_empty(&ic->iw_node)); BUG_ON(list_empty(&ic->iw_node));
list_del(&ic->iw_node); list_del(&ic->iw_node);
spin_unlock_irq(&iw_nodev_conns_lock);
spin_lock_irq(&rds_iwdev->spinlock); spin_lock_irq(&rds_iwdev->spinlock);
list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
spin_unlock_irq(&rds_iwdev->spinlock); spin_unlock_irq(&rds_iwdev->spinlock);
spin_unlock_irq(&iw_nodev_conns_lock);
ic->rds_iwdev = rds_iwdev; ic->rds_iwdev = rds_iwdev;
return 0;
} }
void rds_iw_remove_nodev_conns(void) void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
{ {
struct rds_iw_connection *ic, *_ic; struct rds_iw_connection *ic = conn->c_transport_data;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */ /* place conn on nodev_conns_list */
spin_lock_irq(&iw_nodev_conns_lock); spin_lock(&iw_nodev_conns_lock);
list_splice(&iw_nodev_conns, &tmp_list);
INIT_LIST_HEAD(&iw_nodev_conns);
spin_unlock_irq(&iw_nodev_conns_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { spin_lock_irq(&rds_iwdev->spinlock);
if (ic->conn->c_passive) BUG_ON(list_empty(&ic->iw_node));
rds_conn_destroy(ic->conn->c_passive); list_del(&ic->iw_node);
rds_conn_destroy(ic->conn); spin_unlock_irq(&rds_iwdev->spinlock);
}
list_add_tail(&ic->iw_node, &iw_nodev_conns);
spin_unlock(&iw_nodev_conns_lock);
rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
ic->rds_iwdev = NULL;
} }
void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
{ {
struct rds_iw_connection *ic, *_ic; struct rds_iw_connection *ic, *_ic;
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */ /* avoid calling conn_destroy with irqs off */
spin_lock_irq(&rds_iwdev->spinlock); spin_lock_irq(list_lock);
list_splice(&rds_iwdev->conn_list, &tmp_list); list_splice(list, &tmp_list);
INIT_LIST_HEAD(&rds_iwdev->conn_list); INIT_LIST_HEAD(list);
spin_unlock_irq(&rds_iwdev->spinlock); spin_unlock_irq(list_lock);
list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
if (ic->conn->c_passive) if (ic->conn->c_passive)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment