Commit 3ba8c625 authored by David S. Miller

Merge branch 'smc-race-fixes'

Wen Gu says:

====================
net/smc: Fixes for race in smc link group termination

We encountered some crashes recently; they are caused by a race
between accessing and freeing a link/link group during abnormal
smc link group termination. The crashes can be reproduced by frequently
triggering abnormal link group termination, e.g. by setting RNICs up/down.

This set of patches tries to fix this by extending the life cycle
of the link/link group to ensure that they won't be referenced after
being cleared or freed.

v1 -> v2:
- Improve some comments.

- Move the code that wakes up the lgrs_deleted wait queue from smc_lgr_free()
  to __smc_lgr_free().

- Move the code that wakes up the links_deleted wait queue from smcr_link_clear()
  to __smcr_link_clear().

- Move the smc_ibdev_cnt_dec() and put_device() calls from smcr_link_clear()
  to __smcr_link_clear().

- Move smc_lgr_put() to the end of __smcr_link_clear().

- Call smc_lgr_put() after 'out' tag in smcr_link_init() when link
  initialization fails.

- Modify the location where smc connection holds the lgr or link.

    before:
      * hold lgr in smc_lgr_register_conn().
      * hold link in smcr_lgr_conn_assign_link().
    after:
      * hold both lgr and link in smc_conn_create().

  This makes the location symmetrical with the place where smc connections
  put the lgr or link, which is smc_conn_free().

- Initialize conn->freed as zero in smc_conn_create().
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents de0e4447 61f434b0
......@@ -221,6 +221,7 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
u8 freed : 1; /* normal termiation */
u8 out_of_sync : 1; /* out of sync with peer */
};
......
......@@ -218,7 +218,6 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
conn->lgr = NULL;
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
......@@ -752,6 +751,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
lnk->link_idx = link_idx;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
......@@ -806,6 +806,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
smc_lgr_put(lgr); /* lgr_hold above */
return rc;
}
......@@ -844,6 +845,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->terminating = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
......@@ -1130,8 +1132,19 @@ void smc_conn_free(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!lgr)
if (!lgr || conn->freed)
/* Connection has never been registered in a
* link group, or has already been freed.
*/
return;
conn->freed = 1;
if (!conn->alert_token_local)
/* Connection has already unregistered from
* link group.
*/
goto lgr_put;
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
......@@ -1148,6 +1161,8 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
lgr_put:
smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}
/* unregister a link from a buf_desc */
......@@ -1206,9 +1221,10 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_ib_device *smcibdev;
if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
if (!lgr || lnk->state == SMC_LNK_UNUSED)
return;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk, log);
......@@ -1226,6 +1242,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
......@@ -1290,6 +1307,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
__smc_lgr_free_bufs(lgr, true);
}
/* Final teardown of a link group, called from smc_lgr_put() once the
 * last reference has been dropped: release the lgr buffers, decrement
 * the matching link group counter (per SMC-D device, or the global one
 * otherwise) and wake up waiters when that counter reaches zero, then
 * free the lgr itself.
 */
static void __smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);

	if (!lgr->is_smcd) {
		smc_wr_free_lgr_mem(lgr);
		if (atomic_dec_return(&lgr_cnt) == 0)
			wake_up(&lgrs_deleted);
	} else if (atomic_dec_return(&lgr->smcd->lgr_cnt) == 0) {
		wake_up(&lgr->smcd->lgrs_deleted);
	}

	/* lgr must not be touched past this point */
	kfree(lgr);
}
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
......@@ -1305,19 +1337,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
smc_llc_lgr_clear(lgr);
}
smc_lgr_free_bufs(lgr);
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
smc_wr_free_lgr_mem(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
kfree(lgr);
smc_lgr_put(lgr); /* theoretically last lgr_put */
}
/* Take an additional reference on the link group by incrementing
 * lgr->refcnt; each hold is expected to be balanced by a later
 * smc_lgr_put().
 */
void smc_lgr_hold(struct smc_link_group *lgr)
{
refcount_inc(&lgr->refcnt);
}
/* Drop one reference on the link group. The caller that drops the
 * last reference frees the link group via __smc_lgr_free().
 */
void smc_lgr_put(struct smc_link_group *lgr)
{
	if (!refcount_dec_and_test(&lgr->refcnt))
		return;	/* other references remain */

	__smc_lgr_free(lgr);
}
static void smc_sk_wake_ups(struct smc_sock *smc)
......@@ -1856,6 +1892,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
goto out;
}
}
smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
conn->freed = 0;
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
......
......@@ -249,6 +249,7 @@ struct smc_link_group {
u8 terminating : 1;/* lgr is terminating */
u8 freeing : 1; /* lgr is being freed */
refcount_t refcnt; /* lgr reference count */
bool is_smcd; /* SMC-R or SMC-D */
u8 smc_version;
u8 negotiated_eid[SMC_MAX_EID_LEN];
......@@ -487,6 +488,8 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_cleanup_early(struct smc_link_group *lgr);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
void smc_lgr_hold(struct smc_link_group *lgr);
void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment