Commit 0457e515 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Some medium sized bugs in the various drivers. A couple are more
  recent regressions:

   - Fix two panics in hfi1 and two allocation problems

   - Send the IGMP to the correct address in cma

   - Squash a syzkaller bug related to races reading the multicast list

   - Memory leak in siw and cm

   - Fix a corner-case spec-compliance issue for HFI/QIB

   - Correct the implementation of fences in siw

   - Error unwind bug in mlx4"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/mlx4: Don't continue event handler after memory allocation failure
  RDMA/siw: Fix broken RDMA Read Fence/Resume logic.
  IB/rdmavt: Validate remote_addr during loopback atomic tests
  IB/cm: Release previously acquired reference counter in the cm_id_priv
  RDMA/siw: Fix refcounting leak in siw_create_qp()
  RDMA/ucma: Protect mc during concurrent multicast leaves
  RDMA/cma: Use correct address when leaving multicast group
  IB/hfi1: Fix tstats alloc and dealloc
  IB/hfi1: Fix AIP early init panic
  IB/hfi1: Fix alloc failure with larger txqueuelen
  IB/hfi1: Fix panic with larger ipoib send_queue_size
parents fc93310c f3136c4c
@@ -3322,7 +3322,7 @@ static int cm_lap_handler(struct cm_work *work)
ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
if (ret) {
rdma_destroy_ah_attr(&ah_attr);
return -EINVAL;
goto deref;
}
spin_lock_irq(&cm_id_priv->lock);
......
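The cm hunk above replaces a bare `return -EINVAL` with `goto deref`, so the reference taken on `cm_id_priv` earlier in `cm_lap_handler()` is also dropped on this error path. Below is a minimal userspace sketch of that unwind pattern, assuming nothing beyond the pattern itself; the names (`obj_get`, `obj_put`, `do_step`) are illustrative, not kernel API.

```c
/* Sketch of the goto-unwind pattern the cm fix restores: every error path
 * taken after a reference is acquired must fall through the matching put. */
#include <stdio.h>
#include <errno.h>

struct obj { int refcount; };

static void obj_get(struct obj *o) { o->refcount++; }
static void obj_put(struct obj *o) { o->refcount--; }

static int do_step(int fail) { return fail ? -EINVAL : 0; }

static int handler(struct obj *o, int fail_step)
{
	int ret;

	obj_get(o);                     /* reference held from here on */

	ret = do_step(fail_step == 1);
	if (ret)
		goto deref;             /* not "return ret": that would leak the ref */

	ret = do_step(fail_step == 2);
	if (ret)
		goto deref;

	/* ... normal completion ... */
deref:
	obj_put(o);                     /* balanced on success and on every error */
	return ret;
}

int main(void)
{
	struct obj o = { .refcount = 0 };

	handler(&o, 1);
	printf("refcount after failing handler: %d\n", o.refcount); /* prints 0 */
	return 0;
}
```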
@@ -67,8 +67,8 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
union ib_gid *mgid);
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
enum ib_gid_type gid_type);
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
@@ -1846,17 +1846,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
if (ndev) {
if (ndev && !send_only) {
enum ib_gid_type gid_type;
union ib_gid mgid;
cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
&mgid);
if (!send_only)
cma_igmp_send(ndev, &mgid, false);
dev_put(ndev);
gid_type = id_priv->cma_dev->default_gid_type
[id_priv->id.port_num -
rdma_start_port(
id_priv->cma_dev->device)];
cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
gid_type);
cma_igmp_send(ndev, &mgid, false);
}
dev_put(ndev);
cancel_work_sync(&mc->iboe_join.work);
}
......
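The cma hunks switch the IGMP leave to an MGID computed with `cma_iboe_set_mgid()` and the port's default GID type, i.e. the same derivation used when the group was joined, and drop the netdev reference unconditionally. The standalone sketch below only illustrates the underlying rule, that join and leave must derive the group address with the same helper and parameters; `derive_mgid`, `join_group` and `leave_group` are hypothetical stand-ins, not the kernel functions.

```c
/* Illustrative sketch: a leave must derive the group address exactly the way
 * the join did, or it targets the wrong group. derive_mgid() is a made-up
 * stand-in for cma_iboe_set_mgid(). */
#include <stdint.h>
#include <stdio.h>

enum gid_type { GID_IB, GID_ROCE_V2 };

static uint64_t derive_mgid(const char *addr, enum gid_type type)
{
	uint64_t h = (uint64_t)type;

	while (*addr)
		h = h * 31 + (unsigned char)*addr++;
	return h;
}

struct membership { uint64_t mgid; int joined; };

static void join_group(struct membership *m, const char *addr, enum gid_type t)
{
	m->mgid = derive_mgid(addr, t);
	m->joined = 1;
}

static int leave_group(struct membership *m, const char *addr, enum gid_type t)
{
	/* same helper and same GID type as the join path */
	if (derive_mgid(addr, t) != m->mgid)
		return -1;              /* would leave the wrong group */
	m->joined = 0;
	return 0;
}

int main(void)
{
	struct membership m = { 0 };

	join_group(&m, "224.0.0.251", GID_ROCE_V2);
	/* the fixed bug class: deriving the leave address differently
	 * (e.g. with GID_IB here) would not match the joined group */
	printf("leave ok: %d\n", leave_group(&m, "224.0.0.251", GID_ROCE_V2));
	return 0;
}
```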
@@ -95,6 +95,7 @@ struct ucma_context {
u64 uid;
struct list_head list;
struct list_head mc_list;
struct work_struct close_work;
};
@@ -105,6 +106,7 @@ struct ucma_multicast {
u64 uid;
u8 join_state;
struct list_head list;
struct sockaddr_storage addr;
};
@@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_WORK(&ctx->close_work, ucma_close_id);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
/* So list_del() will work if we don't do ucma_finish_ctx() */
INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
@@ -484,19 +487,19 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc;
unsigned long index;
struct ucma_multicast *mc, *tmp;
xa_for_each(&multicast_table, index, mc) {
if (mc->ctx != ctx)
continue;
xa_lock(&multicast_table);
list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
list_del(&mc->list);
/*
* At this point mc->ctx->ref is 0 so the mc cannot leave the
* lock on the reader and this is enough serialization
*/
xa_erase(&multicast_table, index);
__xa_erase(&multicast_table, mc->id);
kfree(mc);
}
xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file,
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
xa_lock(&multicast_table);
if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
GFP_KERNEL)) {
ret = -ENOMEM;
goto err_free_mc;
}
list_add_tail(&mc->list, &ctx->mc_list);
xa_unlock(&multicast_table);
mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
@@ -1500,8 +1507,11 @@ static ssize_t ucma_process_join(struct ucma_file *file,
mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
err_xa_erase:
xa_erase(&multicast_table, mc->id);
xa_lock(&multicast_table);
list_del(&mc->list);
__xa_erase(&multicast_table, mc->id);
err_free_mc:
xa_unlock(&multicast_table);
kfree(mc);
err_put_ctx:
ucma_put_ctx(ctx);
@@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = ERR_PTR(-EINVAL);
else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
else
__xa_erase(&multicast_table, mc->id);
xa_unlock(&multicast_table);
if (IS_ERR(mc)) {
xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
list_del(&mc->list);
__xa_erase(&multicast_table, mc->id);
xa_unlock(&multicast_table);
mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
mutex_unlock(&mc->ctx->mutex);
......
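The ucma hunks hold the `multicast_table` lock while an entry is added to or removed from both the xarray and the per-context `mc_list`, so a concurrent leave can no longer find a half-torn-down `mc`. The sketch below mirrors that rule in plain C with a pthread mutex; the table, list, and function names are illustrative only, not the kernel API.

```c
/* Userspace analogue: an entry lives in two structures, an id table and a
 * per-context list; both are updated under one lock, so a racing "leave"
 * never sees an entry that is half torn down. */
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

#define MAX_IDS 64

struct mc {
	int id;
	struct mc *next;                /* per-context list linkage */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mc *id_table[MAX_IDS];    /* stand-in for the xarray */
static struct mc *ctx_list;             /* stand-in for ctx->mc_list */

static struct mc *mc_create(void)
{
	struct mc *mc = calloc(1, sizeof(*mc));

	if (!mc)
		return NULL;
	pthread_mutex_lock(&table_lock);
	for (mc->id = 0; mc->id < MAX_IDS && id_table[mc->id]; mc->id++)
		;
	if (mc->id == MAX_IDS) {
		pthread_mutex_unlock(&table_lock);
		free(mc);
		return NULL;
	}
	id_table[mc->id] = mc;          /* both insertions under the same lock */
	mc->next = ctx_list;
	ctx_list = mc;
	pthread_mutex_unlock(&table_lock);
	return mc;
}

static int mc_leave(int id)
{
	struct mc **p, *mc;

	pthread_mutex_lock(&table_lock);
	mc = (id >= 0 && id < MAX_IDS) ? id_table[id] : NULL;
	if (!mc) {
		pthread_mutex_unlock(&table_lock);
		return -1;              /* already gone: the racing leave lost */
	}
	id_table[id] = NULL;            /* erase from the table ... */
	for (p = &ctx_list; *p; p = &(*p)->next)
		if (*p == mc) {
			*p = mc->next;  /* ... and unlink from the list, still locked */
			break;
		}
	pthread_mutex_unlock(&table_lock);
	free(mc);
	return 0;
}

int main(void)
{
	struct mc *mc = mc_create();
	int id = mc ? mc->id : -1;

	printf("first leave:  %d\n", mc_leave(id)); /* 0 */
	printf("second leave: %d\n", mc_leave(id)); /* -1: entry already gone */
	return 0;
}
```

The second leave failing cleanly, instead of freeing the same entry again, is exactly the syzkaller-reported race the patch closes.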
@@ -55,7 +55,7 @@ union hfi1_ipoib_flow {
*/
struct ipoib_txreq {
struct sdma_txreq txreq;
struct hfi1_sdma_header sdma_hdr;
struct hfi1_sdma_header *sdma_hdr;
int sdma_status;
int complete;
struct hfi1_ipoib_dev_priv *priv;
......
@@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
int ret;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
ret = priv->netdev_ops->ndo_init(dev);
if (ret)
return ret;
goto out_ret;
ret = hfi1_netdev_add_data(priv->dd,
qpn_from_mac(priv->netdev->dev_addr),
dev);
if (ret < 0) {
priv->netdev_ops->ndo_uninit(dev);
return ret;
goto out_ret;
}
return 0;
out_ret:
free_percpu(dev->tstats);
dev->tstats = NULL;
return ret;
}
static void hfi1_ipoib_dev_uninit(struct net_device *dev)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
free_percpu(dev->tstats);
dev->tstats = NULL;
hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
priv->netdev_ops->ndo_uninit(dev);
@@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
hfi1_ipoib_rxq_deinit(priv->netdev);
free_percpu(dev->tstats);
}
static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
{
hfi1_ipoib_netdev_dtor(dev);
free_netdev(dev);
dev->tstats = NULL;
}
static void hfi1_ipoib_set_id(struct net_device *dev, int id)
@@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
priv->port_num = port_num;
priv->netdev_ops = netdev->netdev_ops;
netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
rc = hfi1_ipoib_txreq_init(priv);
if (rc) {
dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
hfi1_ipoib_free_rdma_netdev(netdev);
return rc;
}
rc = hfi1_ipoib_rxq_init(netdev);
if (rc) {
dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
hfi1_ipoib_free_rdma_netdev(netdev);
hfi1_ipoib_txreq_deinit(priv);
return rc;
}
netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
netdev->needs_free_netdev = true;
......
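These ipoib hunks make `ndo_init`/`ndo_uninit` own `dev->tstats`: every early exit from init frees the per-CPU stats, and uninit frees and clears the pointer so the netdev destructor cannot free it a second time. A userspace sketch of that ownership rule follows; all names here are illustrative, not the hfi1 code.

```c
/* Sketch of the init/uninit ownership rule behind the tstats fix: whoever
 * allocates in init frees on every init failure path, and uninit frees and
 * clears the pointer so a later destructor cannot double free. */
#include <stdlib.h>
#include <stdio.h>

struct dev {
	long *stats;
	int registered;
};

static int sub_init(int fail) { return fail ? -1 : 0; }
static void sub_uninit(struct dev *d) { d->registered = 0; }

static int dev_init(struct dev *d, int fail_sub)
{
	int ret;

	d->stats = calloc(16, sizeof(*d->stats));
	if (!d->stats)
		return -1;

	ret = sub_init(fail_sub);
	if (ret)
		goto out_free;          /* undo our own allocation on failure */

	d->registered = 1;
	return 0;

out_free:
	free(d->stats);
	d->stats = NULL;
	return ret;
}

static void dev_uninit(struct dev *d)
{
	free(d->stats);
	d->stats = NULL;                /* later teardown sees NULL, no double free */
	if (d->registered)
		sub_uninit(d);
}

static void dev_destroy(struct dev *d)
{
	free(d->stats);                 /* safe: either valid or already NULL */
	d->stats = NULL;
}

int main(void)
{
	struct dev d = { 0 };

	if (dev_init(&d, 0) == 0) {
		dev_uninit(&d);
		dev_destroy(&d);        /* no double free thanks to the NULLing */
	}
	printf("done\n");
	return 0;
}
```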
@@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
dd_dev_warn(priv->dd,
"%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
__func__, tx->sdma_status,
le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
tx->txq->sde->this_idx);
}
@@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
{
struct hfi1_devdata *dd = txp->dd;
struct sdma_txreq *txreq = &tx->txreq;
struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
u16 pkt_bytes =
sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
int ret;
@@ -256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
struct ipoib_txparms *txp)
{
struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
struct sk_buff *skb = tx->skb;
struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
struct rdma_ah_attr *ah_attr = txp->ah_attr;
@@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
if (likely(!ret)) {
tx_ok:
trace_sdma_output_ibhdr(txq->priv->dd,
&tx->sdma_hdr.hdr,
&tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
hfi1_ipoib_check_queue_depth(txq);
return NETDEV_TX_OK;
@@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
hfi1_ipoib_check_queue_depth(txq);
trace_sdma_output_ibhdr(txq->priv->dd,
&tx->sdma_hdr.hdr,
&tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
if (!netdev_xmit_more())
@@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
struct net_device *dev = priv->netdev;
u32 tx_ring_size, tx_item_size;
int i;
struct hfi1_ipoib_circ_buf *tx_ring;
int i, j;
/*
* Ring holds 1 less than tx_ring_size
@@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
struct ipoib_txreq *tx;
tx_ring = &txq->tx_ring;
iowait_init(&txq->wait,
0,
hfi1_ipoib_flush_txq,
@@ -725,14 +728,19 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
priv->dd->node);
txq->tx_ring.items =
kcalloc_node(tx_ring_size, tx_item_size,
GFP_KERNEL, priv->dd->node);
kvzalloc_node(array_size(tx_ring_size, tx_item_size),
GFP_KERNEL, priv->dd->node);
if (!txq->tx_ring.items)
goto free_txqs;
txq->tx_ring.max_items = tx_ring_size;
txq->tx_ring.shift = ilog2(tx_ring_size);
txq->tx_ring.shift = ilog2(tx_item_size);
txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
tx_ring = &txq->tx_ring;
for (j = 0; j < tx_ring_size; j++)
hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
kzalloc_node(sizeof(*tx->sdma_hdr),
GFP_KERNEL, priv->dd->node);
netif_tx_napi_add(dev, &txq->napi,
hfi1_ipoib_poll_tx_ring,
@@ -746,7 +754,10 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
netif_napi_del(&txq->napi);
kfree(txq->tx_ring.items);
tx_ring = &txq->tx_ring;
for (j = 0; j < tx_ring_size; j++)
kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
kvfree(tx_ring->items);
}
kfree(priv->txqs);
@@ -780,17 +791,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
int i;
int i, j;
for (i = 0; i < priv->netdev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
iowait_cancel_work(&txq->wait);
iowait_sdma_drain(&txq->wait);
hfi1_ipoib_drain_tx_list(txq);
netif_napi_del(&txq->napi);
hfi1_ipoib_drain_tx_ring(txq);
kfree(txq->tx_ring.items);
for (j = 0; j < tx_ring->max_items; j++)
kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
kvfree(tx_ring->items);
}
kfree(priv->txqs);
......
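The TX-ring hunks shrink each ring slot by pointing at a separately allocated `sdma_hdr`, move the flat slot array to `kvzalloc_node()` (which can fall back to vmalloc for long tx queues), and index slots with `shift = ilog2(tx_item_size)`. The standalone sketch below shows the same layout with plain `calloc`; structure and helper names are illustrative only, assuming a power-of-two slot size.

```c
/* Sketch of the reworked tx ring layout: one flat buffer of power-of-two
 * sized slots indexed by a shift, with the large DMA header allocated per
 * slot instead of embedded, so the flat buffer stays small for long queues. */
#include <stdlib.h>
#include <stdio.h>

struct hdr { char raw[256]; };          /* stand-in for the big sdma header */

struct txreq {
	struct hdr *hdr;                /* pointer, not embedded */
	int status;
};

struct tx_ring {
	void *items;
	unsigned int size;              /* number of slots */
	unsigned int shift;             /* log2 of the slot size in bytes */
};

static struct txreq *txreq_from_idx(struct tx_ring *r, unsigned int i)
{
	return (struct txreq *)((char *)r->items + ((size_t)i << r->shift));
}

static int ring_init(struct tx_ring *r, unsigned int size)
{
	unsigned int item_size = 64;    /* power of two so the shift works */
	unsigned int i;

	r->items = calloc(size, item_size);
	if (!r->items)
		return -1;
	r->size = size;
	r->shift = 6;                   /* log2(item_size) */

	for (i = 0; i < size; i++) {
		txreq_from_idx(r, i)->hdr = calloc(1, sizeof(struct hdr));
		if (!txreq_from_idx(r, i)->hdr)
			goto err;       /* free everything allocated so far */
	}
	return 0;

err:
	while (i--)
		free(txreq_from_idx(r, i)->hdr);
	free(r->items);
	r->items = NULL;
	return -1;
}

static void ring_free(struct tx_ring *r)
{
	unsigned int i;

	for (i = 0; i < r->size; i++)
		free(txreq_from_idx(r, i)->hdr);
	free(r->items);
	r->items = NULL;
}

int main(void)
{
	struct tx_ring r;

	if (ring_init(&r, 8192))        /* long queue, still a small flat buffer */
		return 1;
	printf("slot 0 hdr at %p\n", (void *)txreq_from_idx(&r, 0)->hdr);
	ring_free(&r);
	return 0;
}
```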
@@ -3237,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
break;
return;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
......
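In the mlx4 hunk, `break` on a failed `GFP_ATOMIC` allocation fell through to the code after the switch, which dispatches an event whose fields the failed case never initialized; returning stops the handler instead. A compact sketch of that control-flow hazard, with made-up names, follows.

```c
/* Sketch of the hazard the mlx4 fix removes: the code after the switch uses
 * state the case was supposed to set up, so a failed allocation must return
 * from the handler, not just break out of the switch. */
#include <stdlib.h>
#include <stdio.h>

struct notification {
	int code;
	void *payload;
};

static void dispatch(struct notification *n)
{
	printf("dispatch code=%d payload=%p\n", n->code, n->payload);
	free(n->payload);
}

static void event_handler(int event, int simulate_alloc_failure)
{
	struct notification n;          /* only valid if a case fills it in */

	switch (event) {
	case 1:
		n.payload = simulate_alloc_failure ? NULL : malloc(16);
		if (!n.payload)
			return;         /* "break" would dispatch an uninitialized n.code */
		n.code = event;
		break;
	default:
		return;
	}

	dispatch(&n);                   /* common tail shared by the cases */
}

int main(void)
{
	event_handler(1, 0);            /* normal path: dispatched and freed */
	event_handler(1, 1);            /* failure path: returns early */
	return 0;
}
```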
@@ -3073,6 +3073,8 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
......
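The rdmavt hunk rejects loopback atomics whose `remote_addr` is not 8-byte aligned, which requires the low three address bits to be zero. The check itself is just a mask test, shown here as a minimal standalone illustration:

```c
/* An 8-byte atomic target must be naturally aligned: the low three address
 * bits have to be zero, which the mask test below verifies. */
#include <stdint.h>
#include <stdio.h>

static int atomic_addr_ok(uint64_t remote_addr)
{
	return (remote_addr & (sizeof(uint64_t) - 1)) == 0;
}

int main(void)
{
	printf("0x1000 ok? %d\n", atomic_addr_ok(0x1000));  /* 1: aligned */
	printf("0x1004 ok? %d\n", atomic_addr_ok(0x1004));  /* 0: rejected */
	return 0;
}
```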
@@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
return &qp->orq[qp->orq_get % qp->attrs.orq_size];
}
static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
{
return &qp->orq[qp->orq_put % qp->attrs.orq_size];
}
static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
struct siw_sqe *orq_e = orq_get_tail(qp);
struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
......
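The new `orq_get_free()` helper only hands out the slot at the producer index while its `flags` word is still zero, i.e. while the ORQ is not full. A toy ring demonstrating that convention, with illustrative sizes and names, is sketched below.

```c
/* Sketch of the ORQ free-slot check: the slot at the producer index is
 * usable only while its flags word is still zero (ring not full). */
#include <stdio.h>

#define ORQ_SIZE 4

struct sqe { unsigned int flags; };     /* nonzero flags = entry in flight */

static struct sqe orq[ORQ_SIZE];
static unsigned int orq_get, orq_put;

static struct sqe *orq_get_free(void)
{
	struct sqe *e = &orq[orq_put % ORQ_SIZE];

	return e->flags == 0 ? e : NULL;   /* NULL: ORQ full, caller must wait */
}

int main(void)
{
	struct sqe *e;
	int i;

	for (i = 0; i < ORQ_SIZE + 1; i++) {
		e = orq_get_free();
		if (!e) {
			printf("ORQ full after %d entries\n", i);
			break;
		}
		e->flags = 1;           /* slot now holds an outstanding read */
		orq_put++;
	}
	/* complete the oldest entry: clear its flags, advance the consumer */
	orq[orq_get % ORQ_SIZE].flags = 0;
	orq_get++;
	printf("slot free again: %s\n", orq_get_free() ? "yes" : "no");
	return 0;
}
```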
@@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
spin_lock_irqsave(&qp->orq_lock, flags);
rreq = orq_get_current(qp);
/* free current orq entry */
rreq = orq_get_current(qp);
WRITE_ONCE(rreq->flags, 0);
qp->orq_get++;
if (qp->tx_ctx.orq_fence) {
if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
@@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
rv = -EPROTO;
goto out;
}
/* resume SQ processing */
/* resume SQ processing, if possible */
if (tx_waiting->sqe.opcode == SIW_OP_READ ||
tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
rreq = orq_get_tail(qp);
/* SQ processing was stopped because of a full ORQ */
rreq = orq_get_free(qp);
if (unlikely(!rreq)) {
pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
rv = -EPROTO;
@@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
resume_tx = 1;
} else if (siw_orq_empty(qp)) {
/*
* SQ processing was stopped by fenced work request.
* Resume since all previous Read's are now completed.
*/
qp->tx_ctx.orq_fence = 0;
resume_tx = 1;
} else {
pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
qp_id(qp), qp->orq_get, qp->orq_put);
rv = -EPROTO;
}
}
qp->orq_get++;
out:
spin_unlock_irqrestore(&qp->orq_lock, flags);
......
@@ -313,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
siw_dbg(base_dev, "too many QP's\n");
return -ENOMEM;
rv = -ENOMEM;
goto err_atomic;
}
if (attrs->qp_type != IB_QPT_RC) {
siw_dbg(base_dev, "only RC QP's supported\n");
......
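The siw_create_qp hunk turns the over-limit `return -ENOMEM` into `rv = -ENOMEM; goto err_atomic;`, so the `num_qp` counter bumped just above is decremented again. A minimal sketch of that counter unwind, with hypothetical names, follows.

```c
/* Sketch of the counter unwind the siw_create_qp fix restores: once the
 * usage counter has been bumped, every error exit must drop it again. */
#include <stdio.h>
#include <errno.h>

#define MAX_QP 2

static int num_qp;

static int create_qp(int force_other_failure)
{
	int rv;

	if (++num_qp > MAX_QP) {
		rv = -ENOMEM;
		goto err_atomic;        /* a bare "return -ENOMEM" leaks the count */
	}
	if (force_other_failure) {
		rv = -EINVAL;
		goto err_atomic;
	}
	return 0;

err_atomic:
	num_qp--;                       /* undo the increment taken above */
	return rv;
}

int main(void)
{
	create_qp(0);
	create_qp(0);
	/* third create fails, but the counter stays at 2 instead of creeping up */
	printf("third create: %d, num_qp=%d\n", create_qp(0), num_qp);
	return 0;
}
```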