Commit 6ff64d25 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Nothing profound here, just a last set of long standing bug fixes:

   - Incorrect error unwind in qib and pvrdma

   - User triggerable NULL pointer crash in mlx5 with ODP prefetch

   - syzkaller RCU race in uverbs

   - Rare double free crash in ipoib"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode
  RDMA/core: Fix double destruction of uobject
  RDMA/pvrdma: Fix missing pci disable in pvrdma_pci_probe()
  RDMA/mlx5: Fix NULL pointer dereference in destroy_prefetch_work
  IB/qib: Call kobject_put() when kobject_init_and_add() fails
parents 411ea679 1acba6a8
......@@ -153,9 +153,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->context = NULL;
/*
* For DESTROY the usecnt is held write locked, the caller is expected
* to put it unlock and put the object when done with it. Only DESTROY
* can remove the IDR handle.
* For DESTROY the usecnt is not changed, the caller is expected to
* manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
* handle.
*/
if (reason != RDMA_REMOVE_DESTROY)
atomic_set(&uobj->usecnt, 0);
......@@ -187,7 +187,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
/*
* This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
* sequence. It should only be used from command callbacks. On success the
* caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
* caller must pair this with uobj_put_destroy(). This
* version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref.
*/
......@@ -198,6 +198,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
down_read(&ufile->hw_destroy_rwsem);
/*
* Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
* write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
* This is because any other concurrent thread can still see the object
* in the xarray due to RCU. Leaving it locked ensures nothing else will
* touch it.
*/
ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
if (ret)
goto out_unlock;
......@@ -216,7 +223,7 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
/*
* uobj_get_destroy destroys the HW object and returns a handle to the uobj
* with a NULL object pointer. The caller must pair this with
* uverbs_put_destroy.
* uobj_put_destroy().
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id, struct uverbs_attr_bundle *attrs)
......@@ -250,8 +257,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
uobj_put_destroy(uobj);
return 0;
}
......
......@@ -1439,6 +1439,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (is_odp_mr(mr)) {
to_ib_umem_odp(mr->umem)->private = mr;
init_waitqueue_head(&mr->q_deferred_work);
atomic_set(&mr->num_deferred_work, 0);
err = xa_err(xa_store(&dev->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
......
......@@ -760,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping linkcontrol sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail;
goto bail_link;
}
kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
......@@ -770,7 +770,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping sl2vl sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_link;
goto bail_sl;
}
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
......@@ -780,7 +780,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping diag_counters sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_sl;
goto bail_diagc;
}
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
......@@ -793,7 +793,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_diagc;
goto bail_cc;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
......@@ -854,6 +854,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
&cc_table_bin_attr);
kobject_put(&ppd->pport_cc_kobj);
}
kobject_put(&ppd->diagc_kobj);
kobject_put(&ppd->sl2vl_kobj);
kobject_put(&ppd->pport_kobj);
}
......
......@@ -829,7 +829,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
ret = -ENOMEM;
goto err_free_device;
goto err_disable_pdev;
}
ret = pci_request_regions(pdev, DRV_NAME);
......
......@@ -377,8 +377,12 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring;
/* cyclic ring variables for managing tx_ring, for UD only */
unsigned int tx_head;
unsigned int tx_tail;
/* cyclic ring variables for counting overall outstanding send WRs */
unsigned int global_tx_head;
unsigned int global_tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_ud_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE];
......
......@@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return;
}
if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) {
if ((priv->global_tx_head - priv->global_tx_tail) ==
ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
netif_stop_queue(dev);
......@@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
} else {
netif_trans_update(dev);
++tx->tx_head;
++priv->tx_head;
++priv->global_tx_head;
}
}
......@@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_tx_lock(dev);
++tx->tx_tail;
++priv->tx_tail;
++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) &&
(priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 &&
((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev);
......@@ -1232,8 +1234,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
dev_kfree_skb_any(tx_req->skb);
netif_tx_lock_bh(p->dev);
++p->tx_tail;
++priv->tx_tail;
if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) &&
++priv->global_tx_tail;
if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
......
......@@ -407,9 +407,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) &&
((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) &&
((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev);
......@@ -634,7 +636,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
else
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
/* increase the tx_head after send success, but use it for queue state */
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) {
if ((priv->global_tx_head - priv->global_tx_tail) ==
ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
}
......@@ -662,6 +665,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
rc = priv->tx_head;
++priv->tx_head;
++priv->global_tx_head;
}
return rc;
}
......@@ -807,6 +811,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
++priv->global_tx_tail;
}
for (i = 0; i < ipoib_recvq_size; ++i) {
......
......@@ -1184,9 +1184,11 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
netif_queue_stopped(dev),
priv->tx_head, priv->tx_tail);
ipoib_warn(priv,
"queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
priv->global_tx_head, priv->global_tx_tail);
/* XXX reset QP, etc. */
}
......@@ -1701,7 +1703,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
/* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */
if (ipoib_transport_dev_init(dev, priv->ca)) {
pr_warn("%s: ipoib_transport_dev_init failed\n",
......
......@@ -88,7 +88,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
static inline void uobj_put_destroy(struct ib_uobject *uobj)
{
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
}
static inline void uobj_put_read(struct ib_uobject *uobj)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment