Commit f794809a authored by Jack Morgenstein, committed by Jason Gunthorpe

IB/core: Add an unbound WQ type to the new CQ API

The upstream kernel commit cited below modified the workqueue in the
new CQ API to be bound to a specific CPU (instead of being unbound).
This caused ALL users of the new CQ API to use the same bound WQ.

Specifically, MAD handling was severely delayed when the CPU bound
to the WQ was busy handling (higher priority) interrupts.

This caused a delay in the MAD "heartbeat" response handling,
which resulted in ports being incorrectly classified as "down".

To fix this, add a new "unbound" WQ type to the new CQ API, so that users
have the option to choose either a bound WQ or an unbound WQ.

For MADs, choose the new "unbound" WQ.
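
As a minimal illustrative sketch (not part of this patch; "device", "priv" and
"nr_cqe" are placeholder names), a consumer that wants its completions handled
off the bound per-CPU workqueue simply passes the new poll context when
allocating its CQ:

	struct ib_cq *cq;

	/* Poll completions from the unbound workqueue instead of the
	 * per-CPU bound ib_comp_wq.
	 */
	cq = ib_alloc_cq(device, priv, nr_cqe, 0 /* comp_vector */,
			 IB_POLL_UNBOUND_WORKQUEUE);
	if (IS_ERR(cq))
		return PTR_ERR(cq);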

Fixes: b7363e67 ("IB/device: Convert ib-comp-wq to be CPU-bound")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.m>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 08920b8f
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
 				    IB_POLL_BATCH);
 	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
 	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
-		queue_work(ib_comp_wq, &cq->work);
+		queue_work(cq->comp_wq, &cq->work);
 }
 
 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 {
-	queue_work(ib_comp_wq, &cq->work);
+	queue_work(cq->comp_wq, &cq->work);
 }
 
 /**
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cq->comp_handler = ib_cq_completion_workqueue;
 		INIT_WORK(&cq->work, ib_cq_poll_work);
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+		cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+				ib_comp_wq : ib_comp_unbound_wq;
 		break;
 	default:
 		ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
 		irq_poll_disable(&cq->iop);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cancel_work_sync(&cq->work);
 		break;
 	default:
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -61,6 +61,7 @@ struct ib_client_data {
 };
 
 struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
@@ -1166,10 +1167,19 @@ static int __init ib_core_init(void)
 		goto err;
 	}
 
+	ib_comp_unbound_wq =
+		alloc_workqueue("ib-comp-unb-wq",
+				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_comp_unbound_wq) {
+		ret = -ENOMEM;
+		goto err_comp;
+	}
+
 	ret = class_register(&ib_class);
 	if (ret) {
 		pr_warn("Couldn't create InfiniBand device class\n");
-		goto err_comp;
+		goto err_comp_unbound;
 	}
 
 	ret = rdma_nl_init();
@@ -1218,6 +1228,8 @@ static int __init ib_core_init(void)
 	rdma_nl_exit();
 err_sysfs:
 	class_unregister(&ib_class);
+err_comp_unbound:
+	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
 err:
@@ -1236,6 +1248,7 @@ static void __exit ib_core_cleanup(void)
 	addr_cleanup();
 	rdma_nl_exit();
 	class_unregister(&ib_class);
+	destroy_workqueue(ib_comp_unbound_wq);
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3183,7 +3183,7 @@ static int ib_mad_port_open(struct ib_device *device,
 		cq_size *= 2;
 
 	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
-			IB_POLL_WORKQUEUE);
+			IB_POLL_UNBOUND_WORKQUEUE);
 	if (IS_ERR(port_priv->cq)) {
 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -71,6 +71,7 @@
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
 
 union ib_gid {
 	u8	raw[16];
@@ -1570,9 +1571,10 @@ struct ib_ah {
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
 enum ib_poll_context {
 	IB_POLL_DIRECT,		   /* caller context, no hw completions */
 	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
 	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
+	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
 };
 
 struct ib_cq {
@@ -1589,6 +1591,7 @@ struct ib_cq {
 		struct irq_poll		iop;
 		struct work_struct	work;
 	};
+	struct workqueue_struct *comp_wq;
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */