Commit 287f329e authored by Yamin Friedman, committed by Christoph Hellwig

nvme-rdma: use new shared CQ mechanism

Have the driver use shared CQs, providing the ~10%-20% improvement seen in
the patch that introduced shared CQs. Instead of opening a CQ for each QP
per connected controller, the RDMA core provides a CQ for each QP that is
shared between the QPs on that core, reducing interrupt overhead.
Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent df4f9bc4
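
For context, the shared CQs used in the diff below come from the RDMA core CQ pool API (ib_cq_pool_get()/ib_cq_pool_put()). The following is a minimal sketch, under stated assumptions, of how a ULP queue might borrow and return a pooled CQ; the example_queue structure and helper names are hypothetical and not part of this patch.

    /*
     * Sketch only: illustrates the RDMA core shared CQ pool API used by this
     * patch. example_queue and the helper names are hypothetical.
     */
    #include <rdma/ib_verbs.h>

    struct example_queue {
    	struct ib_cq	*ib_cq;
    	int		cq_size;
    };

    static int example_get_shared_cq(struct ib_device *ibdev,
    		struct example_queue *q, int comp_vector)
    {
    	/* Borrow a shared CQ for this completion vector from the core pool. */
    	q->ib_cq = ib_cq_pool_get(ibdev, q->cq_size, comp_vector,
    				  IB_POLL_SOFTIRQ);
    	if (IS_ERR(q->ib_cq))
    		return PTR_ERR(q->ib_cq);
    	return 0;
    }

    static void example_put_shared_cq(struct example_queue *q)
    {
    	/* Return the CQ to the pool; the pool tracks the reserved CQE count. */
    	ib_cq_pool_put(q->ib_cq, q->cq_size);
    }

The pool hands out interrupt-driven CQs (e.g. IB_POLL_SOFTIRQ) but not IB_POLL_DIRECT ones, which is why the patch keeps a private ib_alloc_cq() for polling queues.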
@@ -96,6 +96,7 @@ struct nvme_rdma_queue {
 	int			cm_error;
 	struct completion	cm_done;
 	bool			pi_support;
+	int			cq_size;
 };
 
 struct nvme_rdma_ctrl {
@@ -275,6 +276,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	init_attr.recv_cq = queue->ib_cq;
 	if (queue->pi_support)
 		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+	init_attr.qp_context = queue;
 
 	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
 
@@ -409,6 +411,14 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 	return NULL;
 }
 
+static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
+{
+	if (nvme_rdma_poll_queue(queue))
+		ib_free_cq(queue->ib_cq);
+	else
+		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
+}
+
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct nvme_rdma_device *dev;
@@ -430,7 +440,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	 * the destruction of the QP shouldn't use rdma_cm API.
 	 */
 	ib_destroy_qp(queue->qp);
-	ib_free_cq(queue->ib_cq);
+	nvme_rdma_free_cq(queue);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
 		sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -450,13 +460,42 @@ static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
 	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
 }
 
+static int nvme_rdma_create_cq(struct ib_device *ibdev,
+		struct nvme_rdma_queue *queue)
+{
+	int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
+	enum ib_poll_context poll_ctx;
+
+	/*
+	 * Spread I/O queues completion vectors according their queue index.
+	 * Admin queues can always go on completion vector 0.
+	 */
+	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
+
+	/* Polling queues need direct cq polling context */
+	if (nvme_rdma_poll_queue(queue)) {
+		poll_ctx = IB_POLL_DIRECT;
+		queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
+					   comp_vector, poll_ctx);
+	} else {
+		poll_ctx = IB_POLL_SOFTIRQ;
+		queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
+					      comp_vector, poll_ctx);
+	}
+
+	if (IS_ERR(queue->ib_cq)) {
+		ret = PTR_ERR(queue->ib_cq);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
 	const int send_wr_factor = 3;			/* MR, SEND, INV */
 	const int cq_factor = send_wr_factor + 1;	/* + RECV */
-	int comp_vector, idx = nvme_rdma_queue_idx(queue);
-	enum ib_poll_context poll_ctx;
 	int ret, pages_per_mr;
 
 	queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -467,26 +506,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	}
 	ibdev = queue->device->dev;
 
-	/*
-	 * Spread I/O queues completion vectors according their queue index.
-	 * Admin queues can always go on completion vector 0.
-	 */
-	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
-
-	/* Polling queues need direct cq polling context */
-	if (nvme_rdma_poll_queue(queue))
-		poll_ctx = IB_POLL_DIRECT;
-	else
-		poll_ctx = IB_POLL_SOFTIRQ;
-
 	/* +1 for ib_stop_cq */
-	queue->ib_cq = ib_alloc_cq(ibdev, queue,
-				cq_factor * queue->queue_size + 1,
-				comp_vector, poll_ctx);
-	if (IS_ERR(queue->ib_cq)) {
-		ret = PTR_ERR(queue->ib_cq);
+	queue->cq_size = cq_factor * queue->queue_size + 1;
+
+	ret = nvme_rdma_create_cq(ibdev, queue);
+	if (ret)
 		goto out_put_dev;
-	}
 
 	ret = nvme_rdma_create_qp(queue, send_wr_factor);
 	if (ret)
@@ -512,7 +537,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	if (ret) {
 		dev_err(queue->ctrl->ctrl.device,
 			"failed to initialize MR pool sized %d for QID %d\n",
-			queue->queue_size, idx);
+			queue->queue_size, nvme_rdma_queue_idx(queue));
 		goto out_destroy_ring;
 	}
 
@@ -523,7 +548,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		if (ret) {
 			dev_err(queue->ctrl->ctrl.device,
 				"failed to initialize PI MR pool sized %d for QID %d\n",
-				queue->queue_size, idx);
+				queue->queue_size, nvme_rdma_queue_idx(queue));
 			goto out_destroy_mr_pool;
 		}
 	}
@@ -540,7 +565,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
-	ib_free_cq(queue->ib_cq);
+	nvme_rdma_free_cq(queue);
 out_put_dev:
 	nvme_rdma_dev_put(queue->device);
 	return ret;
@@ -1163,7 +1188,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 		const char *op)
 {
-	struct nvme_rdma_queue *queue = cq->cq_context;
+	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -1706,7 +1731,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvme_rdma_qe *qe =
 		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
-	struct nvme_rdma_queue *queue = cq->cq_context;
+	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct ib_device *ibdev = queue->device->dev;
 	struct nvme_completion *cqe = qe->data;
 	const size_t len = sizeof(struct nvme_completion);