Commit 86f767e6 authored by Lang Cheng, committed by Jason Gunthorpe

RDMA/hns: Replace wmb&__raw_writeq with writeq

Currently, the driver's doorbell update looks like this:

post()
{
	wqe.field = 0x111;
	wmb();
	update_wq_db();
}

update_wq_db()
{
	db.field = 0x222;
	__raw_writeq(db, db_reg);
}

writeq() is a better choice than __raw_writeq() because on ARM64 it issues a
dma_wmb() barrier before the MMIO store, and dma_wmb() is a lighter, more
appropriate barrier than wmb() for the ROCEE device.
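For reference, a simplified sketch of the arm64 accessor definitions (from
arch/arm64/include/asm/io.h, paraphrased here, not driver code) shows how
writeq() already folds the barrier into the MMIO store:

#define __iowmb()		dma_wmb()
#define writeq_relaxed(v, c)	((void)__raw_writeq((__force u64)cpu_to_le64(v), (c)))
#define writeq(v, c)		({ __iowmb(); writeq_relaxed((v), (c)); })

So the ordering of the WQE write against the doorbell write is still enforced,
just by dma_wmb() inside writeq() instead of an explicit wmb() in the driver.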

This patch removes all wmb() calls before the doorbell updates of SQ/RQ/CQ/SRQ
by replacing __raw_writeq() with writeq(), which improves performance. The new
process looks like this:

post()
{
	wqe.field = 0x111;
	update_wq_db();
}

update_wq_db()
{
	db.field = 0x222;
	writeq(db, db_reg);
}
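
Conceptually, with the arm64 expansion sketched above, the new update_wq_db()
behaves as follows (a sketch for illustration, not literal driver code):

update_wq_db()
{
	db.field = 0x222;
	dma_wmb();		/* implied by writeq() on arm64 */
	__raw_writeq(db, db_reg);
}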

Link: https://lore.kernel.org/r/1612517974-31867-8-git-send-email-liweihang@huawei.com
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 3fe07a00
@@ -1077,7 +1077,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
 static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
 {
-	__raw_writeq(*(u64 *) val, dest);
+	writeq(*(u64 *)val, dest);
 }
 static inline struct hns_roce_qp
...
@@ -330,8 +330,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 	/* Set DB return */
 	if (likely(nreq)) {
 		qp->sq.head += nreq;
-		/* Memory barrier */
-		wmb();
 		roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
			       SQ_DOORBELL_U32_4_SQ_HEAD_S,
@@ -411,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 out:
 	if (likely(nreq)) {
 		hr_qp->rq.head += nreq;
-		/* Memory barrier */
-		wmb();
 		if (ibqp->qp_type == IB_QPT_GSI) {
 			__le32 tmp;
@@ -1984,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
 	if (nfreed) {
 		hr_cq->cons_index += nfreed;
-		/*
-		 * Make sure update of buffer contents is done before
-		 * updating consumer index.
-		 */
-		wmb();
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
 	}
 }
@@ -2330,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 		*hr_cq->tptr_addr = hr_cq->cons_index &
 				    ((hr_cq->cq_depth << 1) - 1);
-		/* Memroy barrier */
-		wmb();
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
 	}
@@ -3220,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
	 * need to hw to flash RQ HEAD by DB again
	 */
 	if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
-		/* Memory barrier */
-		wmb();
 		roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
			       RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
 		roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
...
@@ -744,8 +744,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	if (likely(nreq)) {
 		qp->sq.head += nreq;
 		qp->next_sge = sge_idx;
-		/* Memory barrier */
-		wmb();
 		if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 &&
		    (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
@@ -875,8 +873,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 out:
 	if (likely(nreq)) {
 		hr_qp->rq.head += nreq;
-		/* Memory barrier */
-		wmb();
 		/*
		 * Hip08 hardware cannot flush the WQEs in RQ if the QP state
@@ -1015,12 +1011,6 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 	}
 	if (likely(nreq)) {
-		/*
-		 * Make sure that descriptors are written before
-		 * doorbell record.
-		 */
-		wmb();
 		srq_db.byte_4 =
			cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
				    (srq->srqn & V2_DB_BYTE_4_TAG_M));
@@ -3198,11 +3188,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
 	if (nfreed) {
 		hr_cq->cons_index += nfreed;
-		/*
-		 * Make sure update of buffer contents is done before
-		 * updating consumer index.
-		 */
-		wmb();
 		hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
 	}
 }
@@ -3711,11 +3696,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
 			break;
 	}
-	if (npolled) {
-		/* Memory barrier */
-		wmb();
+	if (npolled)
 		hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
-	}
 out:
 	spin_unlock_irqrestore(&hr_cq->lock, flags);
...