Commit 384f8818 authored by Lijun Ou's avatar Lijun Ou Committed by Jason Gunthorpe

RDMA/hns: Add atomic support

This patch adds atomic operations for hip08, includes fetchadd and cmpswap
operation.  In order to enable atomic, the driver needs to do the
following steps:

1. Enable the atomic caps for RoCE device
2. Post the wqe context of atomic type
3. Configure the atomic type of mtpt
Signed-off-by: default avatarLijun Ou <oulijun@huawei.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent b9c1ea40
...@@ -193,6 +193,7 @@ enum { ...@@ -193,6 +193,7 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
}; };
enum hns_roce_mtt_type { enum hns_roce_mtt_type {
......
...@@ -54,6 +54,18 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, ...@@ -54,6 +54,18 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length); dseg->len = cpu_to_le32(sg->length);
} }
static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
const struct ib_atomic_wr *wr)
{
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
aseg->cmp_data = cpu_to_le64(wr->compare_add);
} else {
aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
aseg->cmp_data = 0;
}
}
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind) unsigned int *sge_ind)
{ {
...@@ -179,6 +191,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ...@@ -179,6 +191,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp); struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct hns_roce_v2_wqe_data_seg *dseg;
struct device *dev = hr_dev->dev; struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db; struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr; struct ib_qp_attr attr;
...@@ -407,6 +420,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ...@@ -407,6 +420,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_bit(rc_sq_wqe->byte_4, roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) { switch (wr->opcode) {
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ; hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
...@@ -443,9 +457,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ...@@ -443,9 +457,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
break; break;
case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_CMP_AND_SWP:
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP; hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
rc_sq_wqe->rkey =
cpu_to_le32(atomic_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le32(atomic_wr(wr)->remote_addr);
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
set_atomic_seg(wqe, atomic_wr(wr));
break; break;
case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_ATOMIC_FETCH_AND_ADD:
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD; hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
rc_sq_wqe->rkey =
cpu_to_le32(atomic_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le32(atomic_wr(wr)->remote_addr);
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
set_atomic_seg(wqe, atomic_wr(wr));
break; break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
hr_op = hr_op =
...@@ -463,7 +489,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ...@@ -463,7 +489,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(rc_sq_wqe->byte_4, roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op); V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
wqe += sizeof(struct hns_roce_v2_rc_send_wqe); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
dseg =
wqe - sizeof(struct hns_roce_v2_wqe_data_seg);
else
dseg = wqe;
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
&sge_ind, bad_wr); &sge_ind, bad_wr);
...@@ -1232,6 +1263,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) ...@@ -1232,6 +1263,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0; caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096; caps->max_mtu = IB_MTU_4096;
if (hr_dev->pci_dev->revision == 0x21)
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
ret = hns_roce_v2_set_bt(hr_dev); ret = hns_roce_v2_set_bt(hr_dev);
if (ret) if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
...@@ -1663,7 +1697,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, ...@@ -1663,7 +1697,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
......
...@@ -1564,4 +1564,9 @@ struct hns_roce_eq_context { ...@@ -1564,4 +1564,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
__le64 cmp_data;
};
#endif #endif
...@@ -215,7 +215,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, ...@@ -215,7 +215,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pd = hr_dev->caps.num_pds; props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
props->atomic_cap = IB_ATOMIC_NONE; props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1; props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment