Commit 54d66387 authored by Xi Wang, committed by Jason Gunthorpe

RDMA/hns: Optimize WQE buffer size calculating process

Optimize the calculation of the QP's WQE buffer parameters to make the
code more readable, mainly by merging the calculation of the extended SGE
space for kernel and userspace QPs. In addition, add some inline functions
to simplify the code related to multi-hop addressing.

Link: https://lore.kernel.org/r/1588071823-40200-4-git-send-email-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 2929c40f
...@@ -1079,6 +1079,8 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) ...@@ -1079,6 +1079,8 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
return buf->page_list[idx].map; return buf->page_list[idx].map;
} }
/* Align x to a multiple of (1 << PAGE_ADDR_SHIFT) bytes using ALIGN(). */
#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT)
static inline u64 to_hr_hw_page_addr(u64 addr) static inline u64 to_hr_hw_page_addr(u64 addr)
{ {
return addr >> PAGE_ADDR_SHIFT; return addr >> PAGE_ADDR_SHIFT;
...@@ -1089,6 +1091,29 @@ static inline u32 to_hr_hw_page_shift(u32 page_shift) ...@@ -1089,6 +1091,29 @@ static inline u32 to_hr_hw_page_shift(u32 page_shift)
return page_shift - PAGE_ADDR_SHIFT; return page_shift - PAGE_ADDR_SHIFT;
} }
/*
 * Pick the hop number to use for a buffer region.
 *
 * @hopnum: hop number taken from the device capabilities
 * @count:  number of entries in the region
 *
 * Returns 0 when the region has no entries or when @hopnum is the
 * HNS_ROCE_HOP_NUM_0 marker; otherwise returns @hopnum unchanged.
 */
static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
{
	if (!count || hopnum == HNS_ROCE_HOP_NUM_0)
		return 0;

	return hopnum;
}
/*
 * Size in bytes of a region holding @count entries of (1 << @buf_shift)
 * bytes each, after alignment with hr_hw_page_align().
 */
static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
{
	u32 bytes = count << buf_shift;

	return hr_hw_page_align(bytes);
}
/*
 * Number of (1 << @buf_shift)-byte entries that fit in the region once the
 * raw size (@count << @buf_shift) has been aligned with hr_hw_page_align().
 */
static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
{
	u32 aligned_bytes = hr_hw_page_align(count << buf_shift);

	return aligned_bytes >> buf_shift;
}
/*
 * log2 of the page-aligned entry count of a region.
 *
 * @count:     number of entries requested for the region
 * @buf_shift: log2 of the size in bytes of one entry
 *
 * Returns 0 for an empty region (@count == 0). Without this guard the
 * aligned entry count would be 0 and ilog2(0) does not yield a usable
 * shift, yet callers pass counts such as sge.sge_cnt that are 0 when the
 * QP has no extended SGE space.
 */
static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
{
	if (!count)
		return 0;

	return ilog2(to_hr_hem_entries_count(count, buf_shift));
}
int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
......
...@@ -154,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, ...@@ -154,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind, int valid_num_sge) unsigned int *sge_ind, int valid_num_sge)
{ {
struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_wqe_data_seg *dseg;
struct ib_sge *sg; struct ib_sge *sge = wr->sg_list;
int num_in_wqe = 0; unsigned int idx = *sge_ind;
int extend_sge_num; int cnt = valid_num_sge;
int fi_sge_num;
int se_sge_num;
int shift;
int i;
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; cnt -= HNS_ROCE_SGE_IN_WQE;
extend_sge_num = valid_num_sge - num_in_wqe; sge += HNS_ROCE_SGE_IN_WQE;
sg = wr->sg_list + num_in_wqe; }
shift = qp->mtr.hem_cfg.buf_pg_shift;
/* while (cnt > 0) {
* Check whether wr->num_sge sges are in the same page. If not, we dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
* should calculate how many sges in the first page and the second set_data_seg_v2(dseg, sge);
* page. idx++;
*/ sge++;
dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); cnt--;
fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
(uintptr_t)dseg) /
sizeof(struct hns_roce_v2_wqe_data_seg);
if (extend_sge_num > fi_sge_num) {
se_sge_num = extend_sge_num - fi_sge_num;
for (i = 0; i < fi_sge_num; i++) {
set_data_seg_v2(dseg++, sg + i);
(*sge_ind)++;
}
dseg = hns_roce_get_extend_sge(qp,
(*sge_ind) & (qp->sge.sge_cnt - 1));
for (i = 0; i < se_sge_num; i++) {
set_data_seg_v2(dseg++, sg + fi_sge_num + i);
(*sge_ind)++;
}
} else {
for (i = 0; i < extend_sge_num; i++) {
set_data_seg_v2(dseg++, sg + i);
(*sge_ind)++;
}
} }
*sge_ind = idx;
} }
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
...@@ -232,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, ...@@ -232,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1); 1);
} else { } else {
if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) { for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) { if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i); set_data_seg_v2(dseg, wr->sg_list + i);
...@@ -245,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, ...@@ -245,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1)); (*sge_ind) & (qp->sge.sge_cnt - 1));
for (i = 0; i < wr->num_sge && for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { i++) {
if (likely(wr->sg_list[i].length)) { if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i); set_data_seg_v2(dseg, wr->sg_list + i);
dseg++; dseg++;
...@@ -675,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, ...@@ -675,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
} }
/* rq support inline data */ /* rq support inline data */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { if (hr_qp->rq_inl_buf.wqe_cnt) {
sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
(u32)wr->num_sge; (u32)wr->num_sge;
...@@ -3491,29 +3468,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, ...@@ -3491,29 +3468,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask) struct hns_roce_v2_qp_context *qpc_mask)
{ {
if (hr_qp->ibqp.qp_type == IB_QPT_GSI) roce_set_field(context->byte_4_sqpn_tst,
roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S,
V2_QPC_BYTE_4_SGE_SHIFT_M, to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sge.sge_shift));
ilog2((unsigned int)hr_qp->sge.sge_cnt));
else
roce_set_field(context->byte_4_sqpn_tst,
V2_QPC_BYTE_4_SGE_SHIFT_M,
V2_QPC_BYTE_4_SGE_SHIFT_S,
hr_qp->sq.max_gs >
HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
roce_set_field(context->byte_20_smac_sgid_idx, roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
ilog2((unsigned int)hr_qp->sq.wqe_cnt)); ilog2(hr_qp->sq.wqe_cnt));
roce_set_field(context->byte_20_smac_sgid_idx, roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
(hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || ilog2(hr_qp->rq.wqe_cnt));
hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
hr_qp->ibqp.srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
} }
static void modify_qp_reset_to_init(struct ib_qp *ibqp, static void modify_qp_reset_to_init(struct ib_qp *ibqp,
...@@ -3781,17 +3747,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, ...@@ -3781,17 +3747,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S, V2_QPC_BYTE_12_SQ_HOP_NUM_S,
hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
0 : hr_dev->caps.wqe_sq_hop_num); hr_qp->sq.wqe_cnt));
roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
roce_set_field(context->byte_20_smac_sgid_idx, roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S, V2_QPC_BYTE_20_SGE_HOP_NUM_S,
((ibqp->qp_type == IB_QPT_GSI) || to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? hr_qp->sge.sge_cnt));
hr_dev->caps.wqe_sge_hop_num : 0);
roce_set_field(qpc_mask->byte_20_smac_sgid_idx, roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
...@@ -3799,8 +3764,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, ...@@ -3799,8 +3764,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx, roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S, V2_QPC_BYTE_20_RQ_HOP_NUM_S,
hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
0 : hr_dev->caps.wqe_rq_hop_num); hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx, roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0);
...@@ -3977,7 +3943,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ...@@ -3977,7 +3943,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return -EINVAL; return -EINVAL;
} }
if (hr_qp->sge.offset) { if (hr_qp->sge.sge_cnt > 0) {
page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
hr_qp->sge.offset / page_size, hr_qp->sge.offset / page_size,
...@@ -4011,15 +3977,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ...@@ -4011,15 +3977,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || context->sq_cur_sge_blk_addr =
hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk));
cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0;
roce_set_field(context->byte_184_irrl_idx, roce_set_field(context->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0);
qpc_mask->sq_cur_sge_blk_addr = 0; qpc_mask->sq_cur_sge_blk_addr = 0;
roce_set_field(qpc_mask->byte_184_irrl_idx, roce_set_field(qpc_mask->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
......
...@@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ...@@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR);
} }
static int set_rq_size(struct hns_roce_dev *hr_dev, static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
struct ib_qp_cap *cap, bool is_user, int has_rq, struct hns_roce_qp *hr_qp, int has_rq)
struct hns_roce_qp *hr_qp)
{ {
u32 max_cnt; u32 cnt;
/* If srq exist, set zero for relative number of rq */ /* If srq exist, set zero for relative number of rq */
if (!has_rq) { if (!has_rq) {
hr_qp->rq.wqe_cnt = 0; hr_qp->rq.wqe_cnt = 0;
hr_qp->rq.max_gs = 0; hr_qp->rq.max_gs = 0;
hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = 0; cap->max_recv_wr = 0;
cap->max_recv_sge = 0; cap->max_recv_sge = 0;
...@@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, ...@@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev,
return -EINVAL; return -EINVAL;
} }
max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
if (cnt > hr_dev->caps.max_wqes) {
hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n", ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
cap->max_recv_wr); cap->max_recv_wr);
return -EINVAL; return -EINVAL;
} }
max_cnt = max(1U, cap->max_recv_sge); hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
...@@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, ...@@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev,
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
hr_qp->rq.max_gs); hr_qp->rq.max_gs);
cap->max_recv_wr = hr_qp->rq.wqe_cnt; hr_qp->rq.wqe_cnt = cnt;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
hr_qp->rq_inl_buf.wqe_cnt = cnt;
else
hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = cnt;
cap->max_recv_sge = hr_qp->rq.max_gs; cap->max_recv_sge = hr_qp->rq.max_gs;
return 0; return 0;
} }
/*
 * Compute the SQ's max_gs and the extended SGE region parameters of a QP.
 *
 * @hr_dev:     device the QP belongs to
 * @sq_wqe_cnt: number of WQEs in the send queue
 * @hr_qp:      QP whose sq.max_gs, sge.sge_cnt and sge.sge_shift are written
 * @cap:        requested capabilities; only max_send_sge is read
 *
 * On HNS_ROCE_HW_VER1 the SGE count per WQE is rounded up to a power of two
 * and the extended SGE count is set to 0. Otherwise GSI/UD QPs get
 * sq_wqe_cnt * max_gs extended SGEs, other QP types get extended SGEs only
 * for the SGEs beyond HNS_ROCE_SGE_IN_WQE, and the total is rounded up to a
 * power of two.
 *
 * Returns 0 on success, or -EINVAL when the extended SGE count exceeds
 * caps.max_extend_sg on a PCI_REVISION_ID_HIP08_A device.
 */
static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
				struct hns_roce_qp *hr_qp,
				struct ib_qp_cap *cap)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	u32 cnt;

	cnt = max(1U, cap->max_send_sge);
	if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
		hr_qp->sq.max_gs = roundup_pow_of_two(cnt);
		hr_qp->sge.sge_cnt = 0;

		return 0;
	}

	hr_qp->sq.max_gs = cnt;

	/* UD sqwqe's sge use extend sge */
	if (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
	    hr_qp->ibqp.qp_type == IB_QPT_UD) {
		cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs);
	} else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) {
		cnt = roundup_pow_of_two(sq_wqe_cnt *
				     (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE));

		/* The extended SGE limit is only enforced on this revision. */
		if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A &&
		    cnt > hr_dev->caps.max_extend_sg) {
			/* cnt is u32, so print it with %u rather than %d. */
			ibdev_err(ibdev,
				  "failed to check exSGE num, exSGE num = %u.\n",
				  cnt);
			return -EINVAL;
		}
	} else {
		cnt = 0;
	}

	hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
	hr_qp->sge.sge_cnt = cnt;

	return 0;
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, struct ib_qp_cap *cap,
struct hns_roce_ib_create_qp *ucmd) struct hns_roce_ib_create_qp *ucmd)
...@@ -430,82 +476,27 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, ...@@ -430,82 +476,27 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd) struct hns_roce_ib_create_qp *ucmd)
{ {
u32 ex_sge_num; struct ib_device *ibdev = &hr_dev->ib_dev;
u32 page_size; u32 cnt = 0;
u32 max_cnt;
int ret; int ret;
if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) cnt > hr_dev->caps.max_wqes)
return -EINVAL; return -EINVAL;
ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
if (ret) { if (ret) {
ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n"); ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
ret);
return ret; return ret;
} }
hr_qp->sq.wqe_shift = ucmd->log_sq_stride; ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
if (ret)
max_cnt = max(1U, cap->max_send_sge); return ret;
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
else
hr_qp->sq.max_gs = max_cnt;
if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2));
if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE &&
hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
ibdev_err(&hr_dev->ib_dev,
"Failed to check extended SGE size limit %d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
hr_qp->sge.sge_shift = 4;
ex_sge_num = hr_qp->sge.sge_cnt;
/* Get buf size, SQ and RQ are aligned to page_szie */ hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { hr_qp->sq.wqe_cnt = cnt;
hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), PAGE_SIZE) +
round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), PAGE_SIZE);
hr_qp->sq.offset = 0;
hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sge.sge_cnt = ex_sge_num ?
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
round_up((hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift), page_size) +
round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0;
if (ex_sge_num) {
hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift),
page_size);
hr_qp->rq.offset = hr_qp->sge.offset +
round_up((hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift),
page_size);
} else {
hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift),
page_size);
}
}
return 0; return 0;
} }
...@@ -514,84 +505,50 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, ...@@ -514,84 +505,50 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp, struct hns_roce_qp *hr_qp,
struct hns_roce_buf_attr *buf_attr) struct hns_roce_buf_attr *buf_attr)
{ {
bool is_extend_sge;
int buf_size; int buf_size;
int idx = 0; int idx = 0;
if (hr_qp->buff_size < 1) hr_qp->buff_size = 0;
return -EINVAL;
buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
buf_attr->fixed_page = true;
buf_attr->region_count = 0;
if (hr_qp->sge.sge_cnt > 0)
is_extend_sge = true;
else
is_extend_sge = false;
/* SQ WQE */ /* SQ WQE */
if (is_extend_sge) hr_qp->sq.offset = 0;
buf_size = hr_qp->sge.offset - hr_qp->sq.offset; buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
else hr_qp->sq.wqe_shift);
buf_size = hr_qp->rq.offset - hr_qp->sq.offset;
if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
buf_attr->region[idx].size = buf_size; buf_attr->region[idx].size = buf_size;
buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
idx++; idx++;
hr_qp->buff_size += buf_size;
} }
/* extend SGE in SQ WQE */ /* extend SGE WQE in SQ */
buf_size = hr_qp->rq.offset - hr_qp->sge.offset; hr_qp->sge.offset = hr_qp->buff_size;
if (buf_size > 0 && is_extend_sge && buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
idx < ARRAY_SIZE(buf_attr->region)) { hr_qp->sge.sge_shift);
if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
buf_attr->region[idx].size = buf_size; buf_attr->region[idx].size = buf_size;
buf_attr->region[idx].hopnum = buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
hr_dev->caps.wqe_sge_hop_num;
idx++; idx++;
hr_qp->buff_size += buf_size;
} }
/* RQ WQE */ /* RQ WQE */
buf_size = hr_qp->buff_size - hr_qp->rq.offset; hr_qp->rq.offset = hr_qp->buff_size;
buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
hr_qp->rq.wqe_shift);
if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
buf_attr->region[idx].size = buf_size; buf_attr->region[idx].size = buf_size;
buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
idx++; idx++;
hr_qp->buff_size += buf_size;
} }
buf_attr->region_count = idx; if (hr_qp->buff_size < 1)
return -EINVAL;
return 0;
}
static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
struct device *dev = hr_dev->dev;
if (hr_qp->sq.max_gs > 2) {
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2));
hr_qp->sge.sge_shift = 4;
}
/* ud sqwqe's sge use extend sge */
if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
hr_qp->ibqp.qp_type == IB_QPT_GSI) {
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
hr_qp->sq.max_gs);
hr_qp->sge.sge_shift = 4;
}
if (hr_qp->sq.max_gs > 2 && buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { buf_attr->fixed_page = true;
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { buf_attr->region_count = idx;
dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
return 0; return 0;
} }
...@@ -599,62 +556,35 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, ...@@ -599,62 +556,35 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
{ {
u32 page_size; struct ib_device *ibdev = &hr_dev->ib_dev;
u32 max_cnt; u32 cnt;
int size;
int ret; int ret;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_send_sge > hr_dev->caps.max_sq_sg ||
cap->max_inline_data > hr_dev->caps.max_sq_inline) { cap->max_inline_data > hr_dev->caps.max_sq_inline) {
ibdev_err(&hr_dev->ib_dev, ibdev_err(ibdev,
"SQ WR or sge or inline data error!\n"); "failed to check SQ WR, SGE or inline num, ret = %d.\n",
-EINVAL);
return -EINVAL; return -EINVAL;
} }
hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
if (cnt > hr_dev->caps.max_wqes) {
max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n",
cnt);
hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
ibdev_err(&hr_dev->ib_dev,
"while setting kernel sq size, sq.wqe_cnt too large\n");
return -EINVAL; return -EINVAL;
} }
/* Get data_seg numbers */ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
max_cnt = max(1U, cap->max_send_sge); hr_qp->sq.wqe_cnt = cnt;
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
else
hr_qp->sq.max_gs = max_cnt;
ret = set_extend_sge_param(hr_dev, hr_qp); ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
if (ret) { if (ret)
ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n");
return ret; return ret;
}
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ /* sync the parameters of kernel QP to user's configuration */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); cap->max_send_wr = cnt;
hr_qp->sq.offset = 0;
size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) {
hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
(u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size;
size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift,
page_size);
}
hr_qp->rq.offset = size;
size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size);
hr_qp->buff_size = size;
/* Get wr and sge number which send */
cap->max_send_wr = hr_qp->sq.wqe_cnt;
cap->max_send_sge = hr_qp->sq.max_gs; cap->max_send_sge = hr_qp->sq.max_gs;
/* We don't support inline sends for kernel QPs (yet) */ /* We don't support inline sends for kernel QPs (yet) */
...@@ -685,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, ...@@ -685,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr) struct ib_qp_init_attr *init_attr)
{ {
u32 max_recv_sge = init_attr->cap.max_recv_sge; u32 max_recv_sge = init_attr->cap.max_recv_sge;
u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt;
struct hns_roce_rinl_wqe *wqe_list; struct hns_roce_rinl_wqe *wqe_list;
u32 wqe_cnt = hr_qp->rq.wqe_cnt;
int i; int i;
/* allocate recv inline buf */ /* allocate recv inline buf */
...@@ -708,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, ...@@ -708,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge]; wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
hr_qp->rq_inl_buf.wqe_list = wqe_list; hr_qp->rq_inl_buf.wqe_list = wqe_list;
hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt;
return 0; return 0;
...@@ -721,7 +650,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, ...@@ -721,7 +650,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
{ {
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); if (hr_qp->rq_inl_buf.wqe_list)
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
kfree(hr_qp->rq_inl_buf.wqe_list); kfree(hr_qp->rq_inl_buf.wqe_list);
} }
...@@ -731,36 +661,36 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -731,36 +661,36 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
{ {
struct ib_device *ibdev = &hr_dev->ib_dev; struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_attr buf_attr = {}; struct hns_roce_buf_attr buf_attr = {};
bool is_rq_buf_inline;
int ret; int ret;
is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && if (!udata && hr_qp->rq_inl_buf.wqe_cnt) {
hns_roce_qp_has_rq(init_attr);
if (is_rq_buf_inline) {
ret = alloc_rq_inline_buf(hr_qp, init_attr); ret = alloc_rq_inline_buf(hr_qp, init_attr);
if (ret) { if (ret) {
ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n"); ibdev_err(ibdev,
"failed to alloc inline buf, ret = %d.\n",
ret);
return ret; return ret;
} }
} else {
hr_qp->rq_inl_buf.wqe_list = NULL;
} }
ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr);
if (ret) { if (ret) {
ibdev_err(ibdev, "Failed to split WQE buf, ret %d\n", ret); ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
goto err_inline; goto err_inline;
} }
ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
udata, addr); udata, addr);
if (ret) { if (ret) {
ibdev_err(ibdev, "Failed to create WQE mtr, ret %d\n", ret); ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
goto err_inline; goto err_inline;
} }
return 0; return 0;
err_inline: err_inline:
if (is_rq_buf_inline) free_rq_inline_buf(hr_qp);
free_rq_inline_buf(hr_qp);
return ret; return ret;
} }
...@@ -768,9 +698,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -768,9 +698,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{ {
hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && free_rq_inline_buf(hr_qp);
hr_qp->rq.wqe_cnt)
free_rq_inline_buf(hr_qp);
} }
static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
...@@ -935,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -935,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
else else
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
ret = set_rq_size(hr_dev, &init_attr->cap, udata, ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
hns_roce_qp_has_rq(init_attr), hr_qp); hns_roce_qp_has_rq(init_attr));
if (ret) { if (ret) {
ibdev_err(ibdev, "Failed to set user RQ size\n"); ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
ret);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment