Commit 7b0006db authored by Xi Wang, committed by Jason Gunthorpe

RDMA/hns: Optimize the base address table config for MTR

The base address table is allocated by the DMA allocator, and its size is
always aligned to PAGE_SIZE. If a fixed size is used to allocate the
table, the number of base address entries stored in the table will be
smaller than the number that can actually be stored.

Link: https://lore.kernel.org/r/1621589395-2435-2-git-send-email-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 9ecf6ac1
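The table holds dma_addr_t entries and is handed out by the DMA allocator in PAGE_SIZE-aligned chunks, so sizing it by PAGE_SIZE rather than a fixed 4 KB hardware page lets each table page carry PAGE_SIZE / sizeof(dma_addr_t) entries. A minimal sketch of that arithmetic, assuming 8-byte DMA addresses and a 64 KB kernel page size (both assumed values, not taken from the patch):

	/* Sketch only: entries per base address table page. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long entry_sz = 8;	/* assumed sizeof(dma_addr_t) on 64-bit */
		unsigned long fixed_sz = 4096;	/* fixed 4 KB hardware page */
		unsigned long page_sz = 65536;	/* assumed PAGE_SIZE on a 64 KB-page kernel */

		printf("fixed table: %lu entries, PAGE_SIZE table: %lu entries\n",
		       fixed_sz / entry_sz, page_sz / entry_sz);
		return 0;
	}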
@@ -208,10 +208,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 	/* Calc the trunk size and num by required size and page_shift */
 	if (flags & HNS_ROCE_BUF_DIRECT) {
-		buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE));
+		buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
 		ntrunk = 1;
 	} else {
-		buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE));
+		buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
 		ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
 	}
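ilog2() rounds down to the previous power of two while order_base_2() rounds up to the next one, so for an aligned size that is not itself a power of two the old code could pick a trunk smaller than the buffer it must hold. A small userspace sketch of the difference, with stand-ins for the two kernel helpers (the _u suffixed names are illustrative, not kernel APIs):

	/* Stand-ins for the kernel helpers, for illustration only. */
	#include <stdio.h>

	static int ilog2_u(unsigned long v)		/* floor(log2(v)) */
	{
		int n = -1;

		while (v) {
			v >>= 1;
			n++;
		}
		return n;
	}

	static int order_base_2_u(unsigned long v)	/* ceil(log2(v)) */
	{
		return v <= 1 ? 0 : ilog2_u(v - 1) + 1;
	}

	int main(void)
	{
		unsigned long size = 3 * 4096;	/* PAGE_SIZE-aligned, not a power of two */

		printf("ilog2 -> %d (trunk %lu), order_base_2 -> %d (trunk %lu)\n",
		       ilog2_u(size), 1UL << ilog2_u(size),
		       order_base_2_u(size), 1UL << order_base_2_u(size));
		return 0;
	}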
@@ -252,51 +252,42 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 }

 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct hns_roce_buf *buf)
+			   int buf_cnt, struct hns_roce_buf *buf,
+			   unsigned int page_shift)
 {
-	int i, end;
-	int total;
+	unsigned int offset, max_size;
+	int total = 0;
+	int i;

-	end = start + buf_cnt;
-	if (end > buf->npages) {
-		dev_err(hr_dev->dev,
-			"failed to check kmem bufs, end %d + %d total %u!\n",
-			start, buf_cnt, buf->npages);
+	if (page_shift > buf->trunk_shift) {
+		dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n",
+			page_shift, buf->trunk_shift);
 		return -EINVAL;
 	}

-	total = 0;
-	for (i = start; i < end; i++)
-		bufs[total++] = hns_roce_buf_page(buf, i);
+	offset = 0;
+	max_size = buf->ntrunks << buf->trunk_shift;
+	for (i = 0; i < buf_cnt && offset < max_size; i++) {
+		bufs[total++] = hns_roce_buf_dma_addr(buf, offset);
+		offset += (1 << page_shift);
+	}

 	return total;
 }

 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct ib_umem *umem,
+			   int buf_cnt, struct ib_umem *umem,
 			   unsigned int page_shift)
 {
 	struct ib_block_iter biter;
 	int total = 0;
-	int idx = 0;
-	u64 addr;
-
-	if (page_shift < HNS_HW_PAGE_SHIFT) {
-		dev_err(hr_dev->dev, "failed to check umem page shift %u!\n",
-			page_shift);
-		return -EINVAL;
-	}

 	/* convert system page cnt to hw page cnt */
 	rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
-		addr = rdma_block_iter_dma_address(&biter);
-		if (idx >= start) {
-			bufs[total++] = addr;
-			if (total >= buf_cnt)
-				goto done;
-		}
-		idx++;
+		bufs[total++] = rdma_block_iter_dma_address(&biter);
+		if (total >= buf_cnt)
+			goto done;
 	}

 done:
 	return total;
...
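With the start parameter gone, the kmem path simply emits one DMA address per (1 << page_shift) bytes, stopping once buf_cnt entries have been produced or the end of the allocated trunks (ntrunks << trunk_shift) is reached; the page_shift > trunk_shift check ensures a single step never spans two discontiguous trunks. A rough sketch of that loop bound, using assumed example values (two 64 KB trunks, 16 KB pages):

	/* Sketch only: how many addresses the new loop can produce. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int trunk_shift = 16;	/* assumed: 64 KB trunks */
		unsigned int page_shift = 14;	/* assumed: 16 KB pages */
		unsigned int ntrunks = 2;
		unsigned int max_size = ntrunks << trunk_shift;
		unsigned int buf_cnt = 100;	/* caller asks for more than exist */
		unsigned int offset, total = 0;

		for (offset = 0; total < buf_cnt && offset < max_size;
		     offset += 1U << page_shift)
			total++;

		printf("addresses produced: %u\n", total);	/* prints 8 */
		return 0;
	}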
@@ -202,13 +202,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;

-	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
 	buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
 	buf_attr.region_count = 1;

 	ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
-				  hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
 	if (ret)
 		ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
...
@@ -1060,14 +1060,18 @@ static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
 	       (offset & ((1 << buf->trunk_shift) - 1));
 }

-static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+					       unsigned int offset)
 {
-	unsigned int offset = idx << buf->page_shift;
-
 	return buf->trunk_list[offset >> buf->trunk_shift].map +
 	       (offset & ((1 << buf->trunk_shift) - 1));
 }

+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+	return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
+}
+
 #define hr_hw_page_align(x)		ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)

 static inline u64 to_hr_hw_page_addr(u64 addr)
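hns_roce_buf_dma_addr() splits a byte offset into a trunk index (offset >> trunk_shift) and an intra-trunk remainder, and the old hns_roce_buf_page() survives as a thin wrapper that turns a page index into such an offset. A worked example of that decomposition with assumed values (64 KB trunks, 4 KB pages, page index 20):

	/* Sketch only: decomposing a buffer offset into trunk + remainder. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int trunk_shift = 16;	/* assumed: 64 KB trunks */
		unsigned int page_shift = 12;	/* assumed: 4 KB pages */
		unsigned int idx = 20;		/* page index, as hns_roce_buf_page() takes */
		unsigned int offset = idx << page_shift;	/* 0x14000 */

		printf("offset %#x -> trunk %u, intra-trunk offset %#x\n",
		       offset, offset >> trunk_shift,
		       offset & ((1U << trunk_shift) - 1));	/* trunk 1, 0x4000 */
		return 0;
	}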
@@ -1204,9 +1208,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 					u32 page_shift, u32 flags);

 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct hns_roce_buf *buf);
+			   int buf_cnt, struct hns_roce_buf *buf,
+			   unsigned int page_shift);
 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct ib_umem *umem,
+			   int buf_cnt, struct ib_umem *umem,
 			   unsigned int page_shift);

 int hns_roce_create_srq(struct ib_srq *srq,
...
@@ -2018,6 +2018,8 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 	caps->llm_buf_pg_sz = 0;

 	/* MR */
+	caps->mpt_ba_pg_sz = 0;
+	caps->mpt_buf_pg_sz = 0;
 	caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
 	caps->pbl_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
@@ -2025,8 +2027,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 		   HEM_TYPE_MTPT);

 	/* QP */
+	caps->qpc_ba_pg_sz = 0;
+	caps->qpc_buf_pg_sz = 0;
 	caps->qpc_timer_ba_pg_sz = 0;
 	caps->qpc_timer_buf_pg_sz = 0;
+	caps->sccc_ba_pg_sz = 0;
+	caps->sccc_buf_pg_sz = 0;
 	caps->mtt_ba_pg_sz = 0;
 	caps->mtt_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
@@ -2039,6 +2045,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 		   &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC);

 	/* CQ */
+	caps->cqc_ba_pg_sz = 0;
+	caps->cqc_buf_pg_sz = 0;
+	caps->cqc_timer_ba_pg_sz = 0;
+	caps->cqc_timer_buf_pg_sz = 0;
+	caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
+	caps->cqe_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
 		   caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
 		   HEM_TYPE_CQC);
@@ -2053,6 +2065,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)

 	/* SRQ */
 	if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
+		caps->srqc_ba_pg_sz = 0;
+		caps->srqc_buf_pg_sz = 0;
+		caps->srqwqe_ba_pg_sz = 0;
+		caps->srqwqe_buf_pg_sz = 0;
+		caps->idx_ba_pg_sz = 0;
+		caps->idx_buf_pg_sz = 0;
 		calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz,
 			   caps->srqc_hop_num, caps->srqc_bt_num,
 			   &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz,
@@ -6161,14 +6179,14 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
 	else
 		eq->hop_num = hr_dev->caps.eqe_hop_num;

-	buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = eq->entries * eq->eqe_size;
 	buf_attr.region[0].hopnum = eq->hop_num;
 	buf_attr.region_count = 1;

 	err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
-				  hr_dev->caps.eqe_ba_pg_sz +
-				  HNS_HW_PAGE_SHIFT, NULL, 0);
+				  hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
+				  0);
 	if (err)
 		dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err);
...
@@ -122,7 +122,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 	buf_attr.mtt_only = is_fast;

 	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
-				  hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
 				  udata, start);
 	if (err)
 		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
@@ -737,11 +737,11 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		return -ENOMEM;

 	if (mtr->umem)
-		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0,
+		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
 					       mtr->umem, page_shift);
 	else
-		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0,
-					       mtr->kmem);
+		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
+					       mtr->kmem, page_shift);

 	if (npage != page_count) {
 		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
@@ -753,8 +753,8 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 	if (mtr->hem_cfg.is_direct && npage > 1) {
 		ret = mtr_check_direct_pages(pages, npage, page_shift);
 		if (ret) {
-			ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n",
-				  mtr->umem ? "user" : "kernel", ret);
+			ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
+				  mtr->umem ? "umtr" : "kmtr", ret, npage);
 			ret = -ENOBUFS;
 			goto err_alloc_list;
 		}
@@ -799,7 +799,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		if (r->offset + r->count > page_cnt) {
 			ret = -EINVAL;
 			ibdev_err(ibdev,
-				  "failed to check mtr%u end %u + %u, max %u.\n",
+				  "failed to check mtr%u count %u + %u > %u.\n",
 				  i, r->offset, r->count, page_cnt);
 			return ret;
 		}
...
@@ -761,7 +761,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		goto err_inline;
 	}

 	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
-				  HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
 				  udata, addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
...
@@ -167,14 +167,14 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 	srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);

-	buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
 						srq->idx_que.entry_shift);
 	buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
 	buf_attr.region_count = 1;

 	ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
-				  hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
 	if (ret) {
 		ibdev_err(ibdev,
@@ -222,15 +222,15 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 							 HNS_ROCE_SGE_SIZE *
 							 srq->max_gs)));

-	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
 							 srq->wqe_shift);
 	buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
 	buf_attr.region_count = 1;

 	ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
-				  hr_dev->caps.srqwqe_ba_pg_sz +
-				  HNS_HW_PAGE_SHIFT, udata, addr);
+				  hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
+				  udata, addr);
 	if (ret)
 		ibdev_err(ibdev,
 			  "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
...