Commit 8d18ad83 authored by Lijun Ou's avatar Lijun Ou Committed by Doug Ledford

RDMA/hns: Fix bug when wqe num is larger than 16K

hip08 can support up to 32768 wqes in one qp. Currently, if the wqe num
is larger than 16384, the driver will trigger a call trace as follows.

[21361.393725] Call trace:
[21361.398605]  hns_roce_v2_modify_qp+0xbcc/0x1360 [hns_roce_hw_v2]
[21361.410627]  hns_roce_modify_qp+0x1d8/0x2f8 [hns_roce]
[21361.420906]  _ib_modify_qp+0x70/0x118
[21361.428222]  ib_modify_qp+0x14/0x1c
[21361.435193]  rt_ktest_modify_qp+0xb8/0x650 [rdma_test]
[21361.445472]  exec_modify_qp_cmd+0x110/0x4d8 [rdma_test]
[21361.455924]  rt_ktest_dispatch_cmd_3+0xa94/0x2edc [rdma_test]
[21361.467422]  rt_ktest_dispatch_cmd_2+0x9c/0x108 [rdma_test]
[21361.478570]  rt_ktest_dispatch_cmd+0x138/0x904 [rdma_test]
[21361.489545]  rt_ktest_dev_write+0x328/0x4b0 [rdma_test]
[21361.499998]  __vfs_write+0x38/0x15c
[21361.506966]  vfs_write+0xa8/0x1a0
[21361.513586]  ksys_write+0x50/0xb0
[21361.520206]  sys_write+0xc/0x14
[21361.526479]  el0_svc_naked+0x30/0x34
[21361.533622] Code: 1ac10841 d37d7c22 0b000021 d37df021 (f86268c0)
[21361.545815] ---[ end trace e2a1feb2c3d7f13c ]---

When the wqe num is larger than 16384, hns_roce_table_find will return an
invalid mtt, which will lead to a kernel paging request error if the driver
tries to access it. This is an mtt design defect: mtt can't support up to the
max wqe num of hip08.

This patch fixes it by replacing mtt with mtr for wqe.

Fixes: 926a01dc ("RDMA/hns: Add QP operations support for hip08 SoC")
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 2ac0bc5e
......@@ -660,6 +660,14 @@ struct hns_roce_qp {
struct ib_umem *umem;
struct hns_roce_mtt mtt;
struct hns_roce_mtr mtr;
/* this define must be less than HNS_ROCE_MAX_BT_REGION */
#define HNS_ROCE_WQE_REGION_MAX 3
struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX];
int region_cnt;
int wqe_bt_pg_shift;
u32 buff_size;
struct mutex mutex;
u8 port;
......@@ -870,6 +878,9 @@ struct hns_roce_caps {
u32 mtt_ba_pg_sz;
u32 mtt_buf_pg_sz;
u32 mtt_hop_num;
u32 wqe_sq_hop_num;
u32 wqe_sge_hop_num;
u32 wqe_rq_hop_num;
u32 sccc_ba_pg_sz;
u32 sccc_buf_pg_sz;
u32 sccc_hop_num;
......
This diff is collapsed.
......@@ -422,6 +422,91 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
return 0;
}
/*
 * Split the QP's WQE buffer into up to three back-to-back regions and
 * describe each non-empty one in @regions, in this order:
 *   - SQ:  from sq.offset up to sge.offset (or rq.offset when the QP has
 *          no extended SGE space, i.e. sge.sge_cnt == 0)
 *   - SGE: from sge.offset up to rq.offset (only when sge.sge_cnt > 0)
 *   - RQ:  from rq.offset up to buff_size
 *
 * @hr_dev:     device whose caps supply the hop number for each region
 * @hr_qp:      QP whose buff_size and sq/sge/rq offsets define the layout
 * @regions:    output array, filled starting at index 0
 * @region_max: number of entries available in @regions
 * @page_shift: log2 of the page size used to convert byte offsets and
 *              sizes into the page offset / page count passed to
 *              hns_roce_init_buf_region()
 *
 * Returns the number of entries written to @regions. This is 0 when the
 * buffer is empty or @region_max is less than 1, and never exceeds
 * @region_max.
 */
static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
				struct hns_roce_qp *hr_qp,
				struct hns_roce_buf_region *regions,
				int region_max, int page_shift)
{
	int page_size = 1 << page_shift;
	bool is_extend_sge;
	int region_cnt = 0;
	int buf_size;
	int buf_cnt;

	if (hr_qp->buff_size < 1 || region_max < 1)
		return region_cnt;

	is_extend_sge = hr_qp->sge.sge_cnt > 0;

	/* sq region */
	if (is_extend_sge)
		buf_size = hr_qp->sge.offset - hr_qp->sq.offset;
	else
		buf_size = hr_qp->rq.offset - hr_qp->sq.offset;

	if (buf_size > 0 && region_cnt < region_max) {
		buf_cnt = DIV_ROUND_UP(buf_size, page_size);
		hns_roce_init_buf_region(&regions[region_cnt],
					 hr_dev->caps.wqe_sq_hop_num,
					 hr_qp->sq.offset / page_size,
					 buf_cnt);
		region_cnt++;
	}

	/* sge region */
	if (is_extend_sge) {
		buf_size = hr_qp->rq.offset - hr_qp->sge.offset;
		if (buf_size > 0 && region_cnt < region_max) {
			buf_cnt = DIV_ROUND_UP(buf_size, page_size);
			hns_roce_init_buf_region(&regions[region_cnt],
						 hr_dev->caps.wqe_sge_hop_num,
						 hr_qp->sge.offset / page_size,
						 buf_cnt);
			region_cnt++;
		}
	}

	/*
	 * rq region
	 *
	 * Bound-check against region_max like the regions above, so that a
	 * caller passing a short array cannot be written out of bounds.
	 */
	buf_size = hr_qp->buff_size - hr_qp->rq.offset;
	if (buf_size > 0 && region_cnt < region_max) {
		buf_cnt = DIV_ROUND_UP(buf_size, page_size);
		hns_roce_init_buf_region(&regions[region_cnt],
					 hr_dev->caps.wqe_rq_hop_num,
					 hr_qp->rq.offset / page_size,
					 buf_cnt);
		region_cnt++;
	}

	return region_cnt;
}
/*
 * Pick the base-address-table (BT) page shift for the WQE regions.
 *
 * Starting from PAGE_SHIFT + caps.mtt_ba_pg_sz, the shift is increased one
 * step at a time until the value returned by
 * hns_roce_hem_list_calc_root_ba() for @regions no longer exceeds the
 * number of BA_BYTE_LEN-sized entries that fit in one page of that size.
 *
 * @hr_dev:     device whose caps.mtt_ba_pg_sz gives the starting shift
 * @regions:    buffer regions to be covered
 * @region_cnt: number of valid entries in @regions
 *
 * Returns the chosen shift relative to PAGE_SHIFT.
 */
static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
				  struct hns_roce_buf_region *regions,
				  int region_cnt)
{
	int shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz;
	int capacity;
	int needed;

	/* all root ba entries must fit in one bt page */
	for (;;) {
		capacity = (1 << shift) / BA_BYTE_LEN;
		needed = hns_roce_hem_list_calc_root_ba(regions, region_cnt,
							capacity);
		if (needed <= capacity)
			break;

		shift++;
	}

	return shift - PAGE_SHIFT;
}
static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_qp *hr_qp)
......@@ -534,15 +619,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_udata *udata, unsigned long sqpn,
struct hns_roce_qp *hr_qp)
{
dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 };
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_qp ucmd;
struct hns_roce_ib_create_qp_resp resp = {};
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct hns_roce_ucontext, ibucontext);
struct hns_roce_buf_region *r;
unsigned long qpn = 0;
int ret = 0;
u32 page_shift;
u32 npages;
int buf_count;
int ret;
int i;
mutex_init(&hr_qp->mutex);
......@@ -596,6 +683,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
init_attr->cap.max_recv_sge];
}
page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
......@@ -617,32 +705,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = PTR_ERR(hr_qp->umem);
goto err_rq_sge_list;
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
page_shift = PAGE_SHIFT;
if (hr_dev->caps.mtt_buf_pg_sz) {
npages = (ib_umem_page_count(hr_qp->umem) +
(1 << hr_dev->caps.mtt_buf_pg_sz) - 1) /
(1 << hr_dev->caps.mtt_buf_pg_sz);
page_shift += hr_dev->caps.mtt_buf_pg_sz;
ret = hns_roce_mtt_init(hr_dev, npages,
page_shift,
&hr_qp->mtt);
} else {
ret = hns_roce_mtt_init(hr_dev,
ib_umem_page_count(hr_qp->umem),
page_shift, &hr_qp->mtt);
}
hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
page_shift);
ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
hr_qp->region_cnt);
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for create qp\n");
goto err_buf;
dev_err(dev, "alloc buf_list error for create qp\n");
goto err_alloc_list;
}
ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt,
hr_qp->umem);
if (ret) {
dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
goto err_mtt;
for (i = 0; i < hr_qp->region_cnt; i++) {
r = &hr_qp->regions[i];
buf_count = hns_roce_get_umem_bufs(hr_dev,
buf_list[i], r->count, r->offset,
hr_qp->umem, page_shift);
if (buf_count != r->count) {
dev_err(dev,
"get umem buf err, expect %d,ret %d.\n",
r->count, buf_count);
ret = -ENOBUFS;
goto err_get_bufs;
}
}
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
......@@ -653,7 +737,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
&hr_qp->sdb);
if (ret) {
dev_err(dev, "sq record doorbell map failed!\n");
goto err_mtt;
goto err_get_bufs;
}
/* indicate kernel supports sq record db */
......@@ -715,7 +799,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
/* Allocate QP buf */
page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
(1 << page_shift) * 2,
&hr_qp->hr_buf, page_shift)) {
......@@ -723,21 +806,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = -ENOMEM;
goto err_db;
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
/* Write MTT */
ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages,
hr_qp->hr_buf.page_shift, &hr_qp->mtt);
hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
page_shift);
ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
hr_qp->region_cnt);
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n");
goto err_buf;
dev_err(dev, "alloc buf_list error for create qp!\n");
goto err_alloc_list;
}
ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt,
for (i = 0; i < hr_qp->region_cnt; i++) {
r = &hr_qp->regions[i];
buf_count = hns_roce_get_kmem_bufs(hr_dev,
buf_list[i], r->count, r->offset,
&hr_qp->hr_buf);
if (ret) {
dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n");
goto err_mtt;
if (buf_count != r->count) {
dev_err(dev,
"get kmem buf err, expect %d,ret %d.\n",
r->count, buf_count);
ret = -ENOBUFS;
goto err_get_bufs;
}
}
hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
......@@ -761,6 +851,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
}
hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
hr_qp->region_cnt);
hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
page_shift);
ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list,
hr_qp->regions, hr_qp->region_cnt);
if (ret) {
dev_err(dev, "mtr attatch error for create qp\n");
goto err_mtr;
}
if (init_attr->qp_type == IB_QPT_GSI &&
hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
/* In v1 engine, GSI QP context in RoCE engine's register */
......@@ -796,6 +897,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
hr_qp->event = hns_roce_ib_qp_event;
hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
return 0;
......@@ -810,6 +912,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (!sqpn)
hns_roce_release_range_qp(hr_dev, qpn, 1);
err_mtr:
hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
err_wrid:
if (udata) {
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
......@@ -829,10 +934,10 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_qp_has_sq(init_attr))
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
err_get_bufs:
hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
err_buf:
err_alloc_list:
if (hr_qp->umem)
ib_umem_release(hr_qp->umem);
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment