Commit 89b4b70b authored by Lang Cheng, committed by Jason Gunthorpe

RDMA/hns: Optimize hns_roce_mhop_alloc function.

Move the code that allocates multi-hop addressing into independent
per-hop helper functions to improve readability.

Link: https://lore.kernel.org/r/1562593285-8037-7-git-send-email-oulijun@huawei.com
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 99441ab5
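After the patch, hns_roce_mhop_alloc() is reduced to a dispatcher that picks one of the new per-hop helpers, as the diff below shows. A minimal userspace sketch of that control flow, with stub helpers and simplified signatures standing in for the real struct hns_roce_dev / struct hns_roce_mr parameters; the HNS_ROCE_HOP_NUM_0 value is an assumption taken from hns_roce_device.h:

	#include <stdio.h>

	#define HNS_ROCE_HOP_NUM_0 0xff	/* sentinel value assumed from hns_roce_device.h */

	/* Stub helpers: the real ones allocate the BT/PBL memory for the MR. */
	static int pbl_1hop_alloc(int npages) { printf("1 hop, %d pages\n", npages); return 0; }
	static int pbl_2hop_alloc(int npages) { printf("2 hops, %d pages\n", npages); return 0; }
	static int pbl_3hop_alloc(int npages) { printf("3 hops, %d pages\n", npages); return 0; }

	/* Mirrors the post-patch hns_roce_mhop_alloc() dispatch shape. */
	static int mhop_alloc(unsigned int mhop_num, int npages)
	{
		if (mhop_num == HNS_ROCE_HOP_NUM_0)	/* no hierarchy configured */
			return 0;
		if (mhop_num == 1)			/* single level: PBL only */
			return pbl_1hop_alloc(npages);
		/* the 2- and 3-hop paths share the L0/L1 setup in the real code */
		if (mhop_num == 2)
			return pbl_2hop_alloc(npages);
		if (mhop_num == 3)
			return pbl_3hop_alloc(npages);
		return -1;
	}

	int main(void)
	{
		return mhop_alloc(2, 1024);
	}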
@@ -347,155 +347,208 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
 	mr->pbl_bt_l0 = NULL;
 	mr->pbl_l0_dma_addr = 0;
 }
 
+static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+	struct device *dev = hr_dev->dev;
+
+	if (npages > pbl_bt_sz / 8) {
+		dev_err(dev, "npages %d is larger than buf_pg_sz!",
+			npages);
+		return -EINVAL;
+	}
+	mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+					 &(mr->pbl_dma_addr),
+					 GFP_KERNEL);
+	if (!mr->pbl_buf)
+		return -ENOMEM;
+
+	mr->pbl_size = npages;
+	mr->pbl_ba = mr->pbl_dma_addr;
+	mr->pbl_hop_num = 1;
+	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+	return 0;
+}
+
+static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+	struct device *dev = hr_dev->dev;
+	int npages_allocated;
+	u64 pbl_last_bt_num;
+	u64 pbl_bt_cnt = 0;
+	u64 size;
+	int i;
+
+	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+	/* alloc L1 BT */
+	for (i = 0; i < pbl_bt_sz / 8; i++) {
+		if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+			size = pbl_bt_sz;
+		} else {
+			npages_allocated = i * (pbl_bt_sz / 8);
+			size = (npages - npages_allocated) * 8;
+		}
+		mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
+					    &(mr->pbl_l1_dma_addr[i]),
+					    GFP_KERNEL);
+		if (!mr->pbl_bt_l1[i]) {
+			hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+			return -ENOMEM;
+		}
+
+		*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+		pbl_bt_cnt++;
+		if (pbl_bt_cnt >= pbl_last_bt_num)
+			break;
+	}
+
+	mr->l0_chunk_last_num = i + 1;
+
+	return 0;
+}
+
+static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+	struct device *dev = hr_dev->dev;
+	int mr_alloc_done = 0;
+	int npages_allocated;
+	u64 pbl_last_bt_num;
+	u64 pbl_bt_cnt = 0;
+	u64 bt_idx;
+	u64 size;
+	int i;
+	int j = 0;
+
+	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+	mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
+				      sizeof(*mr->pbl_l2_dma_addr),
+				      GFP_KERNEL);
+	if (!mr->pbl_l2_dma_addr)
+		return -ENOMEM;
+
+	mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
+				sizeof(*mr->pbl_bt_l2),
+				GFP_KERNEL);
+	if (!mr->pbl_bt_l2)
+		goto err_kcalloc_bt_l2;
+
+	/* alloc L1, L2 BT */
+	for (i = 0; i < pbl_bt_sz / 8; i++) {
+		mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
+						    &(mr->pbl_l1_dma_addr[i]),
+						    GFP_KERNEL);
+		if (!mr->pbl_bt_l1[i]) {
+			hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+			goto err_dma_alloc_l0;
+		}
+
+		*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+		for (j = 0; j < pbl_bt_sz / 8; j++) {
+			bt_idx = i * pbl_bt_sz / 8 + j;
+
+			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+				size = pbl_bt_sz;
+			} else {
+				npages_allocated = bt_idx *
+						   (pbl_bt_sz / 8);
+				size = (npages - npages_allocated) * 8;
+			}
+			mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
+					dev, size,
+					&(mr->pbl_l2_dma_addr[bt_idx]),
+					GFP_KERNEL);
+			if (!mr->pbl_bt_l2[bt_idx]) {
+				hns_roce_loop_free(hr_dev, mr, 2, i, j);
+				goto err_dma_alloc_l0;
+			}
+
+			*(mr->pbl_bt_l1[i] + j) =
+					mr->pbl_l2_dma_addr[bt_idx];
+
+			pbl_bt_cnt++;
+			if (pbl_bt_cnt >= pbl_last_bt_num) {
+				mr_alloc_done = 1;
+				break;
+			}
+		}
+
+		if (mr_alloc_done)
+			break;
+	}
+
+	mr->l0_chunk_last_num = i + 1;
+	mr->l1_chunk_last_num = j + 1;
+
+	return 0;
+
+err_dma_alloc_l0:
+	kfree(mr->pbl_bt_l2);
+	mr->pbl_bt_l2 = NULL;
+
+err_kcalloc_bt_l2:
+	kfree(mr->pbl_l2_dma_addr);
+	mr->pbl_l2_dma_addr = NULL;
+
+	return -ENOMEM;
+}
+
 /* PBL multi hop addressing */
 static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 			       struct hns_roce_mr *mr)
 {
 	struct device *dev = hr_dev->dev;
-	int mr_alloc_done = 0;
-	int npages_allocated;
-	int i = 0, j = 0;
 	u32 pbl_bt_sz;
 	u32 mhop_num;
-	u64 pbl_last_bt_num;
-	u64 pbl_bt_cnt = 0;
-	u64 bt_idx;
-	u64 size;
 
 	mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 
 	if (mhop_num == HNS_ROCE_HOP_NUM_0)
 		return 0;
 
 	/* hop_num = 1 */
-	if (mhop_num == 1) {
-		if (npages > pbl_bt_sz / 8) {
-			dev_err(dev, "npages %d is larger than buf_pg_sz!",
-				npages);
-			return -EINVAL;
-		}
-		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-						 &(mr->pbl_dma_addr),
-						 GFP_KERNEL);
-		if (!mr->pbl_buf)
-			return -ENOMEM;
-
-		mr->pbl_size = npages;
-		mr->pbl_ba = mr->pbl_dma_addr;
-		mr->pbl_hop_num = mhop_num;
-		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
-		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-		return 0;
-	}
+	if (mhop_num == 1)
+		return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
 
 	mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
 				      sizeof(*mr->pbl_l1_dma_addr),
 				      GFP_KERNEL);
 	if (!mr->pbl_l1_dma_addr)
 		return -ENOMEM;
 
 	mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
 				GFP_KERNEL);
 	if (!mr->pbl_bt_l1)
 		goto err_kcalloc_bt_l1;
 
-	if (mhop_num == 3) {
-		mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
-					      sizeof(*mr->pbl_l2_dma_addr),
-					      GFP_KERNEL);
-		if (!mr->pbl_l2_dma_addr)
-			goto err_kcalloc_l2_dma;
-
-		mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
-					sizeof(*mr->pbl_bt_l2),
-					GFP_KERNEL);
-		if (!mr->pbl_bt_l2)
-			goto err_kcalloc_bt_l2;
-	}
-
 	/* alloc L0 BT */
 	mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
 					   &(mr->pbl_l0_dma_addr),
 					   GFP_KERNEL);
 	if (!mr->pbl_bt_l0)
-		goto err_dma_alloc_l0;
+		goto err_kcalloc_l2_dma;
 
-	if (mhop_num == 2) {
-		/* alloc L1 BT */
-		for (i = 0; i < pbl_bt_sz / 8; i++) {
-			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
-				size = pbl_bt_sz;
-			} else {
-				npages_allocated = i * (pbl_bt_sz / 8);
-				size = (npages - npages_allocated) * 8;
-			}
-			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
-					    &(mr->pbl_l1_dma_addr[i]),
-					    GFP_KERNEL);
-			if (!mr->pbl_bt_l1[i]) {
-				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
-				goto err_dma_alloc_l0;
-			}
-
-			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
-			pbl_bt_cnt++;
-			if (pbl_bt_cnt >= pbl_last_bt_num)
-				break;
-		}
-	} else if (mhop_num == 3) {
-		/* alloc L1, L2 BT */
-		for (i = 0; i < pbl_bt_sz / 8; i++) {
-			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
-					    &(mr->pbl_l1_dma_addr[i]),
-					    GFP_KERNEL);
-			if (!mr->pbl_bt_l1[i]) {
-				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
-				goto err_dma_alloc_l0;
-			}
-
-			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
-			for (j = 0; j < pbl_bt_sz / 8; j++) {
-				bt_idx = i * pbl_bt_sz / 8 + j;
-
-				if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
-					size = pbl_bt_sz;
-				} else {
-					npages_allocated = bt_idx *
-							   (pbl_bt_sz / 8);
-					size = (npages - npages_allocated) * 8;
-				}
-				mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
-						dev, size,
-						&(mr->pbl_l2_dma_addr[bt_idx]),
-						GFP_KERNEL);
-				if (!mr->pbl_bt_l2[bt_idx]) {
-					hns_roce_loop_free(hr_dev, mr, 2, i, j);
-					goto err_dma_alloc_l0;
-				}
-
-				*(mr->pbl_bt_l1[i] + j) =
-						mr->pbl_l2_dma_addr[bt_idx];
-
-				pbl_bt_cnt++;
-				if (pbl_bt_cnt >= pbl_last_bt_num) {
-					mr_alloc_done = 1;
-					break;
-				}
-			}
-
-			if (mr_alloc_done)
-				break;
-		}
+	if (mhop_num == 2) {
+		if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+			goto err_kcalloc_l2_dma;
+	}
+
+	if (mhop_num == 3) {
+		if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+			goto err_kcalloc_l2_dma;
 	}
 
-	mr->l0_chunk_last_num = i + 1;
-	if (mhop_num == 3)
-		mr->l1_chunk_last_num = j + 1;
-
 	mr->pbl_size = npages;
 	mr->pbl_ba = mr->pbl_l0_dma_addr;
@@ -505,14 +558,6 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 	return 0;
 
-err_dma_alloc_l0:
-	kfree(mr->pbl_bt_l2);
-	mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
-	kfree(mr->pbl_l2_dma_addr);
-	mr->pbl_l2_dma_addr = NULL;
-
 err_kcalloc_l2_dma:
 	kfree(mr->pbl_bt_l1);
 	mr->pbl_bt_l1 = NULL;
...
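The sizing arithmetic repeated in both loops above is plain ceiling division over 8-byte entries: a base-address table (BT) of pbl_bt_sz bytes holds pbl_bt_sz / 8 DMA addresses, pbl_last_bt_num rounds npages up to whole tables, and only the final table is allocated short. A standalone check with illustrative numbers (4 KiB tables and 1000 pages are assumptions for the example, not values from the patch):

	#include <assert.h>

	/* Each BT entry is an 8-byte DMA address, so a pbl_bt_sz-byte table
	 * maps pbl_bt_sz / 8 pages; pbl_last_bt_num is a ceiling division,
	 * and the last table is trimmed to the remaining pages, as in the
	 * hunk above.
	 */
	int main(void)
	{
		unsigned long pbl_bt_sz = 4096;		/* illustrative table size */
		long npages = 1000;			/* illustrative page count */
		unsigned long entries = pbl_bt_sz / 8;	/* 512 entries per table */
		unsigned long last_bt_num = (npages + entries - 1) / entries;

		assert(last_bt_num == 2);		/* 1000 pages -> 2 tables */
		/* the second (last) table only needs (1000 - 512) * 8 bytes */
		assert((npages - 1 * entries) * 8 == 3904);
		return 0;
	}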