Commit 0045e0d3 authored by Yixing Liu's avatar Yixing Liu Committed by Jason Gunthorpe

RDMA/hns: Support direct wqe of userspace

The current write wqe mechanism is to write to DDR first, and then notify
the hardware through doorbell to read the data. Direct wqe is a mechanism
to fill wqe directly into the hardware. In the case of light load, the wqe
will be filled into pcie bar space of the hardware, this will reduce one
memory access operation and therefore reduce the latency. SIMD
instructions allows cpu to write the 512 bits at one time to device
memory, thus it can be used for posting direct wqe.

Add direct wqe enable switch and address mapping.

Link: https://lore.kernel.org/r/20211207124901.42123-2-liangwenpeng@huawei.comSigned-off-by: default avatarYixing Liu <liuyixing1@huawei.com>
Signed-off-by: default avatarWenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent b1a4da64
...@@ -182,6 +182,7 @@ enum { ...@@ -182,6 +182,7 @@ enum {
HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17), HNS_ROCE_CAP_FLAG_STASH = BIT(17),
}; };
...@@ -228,6 +229,7 @@ struct hns_roce_uar { ...@@ -228,6 +229,7 @@ struct hns_roce_uar {
enum hns_roce_mmap_type { enum hns_roce_mmap_type {
HNS_ROCE_MMAP_TYPE_DB = 1, HNS_ROCE_MMAP_TYPE_DB = 1,
HNS_ROCE_MMAP_TYPE_TPTR, HNS_ROCE_MMAP_TYPE_TPTR,
HNS_ROCE_MMAP_TYPE_DWQE,
}; };
struct hns_user_mmap_entry { struct hns_user_mmap_entry {
...@@ -627,10 +629,6 @@ struct hns_roce_work { ...@@ -627,10 +629,6 @@ struct hns_roce_work {
u32 queue_num; u32 queue_num;
}; };
enum {
HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
};
struct hns_roce_qp { struct hns_roce_qp {
struct ib_qp ibqp; struct ib_qp ibqp;
struct hns_roce_wq rq; struct hns_roce_wq rq;
...@@ -675,6 +673,7 @@ struct hns_roce_qp { ...@@ -675,6 +673,7 @@ struct hns_roce_qp {
struct list_head node; /* all qps are on a list */ struct list_head node; /* all qps are on a list */
struct list_head rq_node; /* all recv qps are on a list */ struct list_head rq_node; /* all recv qps are on a list */
struct list_head sq_node; /* all send qps are on a list */ struct list_head sq_node; /* all send qps are on a list */
struct hns_user_mmap_entry *dwqe_mmap_entry;
}; };
struct hns_roce_ib_iboe { struct hns_roce_ib_iboe {
...@@ -1010,6 +1009,7 @@ struct hns_roce_dev { ...@@ -1010,6 +1009,7 @@ struct hns_roce_dev {
u32 func_num; u32 func_num;
u32 is_vf; u32 is_vf;
u32 cong_algo_tmpl_id; u32 cong_algo_tmpl_id;
u64 dwqe_page;
}; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
......
...@@ -1989,7 +1989,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) ...@@ -1989,7 +1989,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
caps->flags |= HNS_ROCE_CAP_FLAG_STASH; caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
HNS_ROCE_CAP_FLAG_DIRECT_WQE;
caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
} else { } else {
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
......
...@@ -310,9 +310,25 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, ...@@ -310,9 +310,25 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
entry->address = address; entry->address = address;
entry->mmap_type = mmap_type; entry->mmap_type = mmap_type;
switch (mmap_type) {
case HNS_ROCE_MMAP_TYPE_DB:
ret = rdma_user_mmap_entry_insert_exact( ret = rdma_user_mmap_entry_insert_exact(
ucontext, &entry->rdma_entry, length, ucontext, &entry->rdma_entry, length, 0);
mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1); break;
case HNS_ROCE_MMAP_TYPE_TPTR:
ret = rdma_user_mmap_entry_insert_exact(
ucontext, &entry->rdma_entry, length, 1);
break;
case HNS_ROCE_MMAP_TYPE_DWQE:
ret = rdma_user_mmap_entry_insert_range(
ucontext, &entry->rdma_entry, length, 2,
U32_MAX);
break;
default:
ret = -EINVAL;
break;
}
if (ret) { if (ret) {
kfree(entry); kfree(entry);
return NULL; return NULL;
...@@ -439,10 +455,18 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) ...@@ -439,10 +455,18 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
entry = to_hns_mmap(rdma_entry); entry = to_hns_mmap(rdma_entry);
pfn = entry->address >> PAGE_SHIFT; pfn = entry->address >> PAGE_SHIFT;
prot = vma->vm_page_prot;
if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR) switch (entry->mmap_type) {
prot = pgprot_device(prot); case HNS_ROCE_MMAP_TYPE_DB:
case HNS_ROCE_MMAP_TYPE_DWQE:
prot = pgprot_device(vma->vm_page_prot);
break;
case HNS_ROCE_MMAP_TYPE_TPTR:
prot = vma->vm_page_prot;
break;
default:
return -EINVAL;
}
ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
prot, rdma_entry); prot, rdma_entry);
......
...@@ -115,6 +115,9 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) ...@@ -115,6 +115,9 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
} else { } else {
uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2))
>> PAGE_SHIFT); >> PAGE_SHIFT);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
hr_dev->dwqe_page =
pci_resource_start(hr_dev->pci_dev, 4);
} }
return 0; return 0;
......
...@@ -379,6 +379,11 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ...@@ -379,6 +379,11 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
return ret; return ret;
} }
static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
{
rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
}
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{ {
struct xarray *xa = &hr_dev->qp_table_xa; struct xarray *xa = &hr_dev->qp_table_xa;
...@@ -780,7 +785,11 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -780,7 +785,11 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
goto err_inline; goto err_inline;
} }
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
return 0; return 0;
err_inline: err_inline:
free_rq_inline_buf(hr_qp); free_rq_inline_buf(hr_qp);
...@@ -822,6 +831,35 @@ static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev, ...@@ -822,6 +831,35 @@ static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
hns_roce_qp_has_rq(init_attr)); hns_roce_qp_has_rq(init_attr));
} }
static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
struct hns_roce_dev *hr_dev,
struct ib_udata *udata,
struct hns_roce_ib_create_qp_resp *resp)
{
struct hns_roce_ucontext *uctx =
rdma_udata_to_drv_context(udata,
struct hns_roce_ucontext, ibucontext);
struct rdma_user_mmap_entry *rdma_entry;
u64 address;
address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
hr_qp->dwqe_mmap_entry =
hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
HNS_ROCE_DWQE_SIZE,
HNS_ROCE_MMAP_TYPE_DWQE);
if (!hr_qp->dwqe_mmap_entry) {
ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
return -ENOMEM;
}
rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
return 0;
}
static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr, struct ib_qp_init_attr *init_attr,
...@@ -909,10 +947,16 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -909,10 +947,16 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
if (udata) { if (udata) {
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
if (ret)
return ret;
}
ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd, ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
resp); resp);
if (ret) if (ret)
return ret; goto err_remove_qp;
} else { } else {
ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr); ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
if (ret) if (ret)
...@@ -920,6 +964,12 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -920,6 +964,12 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
} }
return 0; return 0;
err_remove_qp:
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
qp_user_mmap_entry_remove(hr_qp);
return ret;
} }
static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
...@@ -933,6 +983,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ...@@ -933,6 +983,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hns_roce_db_unmap_user(uctx, &hr_qp->rdb); hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb); hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
qp_user_mmap_entry_remove(hr_qp);
} else { } else {
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hns_roce_free_db(hr_dev, &hr_qp->rdb); hns_roce_free_db(hr_dev, &hr_qp->rdb);
......
...@@ -77,10 +77,12 @@ enum hns_roce_qp_cap_flags { ...@@ -77,10 +77,12 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
}; };
struct hns_roce_ib_create_qp_resp { struct hns_roce_ib_create_qp_resp {
__aligned_u64 cap_flags; __aligned_u64 cap_flags;
__aligned_u64 dwqe_mmap_key;
}; };
struct hns_roce_ib_alloc_ucontext_resp { struct hns_roce_ib_alloc_ucontext_resp {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment