Commit bfefae9f authored by Lang Cheng, committed by Jason Gunthorpe

RDMA/hns: Add support for CQ stash

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. CQs need to enable stash by default.

Link: https://lore.kernel.org/r/1606374251-21512-2-git-send-email-liweihang@huawei.com
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 71586dd2
...@@ -53,6 +53,18 @@ ...@@ -53,6 +53,18 @@
#define roce_set_bit(origin, shift, val) \ #define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val)) roce_set_field((origin), (1ul << (shift)), (shift), (val))
/*
 * FIELD_LOC() bundles a field description -- the structure type that holds the
 * field plus the field's highest and lowest bit offsets -- into one
 * comma-separated list, so a field can be given a single name and passed to
 * hr_reg_enable() as one argument.
 */
#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l

/*
 * _hr_reg_enable() - set a single-bit field inside a context structure.
 * @ptr:        pointer to the structure; it is assigned to a
 *              'const field_type *' first, so the compiler checks it
 *              against @field_type.
 * @field_type: structure type the field belongs to.
 * @field_h:    highest bit offset of the field.
 * @field_l:    lowest bit offset of the field; must equal @field_h.
 *
 * The structure is addressed as an array of __le32 words: word
 * (@field_h / 32) gets bit (@field_l % 32) OR-ed in, after conversion with
 * cpu_to_le32().
 *
 * BUILD_BUG_ON_ZERO() contributes 0 to the value and exists only to reject,
 * at build time, fields that are wider than one bit. It is kept inside the
 * cpu_to_le32() argument so that no plain integer is added to the __le32
 * result of the conversion.
 */
#define _hr_reg_enable(ptr, field_type, field_h, field_l)                      \
	({                                                                     \
		const field_type *_ptr = (ptr);                                \
		*((__le32 *)_ptr + (field_h) / 32) |= cpu_to_le32(             \
			BIT((field_l) % 32) +                                  \
			BUILD_BUG_ON_ZERO((field_h) != (field_l)));            \
	})

/*
 * hr_reg_enable() - set the single-bit field named by @field in *@ptr.
 * @field must expand to a FIELD_LOC() triple; the extra level of macro
 * expansion turns it into the three trailing arguments of _hr_reg_enable().
 */
#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
......
...@@ -225,6 +225,7 @@ enum { ...@@ -225,6 +225,7 @@ enum {
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17),
}; };
#define HNS_ROCE_DB_TYPE_COUNT 2 #define HNS_ROCE_DB_TYPE_COUNT 2
......
...@@ -3168,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, ...@@ -3168,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
HNS_ROCE_V3_CQE_SIZE ? 1 : 0); HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
hr_reg_enable(cq_context, CQC_STASH);
cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
roce_set_field(cq_context->byte_16_hop_addr, roce_set_field(cq_context->byte_16_hop_addr,
......
...@@ -284,6 +284,7 @@ struct hns_roce_v2_cq_context { ...@@ -284,6 +284,7 @@ struct hns_roce_v2_cq_context {
__le32 cqe_report_timer; __le32 cqe_report_timer;
__le32 byte_64_se_cqe_idx; __le32 byte_64_se_cqe_idx;
}; };
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
...@@ -360,6 +361,10 @@ struct hns_roce_v2_cq_context { ...@@ -360,6 +361,10 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
/*
 * CQC_FIELD_LOC() describes a field of struct hns_roce_v2_cq_context by its
 * highest (h) and lowest (l) bit offsets, in the form FIELD_LOC() produces.
 */
#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
/* Stash enable: single-bit field at bit offset 63 of the CQ context. */
#define CQC_STASH CQC_FIELD_LOC(63, 63)
struct hns_roce_srq_context { struct hns_roce_srq_context {
__le32 byte_4_srqn_srqst; __le32 byte_4_srqn_srqst;
__le32 byte_8_limit_wl; __le32 byte_8_limit_wl;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment