Commit 8bfc20ba authored by Namhyung Kim, committed by Peter Zijlstra

perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src

The kernel IBS driver wasn't using the new PERF_MEM_* APIs because of some
of their limitations. Mainly:

1. mem_lvl_num doesn't allow setting multiple sources whereas the old API
   allows it. Setting multiple data sources is useful because IBS on
   pre-Zen4 uarch doesn't provide fine-grained DataSrc details (there
   is only one such DataSrc (2h), though).
2. perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
   c2c (c2c_decode_stats()) does not use mem_lvl_num at all.

The 1st one can be handled using ANY_CACHE with HOPS_0. The 2nd is purely
a perf tool specific issue and should be fixed separately.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com
parent 5c6e623f
...@@ -728,7 +728,37 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) ...@@ -728,7 +728,37 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
return op_data2->data_src_lo; return op_data2->data_src_lo;
} }
static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, #define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
/*
 * Shorthand wrappers around PERF_MEM_S(), used by the IBS data-source
 * tables and by perf_ibs_get_mem_lvl() to build data_src values.
 * NOTE(review): PERF_MEM_S() itself is defined outside this file; the
 * field names below are presumably the perf_mem_data_src bit-fields.
 */
/* LN(x): PERF_MEM_S() encoding of LVLNUM value x. */
#define LN(x) PERF_MEM_S(LVLNUM, x)
/* REM: PERF_MEM_S() encoding of REMOTE/REMOTE. */
#define REM PERF_MEM_S(REMOTE, REMOTE)
/* HOPS(x): PERF_MEM_S() encoding of HOPS value x. */
#define HOPS(x) PERF_MEM_S(HOPS, x)
/*
 * DataSrc -> data_src encoding for pre-Zen4 IBS, indexed by the raw IBS
 * DataSrc value.  Entries left at zero have no mapping; the lookup in
 * perf_ibs_get_mem_lvl() then falls back to the MAB check.
 *
 * IBS_DATA_SRC_LOC_CACHE is reported with two LVL bits (L3 and REM_CCE1)
 * because DataSrc is not fine-grained here; as mem_lvl_num can hold only
 * one level, it is encoded as ANY_CACHE with HOPS_0.
 *
 * The table is only ever read, hence const.
 */
static const u64 g_data_src[8] = {
	[IBS_DATA_SRC_LOC_CACHE]	= L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
	[IBS_DATA_SRC_DRAM]		= L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_REM_CACHE]	= L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_IO]		= L(IO) | LN(IO),
};
/*
 * Bitmask of pre-Zen4 DataSrc values for which the RmtNode bit is
 * honoured by perf_ibs_get_mem_lvl() (currently only DRAM).
 */
#define RMT_NODE_BITS			(1U << IBS_DATA_SRC_DRAM)
/*
 * Non-zero when DataSrc value @x is in RMT_NODE_BITS.  The argument is
 * parenthesized so that any expression may be passed, and the shift is
 * done on an unsigned constant.
 */
#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1U << (x)))
/*
 * DataSrc -> data_src encoding for Zen4 IBS, indexed by the extended IBS
 * DataSrc value.  Entries left at zero have no mapping; the lookup in
 * perf_ibs_get_mem_lvl() then falls back to the MAB check.
 *
 * PMEM and extension memory set only mem_lvl_num here; no LVL bits are
 * set for them.
 *
 * The table is only ever read, hence const.
 */
static const u64 g_zen4_data_src[32] = {
	[IBS_DATA_SRC_EXT_LOC_CACHE]		= L(L3) | LN(L3),
	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE]	= L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
	[IBS_DATA_SRC_EXT_DRAM]			= L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]	= L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_EXT_PMEM]			= LN(PMEM),
	[IBS_DATA_SRC_EXT_IO]			= L(IO) | LN(IO),
	[IBS_DATA_SRC_EXT_EXT_MEM]		= LN(CXL),
};
/*
 * Bitmask of Zen4 DataSrc values for which the RmtNode bit is honoured
 * by perf_ibs_get_mem_lvl(): DRAM, PMEM and extension memory.
 */
#define ZEN4_RMT_NODE_BITS		((1U << IBS_DATA_SRC_EXT_DRAM) | \
					 (1U << IBS_DATA_SRC_EXT_PMEM) | \
					 (1U << IBS_DATA_SRC_EXT_EXT_MEM))
/*
 * Non-zero when DataSrc value @x is in ZEN4_RMT_NODE_BITS.  The shift is
 * unsigned because the Zen4 table has 32 entries, so @x may be 31 and a
 * signed "1 << 31" would shift into the sign bit.  The argument is
 * parenthesized so that any expression may be passed.
 */
#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1U << (x)))
static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
union ibs_op_data3 *op_data3, union ibs_op_data3 *op_data3,
struct perf_sample_data *data) struct perf_sample_data *data)
{ {
...@@ -736,30 +766,25 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, ...@@ -736,30 +766,25 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
u8 ibs_data_src = perf_ibs_data_src(op_data2); u8 ibs_data_src = perf_ibs_data_src(op_data2);
data_src->mem_lvl = 0; data_src->mem_lvl = 0;
data_src->mem_lvl_num = 0;
/* /*
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
* memory accesses. So, check DcUcMemAcc bit early. * memory accesses. So, check DcUcMemAcc bit early.
*/ */
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; return L(UNC) | LN(UNC);
return;
}
/* L1 Hit */ /* L1 Hit */
if (op_data3->dc_miss == 0) { if (op_data3->dc_miss == 0)
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; return L(L1) | LN(L1);
return;
}
/* L2 Hit */ /* L2 Hit */
if (op_data3->l2_miss == 0) { if (op_data3->l2_miss == 0) {
/* Erratum #1293 */ /* Erratum #1293 */
if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; return L(L2) | LN(L2);
return;
}
} }
/* /*
...@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, ...@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
if (data_src->mem_op != PERF_MEM_OP_LOAD) if (data_src->mem_op != PERF_MEM_OP_LOAD)
goto check_mab; goto check_mab;
/* L3 Hit */
if (ibs_caps & IBS_CAPS_ZEN4) { if (ibs_caps & IBS_CAPS_ZEN4) {
if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { u64 val = g_zen4_data_src[ibs_data_src];
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
return;
}
} else {
if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
PERF_MEM_LVL_HIT;
return;
}
}
/* A peer cache in a near CCX */ if (!val)
if (ibs_caps & IBS_CAPS_ZEN4 && goto check_mab;
ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
return;
}
/* A peer cache in a far CCX */
if (ibs_caps & IBS_CAPS_ZEN4) {
if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
return;
}
} else {
if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
return;
}
}
/* DRAM */ /* HOPS_1 because IBS doesn't provide remote socket detail */
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
if (op_data2->rmt_node == 0) if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
else else
data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; val |= REM | HOPS(1);
return;
} }
/* PMEM */ return val;
if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { } else {
data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; u64 val = g_data_src[ibs_data_src];
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
}
return;
}
/* Extension Memory */ if (!val)
if (ibs_caps & IBS_CAPS_ZEN4 && goto check_mab;
ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
if (op_data2->rmt_node) {
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
}
return;
}
/* IO */ /* HOPS_1 because IBS doesn't provide remote socket detail */
if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
data_src->mem_lvl = PERF_MEM_LVL_IO; if (ibs_data_src == IBS_DATA_SRC_DRAM)
data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
if (op_data2->rmt_node) { else
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; val |= REM | HOPS(1);
/* IBS doesn't provide Remote socket detail */
data_src->mem_hops = PERF_MEM_HOPS_1;
} }
return;
return val;
} }
check_mab: check_mab:
...@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, ...@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
* MAB only when IBS fails to provide DataSrc. * MAB only when IBS fails to provide DataSrc.
*/ */
if (op_data3->dc_miss_no_mab_alloc) { if (op_data3->dc_miss_no_mab_alloc)
data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; return L(LFB) | LN(LFB);
return;
}
data_src->mem_lvl = PERF_MEM_LVL_NA; /* Don't set HIT with NA */
return PERF_MEM_S(LVL, NA) | LN(NA);
} }
static bool perf_ibs_cache_hit_st_valid(void) static bool perf_ibs_cache_hit_st_valid(void)
...@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, ...@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
union ibs_op_data2 *op_data2, union ibs_op_data2 *op_data2,
union ibs_op_data3 *op_data3) union ibs_op_data3 *op_data3)
{ {
perf_ibs_get_mem_lvl(op_data2, op_data3, data); union perf_mem_data_src *data_src = &data->data_src;
data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
perf_ibs_get_mem_snoop(op_data2, data); perf_ibs_get_mem_snoop(op_data2, data);
perf_ibs_get_tlb_lvl(op_data3, data); perf_ibs_get_tlb_lvl(op_data3, data);
perf_ibs_get_mem_lock(op_data3, data); perf_ibs_get_mem_lock(op_data3, data);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment