Commit 31c5ba6c authored by Ian Rogers's avatar Ian Rogers Committed by Arnaldo Carvalho de Melo

perf vendor events intel: Update metrics to detect pmem at runtime

By detecting whether nvdimms are installed at runtime the number of
events can be reduced if it isn't. These changes come from this PR:
https://github.com/intel/perfmon/pull/63Reviewed-by: default avatarKan Liang <kan.liang@linux.intel.com>
Signed-off-by: default avatarIan Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Edward Baker <edward.baker@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Samantha Alt <samantha.alt@intel.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20230324072218.181880-2-irogers@google.comSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent c3bf86f1
...@@ -201,7 +201,7 @@ ...@@ -201,7 +201,7 @@
{ {
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS", "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound", "MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
...@@ -933,7 +933,7 @@ ...@@ -933,7 +933,7 @@
}, },
{ {
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]", "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@", "MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC", "MetricGroup": "Mem;MemoryLat;Server;SoC",
"MetricName": "tma_info_mem_pmm_read_latency", "MetricName": "tma_info_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches" "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
...@@ -998,13 +998,13 @@ ...@@ -998,13 +998,13 @@
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_read_bw" "MetricName": "tma_info_pmm_read_bw"
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_write_bw" "MetricName": "tma_info_pmm_write_bw"
}, },
...@@ -1310,7 +1310,7 @@ ...@@ -1310,7 +1310,7 @@
{ {
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a", "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
"MetricConstraint": "NO_GROUP_EVENTS", "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)", "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound", "MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
......
...@@ -186,7 +186,7 @@ ...@@ -186,7 +186,7 @@
{ {
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS", "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound", "MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
...@@ -918,7 +918,7 @@ ...@@ -918,7 +918,7 @@
}, },
{ {
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]", "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@", "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC", "MetricGroup": "Mem;MemoryLat;Server;SoC",
"MetricName": "tma_info_mem_pmm_read_latency", "MetricName": "tma_info_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches" "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
...@@ -984,13 +984,13 @@ ...@@ -984,13 +984,13 @@
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_read_bw" "MetricName": "tma_info_pmm_read_bw"
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_write_bw" "MetricName": "tma_info_pmm_write_bw"
}, },
...@@ -1298,7 +1298,7 @@ ...@@ -1298,7 +1298,7 @@
}, },
{ {
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a", "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)", "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound", "MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
......
...@@ -185,7 +185,7 @@ ...@@ -185,7 +185,7 @@
{ {
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS", "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks - tma_pmm_bound", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound", "MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
...@@ -968,7 +968,7 @@ ...@@ -968,7 +968,7 @@
}, },
{ {
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]", "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@", "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC", "MetricGroup": "Mem;MemoryLat;Server;SoC",
"MetricName": "tma_info_mem_pmm_read_latency", "MetricName": "tma_info_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches" "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
...@@ -1034,13 +1034,13 @@ ...@@ -1034,13 +1034,13 @@
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_read_bw" "MetricName": "tma_info_pmm_read_bw"
}, },
{ {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time", "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC", "MetricGroup": "Mem;MemoryBW;Server;SoC",
"MetricName": "tma_info_pmm_write_bw" "MetricName": "tma_info_pmm_write_bw"
}, },
...@@ -1406,7 +1406,7 @@ ...@@ -1406,7 +1406,7 @@
}, },
{ {
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a", "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)", "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound", "MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment