Commit d7d213e0, authored by Kan Liang, committed by Arnaldo Carvalho de Melo

perf report: Support Retire Latency

The Retire Latency field is added in the var3_w of the
PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports pipeline stall of
this instruction compared to the previous instruction in cycles.  That's
quite useful to display the information with perf mem report.

The p_stage_cyc for Power is also from the var3_w. Union the p_stage_cyc
and retire_lat to share the code.

Implement X86-specific code to display the X86-specific header.

Add a new sort key retire_lat for the Retire Latency.
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20230104201349.1451191-8-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent ebab2916
...@@ -115,6 +115,8 @@ OPTIONS ...@@ -115,6 +115,8 @@ OPTIONS
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a - p_stage_cyc: On powerpc, this presents the number of cycles spent in a
pipeline stage. And currently supported only on powerpc. pipeline stage. And currently supported only on powerpc.
- addr: (Full) virtual address of the sampled instruction - addr: (Full) virtual address of the sampled instruction
- retire_lat: On X86, this reports the pipeline stall of this instruction compared
to the previous instruction, in cycles. Currently supported only on X86.
By default, comm, dso and symbol keys are used. By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol) (i.e. --sort comm,dso,symbol)
......
...@@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data, ...@@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
else { else {
data->weight = weight.var1_dw; data->weight = weight.var1_dw;
data->ins_lat = weight.var2_w; data->ins_lat = weight.var2_w;
data->retire_lat = weight.var3_w;
} }
} }
...@@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, ...@@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
*array |= ((u64)data->ins_lat << 32); *array |= ((u64)data->ins_lat << 32);
} }
} }
/*
 * Translate a sort-entry column header into its Retire Latency wording.
 *
 * The two "Pipeline Stage Cycle" headers are replaced by the matching
 * "Retire Latency" headers; any other header is handed back untouched
 * (the very same pointer that was passed in).
 *
 * @se_header: NUL-terminated header string; must not be NULL.
 * Return: the replacement string literal, or @se_header itself.
 */
const char *arch_perf_header_entry(const char *se_header)
{
	/* Header as passed in -> header to display instead. */
	static const struct {
		const char *from;
		const char *to;
	} renames[] = {
		{ "Local Pipeline Stage Cycle", "Local Retire Latency" },
		{ "Pipeline Stage Cycle",       "Retire Latency" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(renames) / sizeof(renames[0]); i++) {
		if (strcmp(se_header, renames[i].from) == 0)
			return renames[i].to;
	}

	return se_header;
}
/*
 * Tell whether @sort_key is one of the sort keys accepted here.
 *
 * Only the exact strings "p_stage_cyc" and "local_p_stage_cyc" are
 * accepted; the comparison is case-sensitive.
 *
 * @sort_key: NUL-terminated sort key name; must not be NULL.
 * Return: 1 if the key is accepted, 0 otherwise.
 */
int arch_support_sort_key(const char *sort_key)
{
	return strcmp(sort_key, "p_stage_cyc") == 0 ||
	       strcmp(sort_key, "local_p_stage_cyc") == 0;
}
...@@ -92,7 +92,10 @@ struct perf_sample { ...@@ -92,7 +92,10 @@ struct perf_sample {
u8 cpumode; u8 cpumode;
u16 misc; u16 misc;
u16 ins_lat; u16 ins_lat;
union {
u16 p_stage_cyc; u16 p_stage_cyc;
u16 retire_lat;
};
bool no_hw_idx; /* No hw_idx collected in branch_stack */ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN]; char insn[MAX_INSN];
void *raw_data; void *raw_data;
......
...@@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = { ...@@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_ADDR, "addr", sort_addr),
DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
}; };
#undef DIM #undef DIM
......
...@@ -237,6 +237,8 @@ enum sort_type { ...@@ -237,6 +237,8 @@ enum sort_type {
SORT_LOCAL_PIPELINE_STAGE_CYC, SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC, SORT_GLOBAL_PIPELINE_STAGE_CYC,
SORT_ADDR, SORT_ADDR,
SORT_LOCAL_RETIRE_LAT,
SORT_GLOBAL_RETIRE_LAT,
/* branch stack specific sort keys */ /* branch stack specific sort keys */
__SORT_BRANCH_STACK, __SORT_BRANCH_STACK,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment