Commit 06e5ca74 authored by Athira Rajeev, committed by Arnaldo Carvalho de Melo

perf tools: Support pipeline stage cycles for powerpc

The pipeline stage cycle details can be recorded on powerpc from the
contents of the Performance Monitor Unit (PMU) registers. On ISA v3.1
platforms, the sampling registers expose the cycles spent in different
pipeline stages. This patch adds perf tools support to present two of
these cycle counters along with memory latency (weight).

Re-use the field 'ins_lat' for storing the first pipeline stage cycle.
This is stored in 'var2_w' field of 'perf_sample_weight'.

Add a new field 'p_stage_cyc' to store the second pipeline stage cycle
which is stored in 'var3_w' field of perf_sample_weight.

Add new sort function 'Pipeline Stage Cycle' and include this in
default_mem_sort_order[]. This new sort function may be used to denote
some other pipeline stage in another architecture. So add this to the list
of sort entries that can have a dynamic header string.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Link: https://lore.kernel.org/r/1616425047-1666-5-git-send-email-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent ff0bd0a3
...@@ -112,6 +112,8 @@ OPTIONS ...@@ -112,6 +112,8 @@ OPTIONS
- ins_lat: Instruction latency in core cycles. This is the global instruction - ins_lat: Instruction latency in core cycles. This is the global instruction
latency latency
- local_ins_lat: Local instruction latency version - local_ins_lat: Local instruction latency version
- p_stage_cyc: Number of cycles spent in a pipeline stage.
Currently supported only on powerpc.
By default, comm, dso and symbol keys are used. By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol) (i.e. --sort comm,dso,symbol)
......
...@@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data, ...@@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
weight.full = *array; weight.full = *array;
if (type & PERF_SAMPLE_WEIGHT) if (type & PERF_SAMPLE_WEIGHT)
data->weight = weight.full; data->weight = weight.full;
else else {
data->weight = weight.var1_dw; data->weight = weight.var1_dw;
data->ins_lat = weight.var2_w;
data->p_stage_cyc = weight.var3_w;
}
} }
void arch_perf_synthesize_sample_weight(const struct perf_sample *data, void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
...@@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, ...@@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
{ {
*array = data->weight; *array = data->weight;
if (type & PERF_SAMPLE_WEIGHT_STRUCT) if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
*array &= 0xffffffff; *array &= 0xffffffff;
*array |= ((u64)data->ins_lat << 32);
}
}
/*
 * Map a generic sort-entry header string to the label displayed for it.
 *
 * "Local INSTR Latency" is presented as "Finish Cyc" and
 * "Pipeline Stage Cycle" as "Dispatch Cyc". Any other header is returned
 * unchanged. The result is either a string literal or se_header itself,
 * so the caller must not free it.
 */
const char *arch_perf_header_entry(const char *se_header)
{
	/* strcmp() on a NULL pointer is undefined; hand NULL straight back. */
	if (!se_header)
		return se_header;

	if (!strcmp(se_header, "Local INSTR Latency"))
		return "Finish Cyc";
	if (!strcmp(se_header, "Pipeline Stage Cycle"))
		return "Dispatch Cyc";

	return se_header;
}
...@@ -147,6 +147,7 @@ struct perf_sample { ...@@ -147,6 +147,7 @@ struct perf_sample {
u8 cpumode; u8 cpumode;
u16 misc; u16 misc;
u16 ins_lat; u16 ins_lat;
u16 p_stage_cyc;
bool no_hw_idx; /* No hw_idx collected in branch_stack */ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN]; char insn[MAX_INSN];
void *raw_data; void *raw_data;
......
...@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
if (symbol_conf.nanosecs) if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16); hists__new_col_len(hists, HISTC_TIME, 16);
else else
...@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime) ...@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime)
} }
static void he_stat__add_period(struct he_stat *he_stat, u64 period, static void he_stat__add_period(struct he_stat *he_stat, u64 period,
u64 weight, u64 ins_lat) u64 weight, u64 ins_lat, u64 p_stage_cyc)
{ {
he_stat->period += period; he_stat->period += period;
he_stat->weight += weight; he_stat->weight += weight;
he_stat->nr_events += 1; he_stat->nr_events += 1;
he_stat->ins_lat += ins_lat; he_stat->ins_lat += ins_lat;
he_stat->p_stage_cyc += p_stage_cyc;
} }
static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
...@@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) ...@@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->nr_events += src->nr_events; dest->nr_events += src->nr_events;
dest->weight += src->weight; dest->weight += src->weight;
dest->ins_lat += src->ins_lat; dest->ins_lat += src->ins_lat;
dest->p_stage_cyc += src->p_stage_cyc;
} }
static void he_stat__decay(struct he_stat *he_stat) static void he_stat__decay(struct he_stat *he_stat)
...@@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, ...@@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
u64 period = entry->stat.period; u64 period = entry->stat.period;
u64 weight = entry->stat.weight; u64 weight = entry->stat.weight;
u64 ins_lat = entry->stat.ins_lat; u64 ins_lat = entry->stat.ins_lat;
u64 p_stage_cyc = entry->stat.p_stage_cyc;
bool leftmost = true; bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node; p = &hists->entries_in->rb_root.rb_node;
...@@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, ...@@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) { if (!cmp) {
if (sample_self) { if (sample_self) {
he_stat__add_period(&he->stat, period, weight, ins_lat); he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
hist_entry__add_callchain_period(he, period); hist_entry__add_callchain_period(he, period);
} }
if (symbol_conf.cumulate_callchain) if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period, weight, ins_lat); he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
/* /*
* This mem info was allocated from sample__resolve_mem * This mem info was allocated from sample__resolve_mem
...@@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists, ...@@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists,
.period = sample->period, .period = sample->period,
.weight = sample->weight, .weight = sample->weight,
.ins_lat = sample->ins_lat, .ins_lat = sample->ins_lat,
.p_stage_cyc = sample->p_stage_cyc,
}, },
.parent = sym_parent, .parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered, .filtered = symbol__parent_filter(sym_parent) | al->filtered,
......
...@@ -75,6 +75,7 @@ enum hist_column { ...@@ -75,6 +75,7 @@ enum hist_column {
HISTC_MEM_BLOCKED, HISTC_MEM_BLOCKED,
HISTC_LOCAL_INS_LAT, HISTC_LOCAL_INS_LAT,
HISTC_GLOBAL_INS_LAT, HISTC_GLOBAL_INS_LAT,
HISTC_P_STAGE_CYC,
HISTC_NR_COLS, /* Last entry */ HISTC_NR_COLS, /* Last entry */
}; };
......
...@@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, ...@@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
printf("... weight: %" PRIu64 "", sample->weight); printf("... weight: %" PRIu64 "", sample->weight);
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
printf(",0x%"PRIx16"", sample->ins_lat); printf(",0x%"PRIx16"", sample->ins_lat);
printf(",0x%"PRIx16"", sample->p_stage_cyc);
}
printf("\n"); printf("\n");
} }
......
...@@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; ...@@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern; const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol"; const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
const char default_top_sort_order[] = "dso,symbol"; const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace"; const char default_tracepoint_sort_order[] = "trace";
...@@ -46,7 +46,7 @@ const char *field_order; ...@@ -46,7 +46,7 @@ const char *field_order;
regex_t ignore_callees_regex; regex_t ignore_callees_regex;
int have_ignore_callees = 0; int have_ignore_callees = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL; enum sort_mode sort__mode = SORT_MODE__NORMAL;
const char *dynamic_headers[] = {"local_ins_lat"}; const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
/* /*
* Replaces all occurrences of a char used with the: * Replaces all occurrences of a char used with the:
...@@ -1410,6 +1410,25 @@ struct sort_entry sort_global_ins_lat = { ...@@ -1410,6 +1410,25 @@ struct sort_entry sort_global_ins_lat = {
.se_width_idx = HISTC_GLOBAL_INS_LAT, .se_width_idx = HISTC_GLOBAL_INS_LAT,
}; };
static int64_t
sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->stat.p_stage_cyc - right->stat.p_stage_cyc;
}
/*
 * Format the entry's stat.p_stage_cyc total into bf as a left-justified
 * decimal padded to 'width' columns. Returns whatever repsep_snprintf()
 * returns.
 *
 * stat.p_stage_cyc is a u64, so it is cast and printed with %llu; a plain
 * %u conversion does not match the argument type.
 */
static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
					    size_t size, unsigned int width)
{
	return repsep_snprintf(bf, size, "%-*llu", width,
			       (unsigned long long)he->stat.p_stage_cyc);
}
/*
 * Sort entry for pipeline stage cycles: ties the default column header to
 * its comparator, its formatter and its column-width slot.
 */
struct sort_entry sort_p_stage_cyc = {
	.se_header	= "Pipeline Stage Cycle",
	.se_cmp		= sort__global_p_stage_cyc_cmp,
	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
	.se_width_idx	= HISTC_P_STAGE_CYC,
};
struct sort_entry sort_mem_daddr_sym = { struct sort_entry sort_mem_daddr_sym = {
.se_header = "Data Symbol", .se_header = "Data Symbol",
.se_cmp = sort__daddr_cmp, .se_cmp = sort__daddr_cmp,
...@@ -1853,6 +1872,7 @@ static struct sort_dimension common_sort_dimensions[] = { ...@@ -1853,6 +1872,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
}; };
#undef DIM #undef DIM
......
...@@ -51,6 +51,7 @@ struct he_stat { ...@@ -51,6 +51,7 @@ struct he_stat {
u64 period_guest_us; u64 period_guest_us;
u64 weight; u64 weight;
u64 ins_lat; u64 ins_lat;
u64 p_stage_cyc;
u32 nr_events; u32 nr_events;
}; };
...@@ -234,6 +235,7 @@ enum sort_type { ...@@ -234,6 +235,7 @@ enum sort_type {
SORT_CODE_PAGE_SIZE, SORT_CODE_PAGE_SIZE,
SORT_LOCAL_INS_LAT, SORT_LOCAL_INS_LAT,
SORT_GLOBAL_INS_LAT, SORT_GLOBAL_INS_LAT,
SORT_PIPELINE_STAGE_CYC,
/* branch stack specific sort keys */ /* branch stack specific sort keys */
__SORT_BRANCH_STACK, __SORT_BRANCH_STACK,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment