Commit 9b261365 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core

Pull perf/core improvements and fixes from Jiri Olsa:

  * Add support to accumulate hist periods (Namhyung Kim)
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e450f90e 0506aecc
...@@ -111,7 +111,7 @@ OPTIONS ...@@ -111,7 +111,7 @@ OPTIONS
--fields=:: --fields=::
Specify output field - multiple keys can be specified in CSV format. Specify output field - multiple keys can be specified in CSV format.
Following fields are available: Following fields are available:
overhead, overhead_sys, overhead_us, sample and period. overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s). Also it can contain any sort key(s).
By default, every sort keys not specified in -F will be appended By default, every sort keys not specified in -F will be appended
...@@ -163,6 +163,11 @@ OPTIONS ...@@ -163,6 +163,11 @@ OPTIONS
Default: fractal,0.5,callee,function. Default: fractal,0.5,callee,function.
--children::
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires that callchains are recorded.
--max-stack:: --max-stack::
Set the stack depth limit when parsing the callchain, anything Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off beyond the specified depth will be ignored. This is a trade-off
......
...@@ -119,7 +119,7 @@ Default is to monitor all CPUS. ...@@ -119,7 +119,7 @@ Default is to monitor all CPUS.
--fields=:: --fields=::
Specify output field - multiple keys can be specified in CSV format. Specify output field - multiple keys can be specified in CSV format.
Following fields are available: Following fields are available:
overhead, overhead_sys, overhead_us, sample and period. overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s). Also it can contain any sort key(s).
By default, every sort keys not specified in --field will be appended By default, every sort keys not specified in --field will be appended
...@@ -161,6 +161,12 @@ Default is to monitor all CPUS. ...@@ -161,6 +161,12 @@ Default is to monitor all CPUS.
Setup and enable call-graph (stack chain/backtrace) recording, Setup and enable call-graph (stack chain/backtrace) recording,
implies -g. implies -g.
--children::
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires the -g/--call-graph option
to be enabled.
--max-stack:: --max-stack::
Set the stack depth limit when parsing the callchain, anything Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off beyond the specified depth will be ignored. This is a trade-off
......
...@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o ...@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
LIB_OBJS += $(OUTPUT)tests/python-use.o LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
......
...@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, ...@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0; return 0;
} }
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
true);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); hists__inc_nr_samples(&evsel->hists, true);
return ret; return ret;
} }
......
...@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists, ...@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction) u64 weight, u64 transaction)
{ {
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
transaction) != NULL) transaction, true) != NULL)
return 0; return 0;
return -ENOMEM; return -ENOMEM;
} }
......
...@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb) ...@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL); rep->min_percent = strtof(value, NULL);
return 0; return 0;
} }
if (!strcmp(var, "report.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb); return perf_default_config(var, value, cb);
} }
...@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he) ...@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
*/ */
if (he->stat.nr_events == 1) if (he->stat.nr_events == 1)
rep->nr_entries++; rep->nr_entries++;
/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
} }
static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct perf_sample *sample, struct perf_evsel *evsel) struct addr_location *al, bool single,
void *arg)
{ {
struct symbol *parent = NULL; int err = 0;
struct hist_entry *he; struct report *rep = arg;
struct mem_info *mi, *mx; struct hist_entry *he = iter->he;
uint64_t cost; struct perf_evsel *evsel = iter->evsel;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); struct mem_info *mi;
struct branch_info *bi;
if (err)
return err;
mi = sample__resolve_mem(sample, al); report__inc_stats(rep, he);
if (!mi)
return -ENOMEM;
if (rep->hide_unresolved && !al->sym) if (!ui__has_annotation())
return 0; return 0;
cost = sample->weight; if (sort__mode == SORT_MODE__BRANCH) {
if (!cost) bi = he->branch_info;
cost = 1; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
/*
* must pass period=weight in order to get the correct
* sorting from hists__collapse_resort() which is solely
* based on periods. We want sorting be done on nr_events * weight
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
cost, cost, 0);
if (!he)
return -ENOMEM;
if (ui__has_annotation()) {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
if (err)
goto out;
mx = he->mem_info;
err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
if (err) if (err)
goto out; goto out;
}
report__inc_stats(rep, he);
err = hist_entry__append_callchain(he, sample);
out:
return err;
}
static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
struct symbol *parent = NULL;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err) err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
return err;
bi = sample__resolve_bstack(sample, al);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
err = -ENOMEM; } else if (rep->mem_mode) {
mi = he->mem_info;
/* overwrite the 'al' to branch-to info */ err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
al->map = bi[i].to.map; if (err)
al->sym = bi[i].to.sym;
al->addr = bi[i].to.addr;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
1, 1, 0);
if (he) {
if (ui__has_annotation()) {
bx = he->branch_info;
err = addr_map_symbol__inc_samples(&bx->from,
evsel->idx);
if (err)
goto out;
err = addr_map_symbol__inc_samples(&bx->to,
evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
} else
goto out; goto out;
}
err = 0;
out:
free(bi);
return err;
}
static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
struct addr_location *al, struct perf_sample *sample)
{
struct symbol *parent = NULL;
struct hist_entry *he;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
if (he == NULL)
return -ENOMEM;
err = hist_entry__append_callchain(he, sample);
if (err)
goto out;
if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
report__inc_stats(rep, he); } else if (symbol_conf.cumulate_callchain) {
if (single)
err = hist_entry__inc_addr_samples(he, evsel->idx,
al->addr);
} else {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}
out: out:
return err; return err;
} }
static int process_sample_event(struct perf_tool *tool, static int process_sample_event(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_sample *sample, struct perf_sample *sample,
...@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool, ...@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
{ {
struct report *rep = container_of(tool, struct report, tool); struct report *rep = container_of(tool, struct report, tool);
struct addr_location al; struct addr_location al;
struct hist_entry_iter iter = {
.hide_unresolved = rep->hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret; int ret;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
...@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool, ...@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0; return 0;
if (sort__mode == SORT_MODE__BRANCH) { if (sort__mode == SORT_MODE__BRANCH)
ret = report__add_branch_hist_entry(rep, &al, sample, evsel); iter.ops = &hist_iter_branch;
if (ret < 0) else if (rep->mem_mode)
pr_debug("problem adding lbr entry, skipping event\n"); iter.ops = &hist_iter_mem;
} else if (rep->mem_mode == 1) { else if (symbol_conf.cumulate_callchain)
ret = report__add_mem_hist_entry(rep, &al, sample, evsel); iter.ops = &hist_iter_cumulative;
if (ret < 0) else
pr_debug("problem adding mem entry, skipping event\n"); iter.ops = &hist_iter_normal;
} else {
if (al.map != NULL) if (al.map != NULL)
al.map->dso->hit = 1; al.map->dso->hit = 1;
ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
ret = report__add_hist_entry(rep, evsel, &al, sample);
if (ret < 0)
pr_debug("problem incrementing symbol period, skipping event\n");
}
return ret; return ret;
} }
...@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep) ...@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
} }
} }
if (symbol_conf.cumulate_callchain) {
/* Silently ignore if callchain is missing */
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
}
if (sort__mode == SORT_MODE__BRANCH) { if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe && if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) { !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
...@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack, OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, " "Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. " "anything beyond the specified depth will be ignored. "
...@@ -804,8 +719,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -804,8 +719,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header, has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK); HEADER_BRANCH_STACK);
if (branch_mode == -1 && has_br_stack) if (branch_mode == -1 && has_br_stack) {
sort__mode = SORT_MODE__BRANCH; sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}
if (report.mem_mode) { if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) { if (sort__mode == SORT_MODE__BRANCH) {
...@@ -813,6 +730,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -813,6 +730,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
goto error; goto error;
} }
sort__mode = SORT_MODE__MEMORY; sort__mode = SORT_MODE__MEMORY;
symbol_conf.cumulate_callchain = false;
} }
if (setup_sorting() < 0) { if (setup_sorting() < 0) {
......
...@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ ...@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
int err = 0; int err = 0;
evsel->hists.stats.total_period += sample->period; evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); hists__inc_nr_samples(&evsel->hists, true);
if (evsel->handler != NULL) { if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler; tracepoint_handler f = evsel->handler;
......
...@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top, ...@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
pthread_mutex_unlock(&notes->lock); pthread_mutex_unlock(&notes->lock);
/*
* This function is now called with he->hists->lock held.
* Release it before going to sleep.
*/
pthread_mutex_unlock(&he->hists->lock);
if (err == -ERANGE && !he->ms.map->erange_warned) if (err == -ERANGE && !he->ms.map->erange_warned)
ui__warn_map_erange(he->ms.map, sym, ip); ui__warn_map_erange(he->ms.map, sym, ip);
else if (err == -ENOMEM) { else if (err == -ENOMEM) {
...@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top, ...@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
sym->name); sym->name);
sleep(1); sleep(1);
} }
pthread_mutex_lock(&he->hists->lock);
} }
static void perf_top__show_details(struct perf_top *top) static void perf_top__show_details(struct perf_top *top)
...@@ -238,27 +246,6 @@ static void perf_top__show_details(struct perf_top *top) ...@@ -238,27 +246,6 @@ static void perf_top__show_details(struct perf_top *top)
pthread_mutex_unlock(&notes->lock); pthread_mutex_unlock(&notes->lock);
} }
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample)
{
struct hist_entry *he;
pthread_mutex_lock(&evsel->hists.lock);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
pthread_mutex_unlock(&evsel->hists.lock);
if (he == NULL)
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
evsel->hists.stats.nr_non_filtered_samples++;
return he;
}
static void perf_top__print_sym_table(struct perf_top *top) static void perf_top__print_sym_table(struct perf_top *top)
{ {
char bf[160]; char bf[160];
...@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) ...@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
return 0; return 0;
} }
/*
 * Per-entry callback handed to the hist entry iterator by perf top.
 *
 * @iter:   iterator; supplies the hist entry and the evsel
 * @al:     resolved location of the sample
 * @single: only when this is set is the precise ip recorded
 * @arg:    the struct perf_top passed to hist_entry_iter__add()
 *
 * When sorting by symbol and @single is set, the sample address is
 * passed through the map's unmap_ip() (if a map was resolved) and
 * recorded via perf_top__record_precise_ip().
 *
 * Always returns 0.
 */
static int hist_iter__top_callback(struct hist_entry_iter *iter,
				   struct addr_location *al, bool single,
				   void *arg)
{
	struct perf_top *top = arg;
	u64 addr;

	if (!sort__has_sym || !single)
		return 0;

	addr = al->addr;
	if (al->map != NULL)
		addr = al->map->unmap_ip(al->map, addr);

	perf_top__record_precise_ip(top, iter->he, iter->evsel->idx, addr);
	return 0;
}
static void perf_event__process_sample(struct perf_tool *tool, static void perf_event__process_sample(struct perf_tool *tool,
const union perf_event *event, const union perf_event *event,
struct perf_evsel *evsel, struct perf_evsel *evsel,
...@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool, ...@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
struct machine *machine) struct machine *machine)
{ {
struct perf_top *top = container_of(tool, struct perf_top, tool); struct perf_top *top = container_of(tool, struct perf_top, tool);
struct symbol *parent = NULL;
u64 ip = sample->ip;
struct addr_location al; struct addr_location al;
int err; int err;
...@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool, ...@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
} }
if (al.sym == NULL || !al.sym->ignore) { if (al.sym == NULL || !al.sym->ignore) {
struct hist_entry *he; struct hist_entry_iter iter = {
.add_entry_cb = hist_iter__top_callback,
};
err = sample__resolve_callchain(sample, &parent, evsel, &al, if (symbol_conf.cumulate_callchain)
top->max_stack); iter.ops = &hist_iter_cumulative;
if (err) else
return; iter.ops = &hist_iter_normal;
he = perf_evsel__add_hist_entry(evsel, &al, sample); pthread_mutex_lock(&evsel->hists.lock);
if (he == NULL) {
pr_err("Problem incrementing symbol period, skipping event\n");
return;
}
err = hist_entry__append_callchain(he, sample); err = hist_entry_iter__add(&iter, &al, evsel, sample,
if (err) top->max_stack, top);
return; if (err < 0)
pr_err("Problem incrementing symbol period, skipping event\n");
if (sort__has_sym) pthread_mutex_unlock(&evsel->hists.lock);
perf_top__record_precise_ip(top, he, evsel->idx, ip);
} }
return; return;
...@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb) ...@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb)
if (!strcmp(var, "top.call-graph")) if (!strcmp(var, "top.call-graph"))
return record_parse_callchain(value, &top->record_opts); return record_parse_callchain(value, &top->record_opts);
if (!strcmp(var, "top.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb); return perf_default_config(var, value, cb);
} }
...@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK(0, "call-graph", &top.record_opts, OPT_CALLBACK(0, "call-graph", &top.record_opts,
"mode[,dump_size]", record_callchain_help, "mode[,dump_size]", record_callchain_help,
&parse_callchain_opt), &parse_callchain_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack, OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. " "Set the maximum stack depth when parsing the callchain. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)), "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
...@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
top.sym_evsel = perf_evlist__first(top.evlist); top.sym_evsel = perf_evlist__first(top.evlist);
if (!symbol_conf.use_callchain) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
......
...@@ -139,6 +139,10 @@ static struct test { ...@@ -139,6 +139,10 @@ static struct test {
.desc = "Test output sorting of hist entries", .desc = "Test output sorting of hist entries",
.func = test__hists_output, .func = test__hists_output,
}, },
{
.desc = "Test cumulation of child hist entries",
.func = test__hists_cumulate,
},
{ {
.func = NULL, .func = NULL,
}, },
......
...@@ -12,9 +12,9 @@ static struct { ...@@ -12,9 +12,9 @@ static struct {
u32 pid; u32 pid;
const char *comm; const char *comm;
} fake_threads[] = { } fake_threads[] = {
{ 100, "perf" }, { FAKE_PID_PERF1, "perf" },
{ 200, "perf" }, { FAKE_PID_PERF2, "perf" },
{ 300, "bash" }, { FAKE_PID_BASH, "bash" },
}; };
static struct { static struct {
...@@ -22,15 +22,15 @@ static struct { ...@@ -22,15 +22,15 @@ static struct {
u64 start; u64 start;
const char *filename; const char *filename;
} fake_mmap_info[] = { } fake_mmap_info[] = {
{ 100, 0x40000, "perf" }, { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
{ 100, 0x50000, "libc" }, { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
{ 100, 0xf0000, "[kernel]" }, { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
{ 200, 0x40000, "perf" }, { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
{ 200, 0x50000, "libc" }, { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
{ 200, 0xf0000, "[kernel]" }, { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
{ 300, 0x40000, "bash" }, { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
{ 300, 0x50000, "libc" }, { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
{ 300, 0xf0000, "[kernel]" }, { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
}; };
struct fake_sym { struct fake_sym {
...@@ -40,27 +40,30 @@ struct fake_sym { ...@@ -40,27 +40,30 @@ struct fake_sym {
}; };
static struct fake_sym perf_syms[] = { static struct fake_sym perf_syms[] = {
{ 700, 100, "main" }, { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ 800, 100, "run_command" }, { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
{ 900, 100, "cmd_record" }, { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
}; };
static struct fake_sym bash_syms[] = { static struct fake_sym bash_syms[] = {
{ 700, 100, "main" }, { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ 800, 100, "xmalloc" }, { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
{ 900, 100, "xfree" }, { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
}; };
static struct fake_sym libc_syms[] = { static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" }, { 700, 100, "malloc" },
{ 800, 100, "free" }, { 800, 100, "free" },
{ 900, 100, "realloc" }, { 900, 100, "realloc" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
}; };
static struct fake_sym kernel_syms[] = { static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" }, { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
{ 800, 100, "page_fault" }, { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
{ 900, 100, "sys_perf_event_open" }, { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
}; };
static struct { static struct {
...@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines) ...@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines)
.pid = fake_mmap_info[i].pid, .pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start, .start = fake_mmap_info[i].start,
.len = 0x1000ULL, .len = FAKE_MAP_LENGTH,
.pgoff = 0ULL, .pgoff = 0ULL,
}, },
}; };
...@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists) ...@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists)
he = rb_entry(node, struct hist_entry, rb_node); he = rb_entry(node, struct hist_entry, rb_node);
if (!he->filtered) { if (!he->filtered) {
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
i, thread__comm_str(he->thread), he->thread->tid, i, thread__comm_str(he->thread), he->thread->tid,
he->ms.map->dso->short_name, he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period); he->ms.sym->name, he->stat.period,
he->stat_acc ? he->stat_acc->period : 0);
} }
i++; i++;
......
...@@ -4,6 +4,34 @@ ...@@ -4,6 +4,34 @@
struct machine; struct machine;
struct machines; struct machines;
/* Process ids of the fake threads. */
#define FAKE_PID_PERF1		100
#define FAKE_PID_PERF2		200
#define FAKE_PID_BASH		300

/* Start address of each fake mapping, and the length shared by all of them. */
#define FAKE_MAP_PERF		0x400000
#define FAKE_MAP_BASH		0x400000
#define FAKE_MAP_LIBC		0x500000
#define FAKE_MAP_KERNEL		0xf00000
#define FAKE_MAP_LENGTH		0x100000

/* Offset of each fake symbol inside its mapping, and the symbol length. */
#define FAKE_SYM_OFFSET1	700
#define FAKE_SYM_OFFSET2	800
#define FAKE_SYM_OFFSET3	900
#define FAKE_SYM_LENGTH		100

/*
 * Absolute address of each fake symbol: map start + symbol offset.
 *
 * The expansions are parenthesised so that the macros stay a single
 * value when used inside a larger expression (e.g. next to '*' or '%').
 */
#define FAKE_IP_PERF_MAIN		(FAKE_MAP_PERF + FAKE_SYM_OFFSET1)
#define FAKE_IP_PERF_RUN_COMMAND	(FAKE_MAP_PERF + FAKE_SYM_OFFSET2)
#define FAKE_IP_PERF_CMD_RECORD		(FAKE_MAP_PERF + FAKE_SYM_OFFSET3)
#define FAKE_IP_BASH_MAIN		(FAKE_MAP_BASH + FAKE_SYM_OFFSET1)
#define FAKE_IP_BASH_XMALLOC		(FAKE_MAP_BASH + FAKE_SYM_OFFSET2)
#define FAKE_IP_BASH_XFREE		(FAKE_MAP_BASH + FAKE_SYM_OFFSET3)
#define FAKE_IP_LIBC_MALLOC		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET1)
#define FAKE_IP_LIBC_FREE		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET2)
#define FAKE_IP_LIBC_REALLOC		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET3)
#define FAKE_IP_KERNEL_SCHEDULE		(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1)
#define FAKE_IP_KERNEL_PAGE_FAULT	(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2)
#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN	(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3)
/* /*
* The setup_fake_machine() provides a test environment which consists * The setup_fake_machine() provides a test environment which consists
* of 3 processes that have 3 mappings and in turn, have 3 symbols * of 3 processes that have 3 mappings and in turn, have 3 symbols
...@@ -13,7 +41,7 @@ struct machines; ...@@ -13,7 +41,7 @@ struct machines;
* ............. ............. ................... * ............. ............. ...................
* perf: 100 perf main * perf: 100 perf main
* perf: 100 perf run_command * perf: 100 perf run_command
* perf: 100 perf comd_record * perf: 100 perf cmd_record
* perf: 100 libc malloc * perf: 100 libc malloc
* perf: 100 libc free * perf: 100 libc free
* perf: 100 libc realloc * perf: 100 libc realloc
...@@ -22,7 +50,7 @@ struct machines; ...@@ -22,7 +50,7 @@ struct machines;
* perf: 100 [kernel] sys_perf_event_open * perf: 100 [kernel] sys_perf_event_open
* perf: 200 perf main * perf: 200 perf main
* perf: 200 perf run_command * perf: 200 perf run_command
* perf: 200 perf comd_record * perf: 200 perf cmd_record
* perf: 200 libc malloc * perf: 200 libc malloc
* perf: 200 libc free * perf: 200 libc free
* perf: 200 libc realloc * perf: 200 libc realloc
......
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "util/parse-events.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
/*
 * One fake sample used by this test.
 *
 * pid and ip are the inputs: add_hist_entries() copies pid into both the
 * pid and tid of the perf_sample and ip into its ip.  thread, map and sym
 * are outputs: add_hist_entries() stores the resolved addr_location
 * values there once the sample has been added.
 */
struct sample {
u32 pid;
u64 ip;
struct thread *thread;
struct map *map;
struct symbol *sym;
};
/*
 * For the numbers, see hists_common.c
 *
 * Entry i is processed together with fake_callchains[i] by
 * add_hist_entries().  Only pid and ip are initialised here; the
 * remaining members start out zeroed and are filled in at run time.
 */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [libc] malloc() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [libc] free() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
/* perf [perf] main() */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [kernel] page_fault() */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [kernel] page_fault() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
/*
 * Will be cast to struct ip_callchain which has all 64 bit entries
 * of nr and ips[].
 *
 * Each row is one callchain: the first element is the number of
 * addresses (nr), followed by that many addresses.  The first address
 * of row i is the ip of fake_samples[i]; the per-row comments list the
 * chain in the same order as the addresses.  Unused trailing slots of a
 * row are zero.
 */
static u64 fake_callchains[][10] = {
/* schedule => run_command => main */
{ 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_PERF_MAIN, },
/* cmd_record => run_command => main */
{ 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* malloc => cmd_record => run_command => main */
{ 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
FAKE_IP_PERF_MAIN, },
/* free => cmd_record => run_command => main */
{ 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_PERF_MAIN, },
/* page_fault => sys_perf_event_open => run_command => main */
{ 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_BASH_MAIN, },
/* xmalloc => malloc => xmalloc => malloc => xmalloc => main */
{ 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
/* page_fault => malloc => main */
{ 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
};
static int add_hist_entries(struct hists *hists, struct machine *machine)
{
struct addr_location al;
struct perf_evsel *evsel = hists_to_evsel(hists);
struct perf_sample sample = { .period = 1000, };
size_t i;
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
const union perf_event event = {
.header = {
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.hide_unresolved = false,
};
if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
sample.callchain = (struct ip_callchain *)fake_callchains[i];
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
}
return TEST_OK;
out:
pr_debug("Not enough memory for adding a hist entry\n");
return TEST_FAIL;
}
/*
 * Drop every entry reachable from the output tree of @hists.
 *
 * Each entry is unlinked from the output tree and from the input tree
 * (the collapsed tree when sort__need_collapse is set, hists->entries_in
 * otherwise) before it is freed.
 */
static void del_hist_entries(struct hists *hists)
{
	struct rb_root *out_tree = &hists->entries;
	struct rb_root *in_tree;
	struct rb_node *first;

	in_tree = sort__need_collapse ? &hists->entries_collapsed
				      : hists->entries_in;

	/* keep taking the leftmost node until the output tree is empty */
	for (first = rb_first(out_tree); first; first = rb_first(out_tree)) {
		struct hist_entry *entry;

		entry = rb_entry(first, struct hist_entry, rb_node);

		rb_erase(first, out_tree);
		rb_erase(&entry->rb_node_in, in_tree);
		hist_entry__free(entry);
	}
}
/* Signature shared by the test cases that test__hists_cumulate() runs in turn. */
typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
/*
 * Field accessors for a hist_entry pointer (he) or a callchain_list
 * pointer (cl).  The argument is parenthesized so that any pointer
 * expression (e.g. "base + 1") expands correctly.
 */
#define COMM(he) (thread__comm_str((he)->thread))
#define DSO(he) ((he)->ms.map->dso->short_name)
#define SYM(he) ((he)->ms.sym->name)
#define CPU(he) ((he)->cpu)
#define PID(he) ((he)->thread->tid)
#define DEPTH(he) ((he)->callchain->max_depth)
#define CDSO(cl) ((cl)->ms.map->dso->short_name)
#define CSYM(cl) ((cl)->ms.sym->name)
/* One expected output row; do_test() compares it against a hist entry. */
struct result {
u64 children; /* compared with he->stat_acc->period (cumulate mode only) */
u64 self; /* compared with he->stat.period */
const char *comm; /* compared with the entry's thread comm string */
const char *dso; /* compared with the short name of the entry's dso */
const char *sym; /* compared with the entry's symbol name */
};
/* Expected callchain of one output row, checked when callchains are in use. */
struct callchain_result {
u64 nr; /* upper bound on the number of callchain entries */
struct {
const char *dso; /* short dso name of this callchain entry */
const char *sym; /* symbol name of this callchain entry */
} node[10];
};
/*
 * Resort @hists and compare its output entries, in output order, against
 * the @nr_expected rows of @expected.  When symbol_conf.use_callchain is
 * set, the first callchain node of every entry is also compared against
 * the matching row of @expected_callchain (@nr_callchain rows).
 *
 * Returns 0 when everything matches.  TEST_ASSERT_VAL() returns from this
 * function on the first mismatch, so the caller owns adding and deleting
 * the hist entries.
 */
static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
		   struct callchain_result *expected_callchain, size_t nr_callchain)
{
	char buf[64];
	size_t i, c;
	struct hist_entry *he;
	struct rb_root *root;
	struct rb_root *chain_root;
	struct rb_node *node;
	struct rb_node *chain_first;
	struct callchain_node *cnode;
	struct callchain_list *clist;

	/*
	 * adding and deleting hist entries must be done outside of this
	 * function since TEST_ASSERT_VAL() returns in case of failure.
	 */
	hists__collapse_resort(hists, NULL);
	hists__output_resort(hists);

	if (verbose > 2) {
		pr_info("use callchain: %d, cumulate callchain: %d\n",
			symbol_conf.use_callchain,
			symbol_conf.cumulate_callchain);
		print_hists_out(hists);
	}

	/*
	 * The loop below indexes expected_callchain[] with the hist entry
	 * index, so make sure both tables have the same length up front
	 * instead of after the (possibly out-of-bounds) accesses.
	 */
	TEST_ASSERT_VAL("Incorrect number of callchain entry",
			!symbol_conf.use_callchain ||
			(expected_callchain && nr_expected == nr_callchain));

	root = &hists->entries;
	for (node = rb_first(root), i = 0;
	     node && (he = rb_entry(node, struct hist_entry, rb_node));
	     node = rb_next(node), i++) {
		scnprintf(buf, sizeof(buf), "Invalid hist entry #%zu", i);

		/* bound check first: expected[i] is read right below */
		TEST_ASSERT_VAL("Incorrect number of hist entry",
				i < nr_expected);
		TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
				!strcmp(COMM(he), expected[i].comm) &&
				!strcmp(DSO(he), expected[i].dso) &&
				!strcmp(SYM(he), expected[i].sym));

		if (symbol_conf.cumulate_callchain)
			TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);

		if (!symbol_conf.use_callchain)
			continue;

		/* check callchain entries */
		chain_root = &he->callchain->node.rb_root;
		chain_first = rb_first(chain_root);

		/* an empty tree would make rb_entry() yield a bogus pointer */
		TEST_ASSERT_VAL("Missing callchain entry", chain_first != NULL);
		cnode = rb_entry(chain_first, struct callchain_node, rb_node);

		c = 0;
		list_for_each_entry(clist, &cnode->val, list) {
			scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zu/%zu", i, c);

			TEST_ASSERT_VAL("Incorrect number of callchain entry",
					c < expected_callchain[i].nr);
			TEST_ASSERT_VAL(buf,
				!strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
				!strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
			c++;
		}
		/* TODO: handle multiple child nodes properly */
		TEST_ASSERT_VAL("Incorrect number of callchain entry",
				c <= expected_callchain[i].nr);
	}

	TEST_ASSERT_VAL("Incorrect number of hist entry",
			i == nr_expected);

	return 0;
}
/*
 * Case 1: callchains off, children (cumulation) off.
 *
 * Every sample only contributes to its own entry, so the output is:
 *
 *   Overhead  Command  Shared Object  Symbol
 *   ========  =======  =============  ==============
 *     20.00%     perf  perf           [.] main
 *     10.00%     bash  [kernel]       [k] page_fault
 *     10.00%     bash  bash           [.] main
 *     10.00%     bash  bash           [.] xmalloc
 *     10.00%     perf  [kernel]       [k] page_fault
 *     10.00%     perf  [kernel]       [k] schedule
 *     10.00%     perf  libc           [.] free
 *     10.00%     perf  libc           [.] malloc
 *     10.00%     perf  perf           [.] cmd_record
 */
static int test1(struct perf_evsel *evsel, struct machine *machine)
{
	struct hists *hists = &evsel->hists;
	struct result want[] = {
		{ .children = 0, .self = 2000, .comm = "perf", .dso = "perf", .sym = "main" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "bash", .sym = "main" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "bash", .sym = "xmalloc" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "schedule" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "libc", .sym = "free" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "libc", .sym = "malloc" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "perf", .sym = "cmd_record" },
	};
	int ret;

	symbol_conf.use_callchain = false;
	symbol_conf.cumulate_callchain = false;

	setup_sorting();
	callchain_register_param(&callchain_param);

	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, want, ARRAY_SIZE(want), NULL, 0);

	/* tear down regardless of the outcome above */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * Case 2: callchains on, children (cumulation) off.
 *
 * Same rows as without callchains, each one now carrying its callchain:
 *
 *   Overhead  Command  Shared Object  Symbol
 *   ========  =======  =============  ==============
 *     20.00%     perf  perf           [.] main
 *               |
 *               --- main
 *
 *     10.00%     bash  [kernel]       [k] page_fault
 *               |
 *               --- page_fault
 *                   malloc
 *                   main
 *
 *     10.00%     bash  bash           [.] main
 *               |
 *               --- main
 *
 *     10.00%     bash  bash           [.] xmalloc
 *               |
 *               --- xmalloc
 *                   malloc
 *                   xmalloc     <--- NOTE: there's a cycle
 *                   malloc
 *                   xmalloc
 *                   main
 *
 *     10.00%     perf  [kernel]       [k] page_fault
 *               |
 *               --- page_fault
 *                   sys_perf_event_open
 *                   run_command
 *                   main
 *
 *     10.00%     perf  [kernel]       [k] schedule
 *               |
 *               --- schedule
 *                   run_command
 *                   main
 *
 *     10.00%     perf  libc           [.] free
 *               |
 *               --- free
 *                   cmd_record
 *                   run_command
 *                   main
 *
 *     10.00%     perf  libc           [.] malloc
 *               |
 *               --- malloc
 *                   cmd_record
 *                   run_command
 *                   main
 *
 *     10.00%     perf  perf           [.] cmd_record
 *               |
 *               --- cmd_record
 *                   run_command
 *                   main
 */
static int test2(struct perf_evsel *evsel, struct machine *machine)
{
	struct hists *hists = &evsel->hists;
	struct result want[] = {
		{ .children = 0, .self = 2000, .comm = "perf", .dso = "perf", .sym = "main" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "bash", .sym = "main" },
		{ .children = 0, .self = 1000, .comm = "bash", .dso = "bash", .sym = "xmalloc" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "schedule" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "libc", .sym = "free" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "libc", .sym = "malloc" },
		{ .children = 0, .self = 1000, .comm = "perf", .dso = "perf", .sym = "cmd_record" },
	};
	/* one callchain per row of want[], in the same order */
	struct callchain_result want_chain[] = {
		{ .nr = 1, .node = {
			{ "perf", "main" },
		} },
		{ .nr = 3, .node = {
			{ "[kernel]", "page_fault" },
			{ "libc", "malloc" },
			{ "bash", "main" },
		} },
		{ .nr = 1, .node = {
			{ "bash", "main" },
		} },
		{ .nr = 6, .node = {
			{ "bash", "xmalloc" },
			{ "libc", "malloc" },
			{ "bash", "xmalloc" },
			{ "libc", "malloc" },
			{ "bash", "xmalloc" },
			{ "bash", "main" },
		} },
		{ .nr = 4, .node = {
			{ "[kernel]", "page_fault" },
			{ "[kernel]", "sys_perf_event_open" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 3, .node = {
			{ "[kernel]", "schedule" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 4, .node = {
			{ "libc", "free" },
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 4, .node = {
			{ "libc", "malloc" },
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 3, .node = {
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
	};
	int ret;

	symbol_conf.use_callchain = true;
	symbol_conf.cumulate_callchain = false;

	setup_sorting();
	callchain_register_param(&callchain_param);

	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, want, ARRAY_SIZE(want),
			      want_chain, ARRAY_SIZE(want_chain));

	/* tear down regardless of the outcome above */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * Case 3: callchains off, children (cumulation) on.
 *
 * Periods are accumulated into the callers found in each callchain, which
 * also creates rows for symbols that were never sampled themselves:
 *
 *   Children      Self  Command  Shared Object  Symbol
 *   ========  ========  =======  =============  =======================
 *     70.00%    20.00%     perf  perf           [.] main
 *     50.00%     0.00%     perf  perf           [.] run_command
 *     30.00%    10.00%     bash  bash           [.] main
 *     30.00%    10.00%     perf  perf           [.] cmd_record
 *     20.00%     0.00%     bash  libc           [.] malloc
 *     10.00%    10.00%     bash  [kernel]       [k] page_fault
 *     10.00%    10.00%     perf  [kernel]       [k] schedule
 *     10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
 *     10.00%    10.00%     perf  [kernel]       [k] page_fault
 *     10.00%    10.00%     perf  libc           [.] free
 *     10.00%    10.00%     perf  libc           [.] malloc
 *     10.00%    10.00%     bash  bash           [.] xmalloc
 */
static int test3(struct perf_evsel *evsel, struct machine *machine)
{
	struct hists *hists = &evsel->hists;
	struct result want[] = {
		{ .children = 7000, .self = 2000, .comm = "perf", .dso = "perf", .sym = "main" },
		{ .children = 5000, .self = 0, .comm = "perf", .dso = "perf", .sym = "run_command" },
		{ .children = 3000, .self = 1000, .comm = "bash", .dso = "bash", .sym = "main" },
		{ .children = 3000, .self = 1000, .comm = "perf", .dso = "perf", .sym = "cmd_record" },
		{ .children = 2000, .self = 0, .comm = "bash", .dso = "libc", .sym = "malloc" },
		{ .children = 1000, .self = 1000, .comm = "bash", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "schedule" },
		{ .children = 1000, .self = 0, .comm = "perf", .dso = "[kernel]", .sym = "sys_perf_event_open" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "libc", .sym = "free" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "libc", .sym = "malloc" },
		{ .children = 1000, .self = 1000, .comm = "bash", .dso = "bash", .sym = "xmalloc" },
	};
	int ret;

	symbol_conf.use_callchain = false;
	symbol_conf.cumulate_callchain = true;

	setup_sorting();
	callchain_register_param(&callchain_param);

	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, want, ARRAY_SIZE(want), NULL, 0);

	/* tear down regardless of the outcome above */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * Case 4: callchains on, children (cumulation) on.
 *
 * Same rows as the children-only case, each one carrying its callchain:
 *
 *   Children      Self  Command  Shared Object  Symbol
 *   ========  ========  =======  =============  =======================
 *     70.00%    20.00%     perf  perf           [.] main
 *               |
 *               --- main
 *
 *     50.00%     0.00%     perf  perf           [.] run_command
 *               |
 *               --- run_command
 *                   main
 *
 *     30.00%    10.00%     bash  bash           [.] main
 *               |
 *               --- main
 *
 *     30.00%    10.00%     perf  perf           [.] cmd_record
 *               |
 *               --- cmd_record
 *                   run_command
 *                   main
 *
 *     20.00%     0.00%     bash  libc           [.] malloc
 *               |
 *               --- malloc
 *                  |
 *                  |--50.00%-- xmalloc
 *                  |           main
 *                   --50.00%-- main
 *
 *     10.00%    10.00%     bash  [kernel]       [k] page_fault
 *               |
 *               --- page_fault
 *                   malloc
 *                   main
 *
 *     10.00%    10.00%     perf  [kernel]       [k] schedule
 *               |
 *               --- schedule
 *                   run_command
 *                   main
 *
 *     10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
 *               |
 *               --- sys_perf_event_open
 *                   run_command
 *                   main
 *
 *     10.00%    10.00%     perf  [kernel]       [k] page_fault
 *               |
 *               --- page_fault
 *                   sys_perf_event_open
 *                   run_command
 *                   main
 *
 *     10.00%    10.00%     perf  libc           [.] free
 *               |
 *               --- free
 *                   cmd_record
 *                   run_command
 *                   main
 *
 *     10.00%    10.00%     perf  libc           [.] malloc
 *               |
 *               --- malloc
 *                   cmd_record
 *                   run_command
 *                   main
 *
 *     10.00%    10.00%     bash  bash           [.] xmalloc
 *               |
 *               --- xmalloc
 *                   malloc
 *                   xmalloc     <--- NOTE: there's a cycle
 *                   malloc
 *                   xmalloc
 *                   main
 */
static int test4(struct perf_evsel *evsel, struct machine *machine)
{
	struct hists *hists = &evsel->hists;
	struct result want[] = {
		{ .children = 7000, .self = 2000, .comm = "perf", .dso = "perf", .sym = "main" },
		{ .children = 5000, .self = 0, .comm = "perf", .dso = "perf", .sym = "run_command" },
		{ .children = 3000, .self = 1000, .comm = "bash", .dso = "bash", .sym = "main" },
		{ .children = 3000, .self = 1000, .comm = "perf", .dso = "perf", .sym = "cmd_record" },
		{ .children = 2000, .self = 0, .comm = "bash", .dso = "libc", .sym = "malloc" },
		{ .children = 1000, .self = 1000, .comm = "bash", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "schedule" },
		{ .children = 1000, .self = 0, .comm = "perf", .dso = "[kernel]", .sym = "sys_perf_event_open" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "[kernel]", .sym = "page_fault" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "libc", .sym = "free" },
		{ .children = 1000, .self = 1000, .comm = "perf", .dso = "libc", .sym = "malloc" },
		{ .children = 1000, .self = 1000, .comm = "bash", .dso = "bash", .sym = "xmalloc" },
	};
	/* one callchain per row of want[], in the same order */
	struct callchain_result want_chain[] = {
		{ .nr = 1, .node = {
			{ "perf", "main" },
		} },
		{ .nr = 2, .node = {
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 1, .node = {
			{ "bash", "main" },
		} },
		{ .nr = 3, .node = {
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		/* both branches below malloc, flattened into one list */
		{ .nr = 4, .node = {
			{ "libc", "malloc" },
			{ "bash", "xmalloc" },
			{ "bash", "main" },
			{ "bash", "main" },
		} },
		{ .nr = 3, .node = {
			{ "[kernel]", "page_fault" },
			{ "libc", "malloc" },
			{ "bash", "main" },
		} },
		{ .nr = 3, .node = {
			{ "[kernel]", "schedule" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 3, .node = {
			{ "[kernel]", "sys_perf_event_open" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 4, .node = {
			{ "[kernel]", "page_fault" },
			{ "[kernel]", "sys_perf_event_open" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 4, .node = {
			{ "libc", "free" },
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 4, .node = {
			{ "libc", "malloc" },
			{ "perf", "cmd_record" },
			{ "perf", "run_command" },
			{ "perf", "main" },
		} },
		{ .nr = 6, .node = {
			{ "bash", "xmalloc" },
			{ "libc", "malloc" },
			{ "bash", "xmalloc" },
			{ "libc", "malloc" },
			{ "bash", "xmalloc" },
			{ "bash", "main" },
		} },
	};
	int ret;

	symbol_conf.use_callchain = true;
	symbol_conf.cumulate_callchain = true;

	setup_sorting();
	callchain_register_param(&callchain_param);

	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, want, ARRAY_SIZE(want),
			      want_chain, ARRAY_SIZE(want_chain));

	/* tear down regardless of the outcome above */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * Entry point of the test: set up an evlist with a "cpu-clock" event and
 * a fake machine, then run test1..test4 in order, stopping at the first
 * one that returns a negative value.
 *
 * Returns the result of the last test case run, the parse_events() error
 * if the event cannot be parsed, or TEST_FAIL if the fake machine cannot
 * be set up.
 */
int test__hists_cumulate(void)
{
	int err = TEST_FAIL;
	struct machines machines;
	struct machine *machine;
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = perf_evlist__new();
	size_t i;
	test_fn_t testcases[] = {
		test1,
		test2,
		test3,
		test4,
	};

	TEST_ASSERT_VAL("No memory", evlist);

	/*
	 * Initialize before the first "goto out": the shared exit path
	 * calls machines__exit() and must never see an uninitialized struct.
	 */
	machines__init(&machines);

	err = parse_events(evlist, "cpu-clock");
	if (err)
		goto out;

	/*
	 * parse_events() left err at 0; a bail-out from here on must be
	 * reported as a failure, not as success.
	 */
	err = TEST_FAIL;

	/* setup threads/dso/map/symbols also */
	machine = setup_fake_machine(&machines);
	if (!machine)
		goto out;

	if (verbose > 1)
		machine__fprintf(machine, stderr);

	evsel = perf_evlist__first(evlist);

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		err = testcases[i](evsel, machine);
		if (err < 0)
			break;
	}

out:
	/* tear down everything */
	perf_evlist__delete(evlist);
	machines__exit(&machines);

	return err;
}
...@@ -21,33 +21,33 @@ struct sample { ...@@ -21,33 +21,33 @@ struct sample {
/* For the numbers, see hists_common.c */ /* For the numbers, see hists_common.c */
static struct sample fake_samples[] = { static struct sample fake_samples[] = {
/* perf [kernel] schedule() */ /* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */ /* perf [perf] main() */
{ .pid = 100, .ip = 0x40000 + 700, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [libc] malloc() */ /* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [perf] main() */ /* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
/* perf [perf] cmd_record() */ /* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [kernel] page_fault() */ /* perf [kernel] page_fault() */
{ .pid = 200, .ip = 0xf0000 + 800, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */ /* bash [bash] main() */
{ .pid = 300, .ip = 0x40000 + 700, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */ /* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */ /* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
/* bash [kernel] page_fault() */ /* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
}; };
static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) static int add_hist_entries(struct perf_evlist *evlist,
struct machine *machine __maybe_unused)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
struct addr_location al; struct addr_location al;
struct hist_entry *he; struct perf_sample sample = { .period = 100, };
struct perf_sample sample = { .cpu = 0, };
size_t i; size_t i;
/* /*
...@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
.misc = PERF_RECORD_MISC_USER, .misc = PERF_RECORD_MISC_USER,
}, },
}; };
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
/* make sure it has no filter at first */ /* make sure it has no filter at first */
evsel->hists.thread_filter = NULL; evsel->hists.thread_filter = NULL;
...@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
&sample) < 0) &sample) < 0)
goto out; goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL, if (hist_entry_iter__add(&iter, &al, evsel, &sample,
NULL, NULL, 100, 1, 0); PERF_MAX_STACK_DEPTH, NULL) < 0)
if (he == NULL)
goto out; goto out;
fake_samples[i].thread = al.thread; fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map; fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym; fake_samples[i].sym = al.sym;
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
} }
} }
......
...@@ -21,41 +21,41 @@ struct sample { ...@@ -21,41 +21,41 @@ struct sample {
/* For the numbers, see hists_common.c */ /* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = { static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */ /* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */ /* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */ /* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* bash [bash] xmalloc() */ /* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */ /* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
}; };
static struct sample fake_samples[][5] = { static struct sample fake_samples[][5] = {
{ {
/* perf [perf] run_command() */ /* perf [perf] run_command() */
{ .pid = 100, .ip = 0x40000 + 800, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
/* perf [libc] malloc() */ /* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [kernel] page_fault() */ /* perf [kernel] page_fault() */
{ .pid = 100, .ip = 0xf0000 + 800, }, { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* perf [kernel] sys_perf_event_open() */ /* perf [kernel] sys_perf_event_open() */
{ .pid = 200, .ip = 0xf0000 + 900, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
/* bash [libc] free() */ /* bash [libc] free() */
{ .pid = 300, .ip = 0x50000 + 800, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
}, },
{ {
/* perf [libc] free() */ /* perf [libc] free() */
{ .pid = 200, .ip = 0x50000 + 800, }, { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
/* bash [libc] malloc() */ /* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
/* bash [bash] xfee() */ /* bash [bash] xfee() */
{ .pid = 300, .ip = 0x40000 + 900, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
/* bash [libc] realloc() */ /* bash [libc] realloc() */
{ .pid = 300, .ip = 0x50000 + 900, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
/* bash [kernel] page_fault() */ /* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, }, { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
}, },
}; };
...@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel; struct perf_evsel *evsel;
struct addr_location al; struct addr_location al;
struct hist_entry *he; struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, }; struct perf_sample sample = { .period = 1, };
size_t i = 0, k; size_t i = 0, k;
/* /*
...@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out; goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL, he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0); NULL, NULL, 1, 1, 0, true);
if (he == NULL) if (he == NULL)
goto out; goto out;
...@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out; goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL, he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0); NULL, NULL, 1, 1, 0, true);
if (he == NULL) if (he == NULL)
goto out; goto out;
......
...@@ -22,31 +22,31 @@ struct sample { ...@@ -22,31 +22,31 @@ struct sample {
/* For the numbers, see hists_common.c */ /* For the numbers, see hists_common.c */
static struct sample fake_samples[] = { static struct sample fake_samples[] = {
/* perf [kernel] schedule() */ /* perf [kernel] schedule() */
{ .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */ /* perf [perf] main() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */ /* perf [perf] cmd_record() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [libc] malloc() */ /* perf [libc] malloc() */
{ .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [libc] free() */ /* perf [libc] free() */
{ .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
/* perf [perf] main() */ /* perf [perf] main() */
{ .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [kernel] page_fault() */ /* perf [kernel] page_fault() */
{ .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */ /* bash [bash] main() */
{ .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */ /* bash [bash] xmalloc() */
{ .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [kernel] page_fault() */ /* bash [kernel] page_fault() */
{ .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
}; };
static int add_hist_entries(struct hists *hists, struct machine *machine) static int add_hist_entries(struct hists *hists, struct machine *machine)
{ {
struct addr_location al; struct addr_location al;
struct hist_entry *he; struct perf_evsel *evsel = hists_to_evsel(hists);
struct perf_sample sample = { .period = 100, }; struct perf_sample sample = { .period = 100, };
size_t i; size_t i;
...@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) ...@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
.misc = PERF_RECORD_MISC_USER, .misc = PERF_RECORD_MISC_USER,
}, },
}; };
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
sample.cpu = fake_samples[i].cpu; sample.cpu = fake_samples[i].cpu;
sample.pid = fake_samples[i].pid; sample.pid = fake_samples[i].pid;
...@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) ...@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
&sample) < 0) &sample) < 0)
goto out; goto out;
he = __hists__add_entry(hists, &al, NULL, NULL, NULL, if (hist_entry_iter__add(&iter, &al, evsel, &sample,
sample.period, 1, 0); PERF_MAX_STACK_DEPTH, NULL) < 0)
if (he == NULL)
goto out; goto out;
fake_samples[i].thread = al.thread; fake_samples[i].thread = al.thread;
......
...@@ -45,6 +45,7 @@ int test__hists_filter(void); ...@@ -45,6 +45,7 @@ int test__hists_filter(void);
int test__mmap_thread_lookup(void); int test__mmap_thread_lookup(void);
int test__thread_mg_share(void); int test__thread_mg_share(void);
int test__hists_output(void); int test__hists_output(void);
int test__hists_cumulate(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT #ifdef HAVE_DWARF_UNWIND_SUPPORT
......
...@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, ...@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
static void hist_browser__update_nr_entries(struct hist_browser *hb); static void hist_browser__update_nr_entries(struct hist_browser *hb);
static struct rb_node *hists__filter_entries(struct rb_node *nd, static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt); float min_pcnt);
static bool hist_browser__has_filter(struct hist_browser *hb) static bool hist_browser__has_filter(struct hist_browser *hb)
...@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) ...@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
struct hists *hists = browser->hists; struct hists *hists = browser->hists;
for (nd = rb_first(&hists->entries); for (nd = rb_first(&hists->entries);
(nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
nd = rb_next(nd)) { nd = rb_next(nd)) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
hist_entry__set_folding(he, unfold); hist_entry__set_folding(he, unfold);
...@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ ...@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
__hpp__slsmg_color_printf, true); \ __hpp__slsmg_color_printf, true); \
} }
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int \
hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
if (!symbol_conf.cumulate_callchain) { \
int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \
slsmg_printf("%s", hpp->buf); \
\
return ret; \
} \
return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \
__hpp__slsmg_color_printf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
void hist_browser__init_hpp(void) void hist_browser__init_hpp(void)
{ {
...@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void) ...@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void)
hist_browser__hpp_color_overhead_guest_sys; hist_browser__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
hist_browser__hpp_color_overhead_guest_us; hist_browser__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
hist_browser__hpp_color_overhead_acc;
} }
static int hist_browser__show_entry(struct hist_browser *browser, static int hist_browser__show_entry(struct hist_browser *browser,
...@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) ...@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
for (nd = browser->top; nd; nd = rb_next(nd)) { for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(h->hists); float percent;
float percent = 0.0;
if (h->filtered) if (h->filtered)
continue; continue;
if (total) percent = hist_entry__get_percent_limit(h);
percent = h->stat.period * 100.0 / total;
if (percent < hb->min_pcnt) if (percent < hb->min_pcnt)
continue; continue;
...@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) ...@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
} }
static struct rb_node *hists__filter_entries(struct rb_node *nd, static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt) float min_pcnt)
{ {
while (nd != NULL) { while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists); float percent = hist_entry__get_percent_limit(h);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
if (!h->filtered && percent >= min_pcnt) if (!h->filtered && percent >= min_pcnt)
return nd; return nd;
...@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, ...@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
} }
static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt) float min_pcnt)
{ {
while (nd != NULL) { while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists); float percent = hist_entry__get_percent_limit(h);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
if (!h->filtered && percent >= min_pcnt) if (!h->filtered && percent >= min_pcnt)
return nd; return nd;
...@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser, ...@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
switch (whence) { switch (whence) {
case SEEK_SET: case SEEK_SET:
nd = hists__filter_entries(rb_first(browser->entries), nd = hists__filter_entries(rb_first(browser->entries),
hb->hists, hb->min_pcnt); hb->min_pcnt);
break; break;
case SEEK_CUR: case SEEK_CUR:
nd = browser->top; nd = browser->top;
goto do_offset; goto do_offset;
case SEEK_END: case SEEK_END:
nd = hists__filter_prev_entries(rb_last(browser->entries), nd = hists__filter_prev_entries(rb_last(browser->entries),
hb->hists, hb->min_pcnt); hb->min_pcnt);
first = false; first = false;
break; break;
default: default:
...@@ -913,8 +924,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser, ...@@ -913,8 +924,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
break; break;
} }
} }
nd = hists__filter_entries(rb_next(nd), hb->hists, nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
hb->min_pcnt);
if (nd == NULL) if (nd == NULL)
break; break;
--offset; --offset;
...@@ -947,7 +957,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser, ...@@ -947,7 +957,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
} }
} }
nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, nd = hists__filter_prev_entries(rb_prev(nd),
hb->min_pcnt); hb->min_pcnt);
if (nd == NULL) if (nd == NULL)
break; break;
...@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, ...@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
{ {
struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
browser->hists,
browser->min_pcnt); browser->min_pcnt);
int printed = 0; int printed = 0;
...@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) ...@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
printed += hist_browser__fprintf_entry(browser, h, fp); printed += hist_browser__fprintf_entry(browser, h, fp);
nd = hists__filter_entries(rb_next(nd), browser->hists, nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
browser->min_pcnt);
} }
return printed; return printed;
...@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) ...@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
return; return;
} }
while ((nd = hists__filter_entries(nd, hb->hists, while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
hb->min_pcnt)) != NULL) {
nr_entries++; nr_entries++;
nd = rb_next(nd); nd = rb_next(nd);
} }
......
...@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, ...@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
__percent_color_snprintf, true); \ __percent_color_snprintf, true); \
} }
/*
 * Generate the GTK colour callback for an accumulated-period column.
 *
 * Expands to two functions:
 *  - he_get_acc_<_field>(): accessor returning he->stat_acc-><_field>;
 *  - perf_gtk__hpp_color_<_type>(): formats that value as " %6.2f%%"
 *    through __hpp__fmt_acc() using __percent_color_snprintf.
 */
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
__percent_color_snprintf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_PERCENT_FN
...@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void) ...@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_guest_sys; perf_gtk__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
perf_gtk__hpp_color_overhead_guest_us; perf_gtk__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
perf_gtk__hpp_color_overhead_acc;
} }
static void callchain_list__sym_name(struct callchain_list *cl, static void callchain_list__sym_name(struct callchain_list *cl,
...@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, ...@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (perf_hpp__should_skip(fmt)) if (perf_hpp__should_skip(fmt))
continue; continue;
/*
* XXX no way to determine where symcol column is..
* Just use last column for now.
*/
if (perf_hpp__is_sort_entry(fmt))
sym_col = col_idx;
fmt->header(fmt, &hpp, hists_to_evsel(hists)); fmt->header(fmt, &hpp, hists_to_evsel(hists));
gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
...@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, ...@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
GtkTreeIter iter; GtkTreeIter iter;
u64 total = hists__total_period(h->hists); u64 total = hists__total_period(h->hists);
float percent = 0.0; float percent;
if (h->filtered) if (h->filtered)
continue; continue;
if (total) percent = hist_entry__get_percent_limit(h);
percent = h->stat.period * 100.0 / total;
if (percent < min_pcnt) if (percent < min_pcnt)
continue; continue;
...@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, ...@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (symbol_conf.use_callchain && sort__has_sym) { if (symbol_conf.use_callchain && sort__has_sym) {
if (callchain_param.mode == CHAIN_GRAPH_REL) if (callchain_param.mode == CHAIN_GRAPH_REL)
total = h->stat.period; total = symbol_conf.cumulate_callchain ?
h->stat_acc->period : h->stat.period;
perf_gtk__add_callchain(&h->sorted_chain, store, &iter, perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
sym_col, total); sym_col, total);
......
...@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, ...@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
return ret; return ret;
} }
/*
 * Format an accumulated field of @he into @hpp.
 *
 * While symbol_conf.cumulate_callchain is off a right-aligned "N/A"
 * placeholder is written instead (8 columns for percent output,
 * 12 otherwise); otherwise the work is delegated to __hpp__fmt().
 *
 * Returns whatever snprintf() / __hpp__fmt() return.
 */
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
		   hpp_field_fn get_field, const char *fmt,
		   hpp_snprint_fn print_fn, bool fmt_percent)
{
	int width;

	if (symbol_conf.cumulate_callchain)
		return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);

	width = fmt_percent ? 8 : 12;
	return snprintf(hpp->buf, hpp->size, "%*s", width, "N/A");
}
static int field_cmp(u64 field_a, u64 field_b) static int field_cmp(u64 field_a, u64 field_b)
{ {
if (field_a > field_b) if (field_a > field_b)
...@@ -160,6 +172,24 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, ...@@ -160,6 +172,24 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret; return ret;
} }
/*
 * Compare two entries on an accumulated field obtained via @get_field.
 *
 * Returns 0 when symbol_conf.cumulate_callchain is off.  Otherwise the
 * field_cmp() result is used, with the callchain max_depth difference
 * (b minus a) as tie-breaker.
 */
static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
			   hpp_field_fn get_field)
{
	s64 diff;

	if (!symbol_conf.cumulate_callchain)
		return 0;

	diff = field_cmp(get_field(a), get_field(b));
	if (diff == 0) {
		/* Put caller above callee when they have equal period. */
		diff = b->callchain->max_depth - a->callchain->max_depth;
	}

	return diff;
}
#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \ struct perf_hpp *hpp, \
...@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ ...@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
return __hpp__sort(a, b, he_get_##_field); \ return __hpp__sort(a, b, he_get_##_field); \
} }
/*
 * Generate the colour callback for an accumulated-period column.
 *
 * Expands to two functions:
 *  - he_get_acc_<_field>(): accessor returning he->stat_acc-><_field>;
 *  - hpp__color_<_type>(): formats that value as " %6.2f%%" through
 *    __hpp__fmt_acc() using hpp_color_scnprintf.
 */
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
hpp_color_scnprintf, true); \
}
/*
 * Generate hpp__entry_<_type>(), the plain-text entry callback for an
 * accumulated-period column.  The format is " %.2f" when
 * symbol_conf.field_sep is set and " %6.2f%%" otherwise.
 *
 * Uses he_get_acc_<_field>(), which this macro does not define itself.
 */
#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \
hpp_entry_scnprintf, true); \
}
/*
 * Generate hpp__sort_<_type>(), which orders two entries with
 * __hpp__sort_acc() on the value returned by he_get_acc_<_field>().
 * The accessor is not defined by this macro.
 */
#define __HPP_SORT_ACC_FN(_type, _field) \
static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
{ \
return __hpp__sort_acc(a, b, he_get_acc_##_field); \
}
#define __HPP_ENTRY_RAW_FN(_type, _field) \ #define __HPP_ENTRY_RAW_FN(_type, _field) \
static u64 he_get_raw_##_field(struct hist_entry *he) \ static u64 he_get_raw_##_field(struct hist_entry *he) \
{ \ { \
...@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \ ...@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \
__HPP_ENTRY_PERCENT_FN(_type, _field) \ __HPP_ENTRY_PERCENT_FN(_type, _field) \
__HPP_SORT_FN(_type, _field) __HPP_SORT_FN(_type, _field)
/*
 * Emit the full callback set (header, width, color, entry, sort) for an
 * accumulated percentage column.  The color macro comes before the
 * entry and sort macros because it defines the he_get_acc_<_field>()
 * accessor they reference.
 */
#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
__HPP_SORT_ACC_FN(_type, _field)
#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ __HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_ENTRY_RAW_FN(_type, _field) \ __HPP_ENTRY_RAW_FN(_type, _field) \
__HPP_SORT_RAW_FN(_type, _field) __HPP_SORT_RAW_FN(_type, _field)
__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
HPP_RAW_FNS(period, "Period", period, 12, 12) HPP_RAW_FNS(period, "Period", period, 12, 12)
...@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, ...@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused,
.sort = hpp__sort_ ## _name, \ .sort = hpp__sort_ ## _name, \
} }
/*
 * Initializer for a struct perf_hpp_fmt describing an accumulated
 * column: wires the hpp__{header,width,color,entry,sort}_<_name>
 * callbacks and uses hpp__nop_cmp for both .cmp and .collapse.
 */
#define HPP__COLOR_ACC_PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
.width = hpp__width_ ## _name, \
.color = hpp__color_ ## _name, \
.entry = hpp__entry_ ## _name, \
.cmp = hpp__nop_cmp, \
.collapse = hpp__nop_cmp, \
.sort = hpp__sort_ ## _name, \
}
#define HPP__PRINT_FNS(_name) \ #define HPP__PRINT_FNS(_name) \
{ \ { \
.header = hpp__header_ ## _name, \ .header = hpp__header_ ## _name, \
...@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = { ...@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS(overhead_us), HPP__COLOR_PRINT_FNS(overhead_us),
HPP__COLOR_PRINT_FNS(overhead_guest_sys), HPP__COLOR_PRINT_FNS(overhead_guest_sys),
HPP__COLOR_PRINT_FNS(overhead_guest_us), HPP__COLOR_PRINT_FNS(overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
HPP__PRINT_FNS(samples), HPP__PRINT_FNS(samples),
HPP__PRINT_FNS(period) HPP__PRINT_FNS(period)
}; };
...@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list); ...@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list);
#undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_PRINT_FNS
#undef HPP__COLOR_ACC_PRINT_FNS
#undef HPP__PRINT_FNS #undef HPP__PRINT_FNS
#undef HPP_PERCENT_FNS #undef HPP_PERCENT_FNS
#undef HPP_PERCENT_ACC_FNS
#undef HPP_RAW_FNS #undef HPP_RAW_FNS
#undef __HPP_HEADER_FN #undef __HPP_HEADER_FN
#undef __HPP_WIDTH_FN #undef __HPP_WIDTH_FN
#undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_PERCENT_FN
#undef __HPP_ENTRY_PERCENT_FN #undef __HPP_ENTRY_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
#undef __HPP_ENTRY_ACC_PERCENT_FN
#undef __HPP_ENTRY_RAW_FN #undef __HPP_ENTRY_RAW_FN
#undef __HPP_SORT_FN
#undef __HPP_SORT_ACC_FN
#undef __HPP_SORT_RAW_FN
void perf_hpp__init(void) void perf_hpp__init(void)
...@@ -361,6 +447,13 @@ void perf_hpp__init(void) ...@@ -361,6 +447,13 @@ void perf_hpp__init(void)
if (field_order) if (field_order)
return; return;
if (symbol_conf.cumulate_callchain) {
perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
perf_hpp__format[PERF_HPP__OVERHEAD].header =
hpp__header_overhead_self;
}
perf_hpp__column_enable(PERF_HPP__OVERHEAD); perf_hpp__column_enable(PERF_HPP__OVERHEAD);
if (symbol_conf.show_cpu_utilization) { if (symbol_conf.show_cpu_utilization) {
...@@ -383,6 +476,12 @@ void perf_hpp__init(void) ...@@ -383,6 +476,12 @@ void perf_hpp__init(void)
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list)) if (list_empty(list))
list_add(list, &perf_hpp__sort_list); list_add(list, &perf_hpp__sort_list);
if (symbol_conf.cumulate_callchain) {
list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
if (list_empty(list))
list_add(list, &perf_hpp__sort_list);
}
} }
void perf_hpp__column_register(struct perf_hpp_fmt *format) void perf_hpp__column_register(struct perf_hpp_fmt *format)
...@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format) ...@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
list_add_tail(&format->list, &perf_hpp__list); list_add_tail(&format->list, &perf_hpp__list);
} }
/* Unlink @format from the list it was linked on through format->list. */
void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
{
	struct list_head *entry = &format->list;

	list_del(entry);
}
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
{ {
list_add_tail(&format->sort_list, &perf_hpp__sort_list); list_add_tail(&format->sort_list, &perf_hpp__sort_list);
...@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col) ...@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col)
perf_hpp__column_register(&perf_hpp__format[col]); perf_hpp__column_register(&perf_hpp__format[col]);
} }
void perf_hpp__column_disable(unsigned col)
{
BUG_ON(col >= PERF_HPP__MAX_INDEX);
perf_hpp__column_unregister(&perf_hpp__format[col]);
}
/*
 * Undo the column setup done for callchain accumulation: drop the
 * accumulated-overhead column and restore the default header callback
 * of the overhead column.  Nothing is touched when field_order is set.
 */
void perf_hpp__cancel_cumulate(void)
{
	if (!field_order) {
		perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
		perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
	}
}
void perf_hpp__setup_output_field(void) void perf_hpp__setup_output_field(void)
{ {
struct perf_hpp_fmt *fmt; struct perf_hpp_fmt *fmt;
......
...@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, ...@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
{ {
switch (callchain_param.mode) { switch (callchain_param.mode) {
case CHAIN_GRAPH_REL: case CHAIN_GRAPH_REL:
return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, return callchain__fprintf_graph(fp, &he->sorted_chain,
symbol_conf.cumulate_callchain ?
he->stat_acc->period : he->stat.period,
left_margin); left_margin);
break; break;
case CHAIN_GRAPH_ABS: case CHAIN_GRAPH_ABS:
...@@ -461,12 +463,12 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, ...@@ -461,12 +463,12 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
float percent = h->stat.period * 100.0 / float percent;
hists->stats.total_period;
if (h->filtered) if (h->filtered)
continue; continue;
percent = hist_entry__get_percent_limit(h);
if (percent < min_pcnt) if (percent < min_pcnt)
continue; continue;
......
...@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent ...@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
if (sample->callchain == NULL) if (sample->callchain == NULL)
return 0; return 0;
if (symbol_conf.use_callchain || sort__has_parent) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
sort__has_parent) {
return machine__resolve_callchain(al->machine, evsel, al->thread, return machine__resolve_callchain(al->machine, evsel, al->thread,
sample, parent, al, max_stack); sample, parent, al, max_stack);
} }
...@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp ...@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
return 0; return 0;
return callchain_append(he->callchain, &callchain_cursor, sample->period); return callchain_append(he->callchain, &callchain_cursor, sample->period);
} }
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved)
{
al->map = node->map;
al->sym = node->sym;
if (node->map)
al->addr = node->map->map_ip(node->map, node->ip);
else
al->addr = node->ip;
if (al->sym == NULL) {
if (hide_unresolved)
return 0;
if (al->map == NULL)
goto out;
}
if (al->map->groups == &al->machine->kmaps) {
if (machine__is_host(al->machine)) {
al->cpumode = PERF_RECORD_MISC_KERNEL;
al->level = 'k';
} else {
al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
al->level = 'g';
}
} else {
if (machine__is_host(al->machine)) {
al->cpumode = PERF_RECORD_MISC_USER;
al->level = '.';
} else if (perf_guest) {
al->cpumode = PERF_RECORD_MISC_GUEST_USER;
al->level = 'u';
} else {
al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
al->level = 'H';
}
}
out:
return 1;
}
...@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent ...@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
struct perf_evsel *evsel, struct addr_location *al, struct perf_evsel *evsel, struct addr_location *al,
int max_stack); int max_stack);
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved);
extern const char record_callchain_help[]; extern const char record_callchain_help[];
int parse_callchain_report_opt(const char *arg); int parse_callchain_report_opt(const char *arg);
/*
 * Copy @src into @dest, then make the copy begin at @src's current
 * node: dest->first becomes src->curr and the node count is reduced by
 * the number of positions @src has already consumed.
 */
static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
					     struct callchain_cursor *src)
{
	*dest = *src;

	dest->nr = src->nr - src->pos;
	dest->first = src->curr;
}
#endif /* __PERF_CALLCHAIN_H */ #endif /* __PERF_CALLCHAIN_H */
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "session.h" #include "session.h"
#include "sort.h" #include "sort.h"
#include "evsel.h" #include "evsel.h"
#include "annotate.h"
#include <math.h> #include <math.h>
static bool hists__filter_entry_by_dso(struct hists *hists, static bool hists__filter_entry_by_dso(struct hists *hists,
...@@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) ...@@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
return true; return true;
he_stat__decay(&he->stat); he_stat__decay(&he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__decay(he->stat_acc);
diff = prev_period - he->stat.period; diff = prev_period - he->stat.period;
...@@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) ...@@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
* histogram, sorted on item, collects periods * histogram, sorted on item, collects periods
*/ */
static struct hist_entry *hist_entry__new(struct hist_entry *template) static struct hist_entry *hist_entry__new(struct hist_entry *template,
bool sample_self)
{ {
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; size_t callchain_size = 0;
struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); struct hist_entry *he;
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
callchain_size = sizeof(struct callchain_root);
he = zalloc(sizeof(*he) + callchain_size);
if (he != NULL) { if (he != NULL) {
*he = *template; *he = *template;
if (symbol_conf.cumulate_callchain) {
he->stat_acc = malloc(sizeof(he->stat));
if (he->stat_acc == NULL) {
free(he);
return NULL;
}
memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
if (!sample_self)
memset(&he->stat, 0, sizeof(he->stat));
}
if (he->ms.map) if (he->ms.map)
he->ms.map->referenced = true; he->ms.map->referenced = true;
...@@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) ...@@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
*/ */
he->branch_info = malloc(sizeof(*he->branch_info)); he->branch_info = malloc(sizeof(*he->branch_info));
if (he->branch_info == NULL) { if (he->branch_info == NULL) {
free(he->stat_acc);
free(he); free(he);
return NULL; return NULL;
} }
...@@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent) ...@@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
static struct hist_entry *add_hist_entry(struct hists *hists, static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry, struct hist_entry *entry,
struct addr_location *al) struct addr_location *al,
bool sample_self)
{ {
struct rb_node **p; struct rb_node **p;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
...@@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists, ...@@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
cmp = hist_entry__cmp(he, entry); cmp = hist_entry__cmp(he, entry);
if (!cmp) { if (!cmp) {
he_stat__add_period(&he->stat, period, weight); if (sample_self)
he_stat__add_period(&he->stat, period, weight);
if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period, weight);
/* /*
* This mem info was allocated from sample__resolve_mem * This mem info was allocated from sample__resolve_mem
...@@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists, ...@@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
p = &(*p)->rb_right; p = &(*p)->rb_right;
} }
he = hist_entry__new(entry); he = hist_entry__new(entry, sample_self);
if (!he) if (!he)
return NULL; return NULL;
rb_link_node(&he->rb_node_in, parent, p); rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, hists->entries_in); rb_insert_color(&he->rb_node_in, hists->entries_in);
out: out:
he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (sample_self)
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
if (symbol_conf.cumulate_callchain)
he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
return he; return he;
} }
...@@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists, ...@@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *sym_parent, struct symbol *sym_parent,
struct branch_info *bi, struct branch_info *bi,
struct mem_info *mi, struct mem_info *mi,
u64 period, u64 weight, u64 transaction) u64 period, u64 weight, u64 transaction,
bool sample_self)
{ {
struct hist_entry entry = { struct hist_entry entry = {
.thread = al->thread, .thread = al->thread,
...@@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists, ...@@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction, .transaction = transaction,
}; };
return add_hist_entry(hists, &entry, al); return add_hist_entry(hists, &entry, al, sample_self);
}
/* next_entry stub: always returns 0, i.e. there is no further entry. */
static int
iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
		    struct addr_location *al __maybe_unused)
{
	return 0;
}
/* add_next_entry stub: adds nothing and reports success (0). */
static int
iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
			struct addr_location *al __maybe_unused)
{
	return 0;
}
/*
 * Resolve the memory info of the current sample and keep it in
 * iter->priv for the following add step.
 *
 * Returns 0 on success, -ENOMEM when sample__resolve_mem() yields NULL.
 */
static int
iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct mem_info *mi = sample__resolve_mem(iter->sample, al);

	if (!mi)
		return -ENOMEM;

	iter->priv = mi;
	return 0;
}
/*
 * Add the hist entry for a memory sample, using the mem_info stored in
 * iter->priv by the prepare step.
 *
 * Returns 0 on success, -EINVAL when no mem_info is available and
 * -ENOMEM when the entry cannot be added.
 */
static int
iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct mem_info *mi = iter->priv;
	struct hist_entry *entry;
	u64 cost;

	if (!mi)
		return -EINVAL;

	/* A zero weight is counted as 1. */
	cost = iter->sample->weight ? iter->sample->weight : 1;

	/*
	 * The weight is passed as the period too: hists__collapse_resort()
	 * sorts solely on periods, and the desired order is
	 * nr_events * weight, which he_stat__add_period() then yields
	 * indirectly.
	 */
	entry = __hists__add_entry(&iter->evsel->hists, al, iter->parent,
				   NULL, mi, cost, cost, 0, true);
	if (!entry)
		return -ENOMEM;

	iter->he = entry;
	return 0;
}
/*
 * Finish a memory sample: count it and append its callchain to the
 * entry added earlier, then clear the per-sample iterator state.
 *
 * Returns -EINVAL when no entry was added, otherwise the result of
 * hist_entry__append_callchain().
 */
static int
iter_finish_mem_entry(struct hist_entry_iter *iter,
		      struct addr_location *al __maybe_unused)
{
	struct hist_entry *entry = iter->he;
	int ret = -EINVAL;

	if (entry != NULL) {
		hists__inc_nr_samples(&iter->evsel->hists, entry->filtered);
		ret = hist_entry__append_callchain(entry, iter->sample);
	}

	/*
	 * iter->priv (mem_info) is not freed here: it was either already
	 * freed in add_hist_entry() or handed over to a new hist entry by
	 * hist_entry__new().
	 */
	iter->priv = NULL;
	iter->he = NULL;

	return ret;
}
/*
 * Resolve the branch stack of the current sample and set the iterator
 * up to walk it: priv holds the resolved array, total its length
 * (sample->branch_stack->nr) and curr starts at 0.
 *
 * Returns 0 on success, -ENOMEM when resolution fails.
 */
static int
iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct perf_sample *sample = iter->sample;
	struct branch_info *branches = sample__resolve_bstack(sample, al);

	if (branches == NULL)
		return -ENOMEM;

	iter->priv = branches;
	iter->total = sample->branch_stack->nr;
	iter->curr = 0;

	return 0;
}
/*
 * Branch samples add no "single" entry.  iter->he is cleared so that the
 * caller, which only invokes its callback when iter->he is set, skips it.
 */
static int
iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
			     struct addr_location *al __maybe_unused)
{
	/* to avoid calling callback function */
	iter->he = NULL;

	return 0;
}
/*
 * Load the target ("to") side of the current branch into @al.
 *
 * Returns 1 when @al was filled, 0 when there is no branch array or all
 * branches have been consumed.
 */
static int
iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct branch_info *branches = iter->priv;
	int idx = iter->curr;

	if (branches == NULL || iter->curr >= iter->total)
		return 0;

	al->map = branches[idx].to.map;
	al->sym = branches[idx].to.sym;
	al->addr = branches[idx].to.addr;

	return 1;
}
static int
iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
struct branch_info *bi;
struct perf_evsel *evsel = iter->evsel;
struct hist_entry *he = NULL;
int i = iter->curr;
int err = 0;
bi = iter->priv;
if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
goto out;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
1, 1, 0, true);
if (he == NULL)
return -ENOMEM;
hists__inc_nr_samples(&evsel->hists, he->filtered);
out:
iter->he = he;
iter->curr++;
return err;
}
/*
 * Release the resolved branch array and clear iter->he.
 *
 * Returns 0 when every branch was walked, -1 when the walk stopped
 * before reaching iter->total.
 */
static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
			 struct addr_location *al __maybe_unused)
{
	zfree(&iter->priv);
	iter->he = NULL;

	if (iter->curr < iter->total)
		return -1;

	return 0;
}
/* prepare_entry stub for normal samples: nothing to set up, returns 0. */
static int
iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
			  struct addr_location *al __maybe_unused)
{
	return 0;
}
/*
 * Add the hist entry for a normal sample, using the sample's period,
 * weight and transaction, and remember it in iter->he.
 *
 * Returns 0 on success, -ENOMEM when the entry cannot be added.
 */
static int
iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct perf_sample *sample = iter->sample;
	struct hist_entry *entry;

	entry = __hists__add_entry(&iter->evsel->hists, al, iter->parent,
				   NULL, NULL, sample->period, sample->weight,
				   sample->transaction, true);
	if (!entry)
		return -ENOMEM;

	iter->he = entry;
	return 0;
}
/*
 * Finish a normal sample: clear iter->he, count the sample and append
 * its callchain to the entry.
 *
 * Returns 0 when no entry was added, otherwise the result of
 * hist_entry__append_callchain().
 */
static int
iter_finish_normal_entry(struct hist_entry_iter *iter,
			 struct addr_location *al __maybe_unused)
{
	struct hist_entry *entry = iter->he;

	if (!entry)
		return 0;

	iter->he = NULL;
	hists__inc_nr_samples(&iter->evsel->hists, entry->filtered);

	return hist_entry__append_callchain(entry, iter->sample);
}
/*
 * Prepare a cumulative walk: commit the global callchain cursor and
 * allocate the per-sample cache of already-added entries, stored in
 * iter->priv with iter->curr as its fill level.
 *
 * Returns 0 on success, -ENOMEM when the cache cannot be allocated.
 */
static int
iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
			      struct addr_location *al __maybe_unused)
{
	struct hist_entry **cache;
	size_t nr_slots = PERF_MAX_STACK_DEPTH + 1;

	callchain_cursor_commit(&callchain_cursor);

	/*
	 * The cache is used to detect cycles or recursions so that they
	 * are cumulated only once, preventing entries with more than
	 * 100% overhead.
	 */
	cache = malloc(sizeof(*cache) * nr_slots);
	if (!cache)
		return -ENOMEM;

	iter->curr = 0;
	iter->priv = cache;

	return 0;
}
/*
 * Add the entry for the sampled location itself (sample_self == true),
 * record it in the duplicate cache, append the callchain to it and
 * count the sample.
 *
 * Returns 0 on success, -ENOMEM when the entry cannot be added.
 */
static int
iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
				 struct addr_location *al)
{
	struct perf_evsel *evsel = iter->evsel;
	struct perf_sample *sample = iter->sample;
	struct hist_entry **cache = iter->priv;
	struct hist_entry *self;

	self = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
				  sample->period, sample->weight,
				  sample->transaction, true);
	if (!self)
		return -ENOMEM;

	iter->he = self;
	cache[iter->curr++] = self;

	callchain_append(self->callchain, &callchain_cursor, sample->period);

	/*
	 * callchain_append() advanced the cursor to the end, so it has to
	 * be re-initialized before the chain is walked again.
	 */
	callchain_cursor_commit(&callchain_cursor);

	hists__inc_nr_samples(&evsel->hists, self->filtered);

	return 0;
}
static int
iter_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
{
struct callchain_cursor_node *node;
node = callchain_cursor_current(&callchain_cursor);
if (node == NULL)
return 0;
return fill_callchain_info(al, node, iter->hide_unresolved);
}
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
{
struct perf_evsel *evsel = iter->evsel;
struct perf_sample *sample = iter->sample;
struct hist_entry **he_cache = iter->priv;
struct hist_entry *he;
struct hist_entry he_tmp = {
.cpu = al->cpu,
.thread = al->thread,
.comm = thread__comm(al->thread),
.ip = al->addr,
.ms = {
.map = al->map,
.sym = al->sym,
},
.parent = iter->parent,
};
int i;
struct callchain_cursor cursor;
callchain_cursor_snapshot(&cursor, &callchain_cursor);
callchain_cursor_advance(&callchain_cursor);
/*
* Check if there's duplicate entries in the callchain.
* It's possible that it has cycles or recursive calls.
*/
for (i = 0; i < iter->curr; i++) {
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
return 0;
}
}
he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction, false);
if (he == NULL)
return -ENOMEM;
iter->he = he;
he_cache[iter->curr++] = he;
callchain_append(he->callchain, &cursor, sample->period);
return 0;
}
/* Free the duplicate cache held in iter->priv and clear iter->he. */
static int
iter_finish_cumulative_entry(struct hist_entry_iter *iter,
			     struct addr_location *al __maybe_unused)
{
	iter->he = NULL;
	zfree(&iter->priv);

	return 0;
}
/*
 * Iterator callbacks for memory samples.  The next_entry and
 * add_next_entry slots are no-op stubs.
 */
const struct hist_iter_ops hist_iter_mem = {
.prepare_entry = iter_prepare_mem_entry,
.add_single_entry = iter_add_single_mem_entry,
.next_entry = iter_next_nop_entry,
.add_next_entry = iter_add_next_nop_entry,
.finish_entry = iter_finish_mem_entry,
};
/*
 * Iterator callbacks for branch-stack samples.  The add_single_entry
 * callback adds nothing; entries are added one per branch through
 * next_entry / add_next_entry.
 */
const struct hist_iter_ops hist_iter_branch = {
.prepare_entry = iter_prepare_branch_entry,
.add_single_entry = iter_add_single_branch_entry,
.next_entry = iter_next_branch_entry,
.add_next_entry = iter_add_next_branch_entry,
.finish_entry = iter_finish_branch_entry,
};
/*
 * Iterator callbacks for normal samples.  The next_entry and
 * add_next_entry slots are no-op stubs.
 */
const struct hist_iter_ops hist_iter_normal = {
.prepare_entry = iter_prepare_normal_entry,
.add_single_entry = iter_add_single_normal_entry,
.next_entry = iter_next_nop_entry,
.add_next_entry = iter_add_next_nop_entry,
.finish_entry = iter_finish_normal_entry,
};
/*
 * Iterator callbacks for cumulative (callchain-accumulating) samples:
 * one entry for the sample itself, then one per callchain node through
 * next_entry / add_next_entry.
 */
const struct hist_iter_ops hist_iter_cumulative = {
.prepare_entry = iter_prepare_cumulative_entry,
.add_single_entry = iter_add_single_cumulative_entry,
.next_entry = iter_next_cumulative_entry,
.add_next_entry = iter_add_next_cumulative_entry,
.finish_entry = iter_finish_cumulative_entry,
};
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg)
{
int err, err2;
err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
max_stack_depth);
if (err)
return err;
iter->evsel = evsel;
iter->sample = sample;
err = iter->ops->prepare_entry(iter, al);
if (err)
goto out;
err = iter->ops->add_single_entry(iter, al);
if (err)
goto out;
if (iter->he && iter->add_entry_cb) {
err = iter->add_entry_cb(iter, al, true, arg);
if (err)
goto out;
}
while (iter->ops->next_entry(iter, al)) {
err = iter->ops->add_next_entry(iter, al);
if (err)
break;
if (iter->he && iter->add_entry_cb) {
err = iter->add_entry_cb(iter, al, false, arg);
if (err)
goto out;
}
}
out:
err2 = iter->ops->finish_entry(iter, al);
if (!err)
err = err2;
return err;
} }
int64_t int64_t
...@@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he) ...@@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he)
{ {
zfree(&he->branch_info); zfree(&he->branch_info);
zfree(&he->mem_info); zfree(&he->mem_info);
zfree(&he->stat_acc);
free_srcline(he->srcline); free_srcline(he->srcline);
free(he); free(he);
} }
...@@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, ...@@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
if (!cmp) { if (!cmp) {
he_stat__add_stat(&iter->stat, &he->stat); he_stat__add_stat(&iter->stat, &he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__add_stat(iter->stat_acc, he->stat_acc);
if (symbol_conf.use_callchain) { if (symbol_conf.use_callchain) {
callchain_cursor_reset(&callchain_cursor); callchain_cursor_reset(&callchain_cursor);
...@@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type) ...@@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type)
events_stats__inc(&hists->stats, type); events_stats__inc(&hists->stats, type);
} }
/*
 * Account one PERF_RECORD_SAMPLE event in hists->stats.  Samples that
 * were not filtered additionally bump nr_non_filtered_samples.
 */
void hists__inc_nr_samples(struct hists *hists, bool filtered)
{
	events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);

	if (filtered)
		return;

	hists->stats.nr_non_filtered_samples++;
}
static struct hist_entry *hists__add_dummy_entry(struct hists *hists, static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair) struct hist_entry *pair)
{ {
...@@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, ...@@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
p = &(*p)->rb_right; p = &(*p)->rb_right;
} }
he = hist_entry__new(pair); he = hist_entry__new(pair, true);
if (he) { if (he) {
memset(&he->stat, 0, sizeof(he->stat)); memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists; he->hists = hists;
......
...@@ -96,12 +96,50 @@ struct hists { ...@@ -96,12 +96,50 @@ struct hists {
u16 col_len[HISTC_NR_COLS]; u16 col_len[HISTC_NR_COLS];
}; };
struct hist_entry_iter;
/*
 * Callback table used by hist_entry_iter__add().  Each callback receives
 * the iterator and the address location of the sample being processed
 * and returns an int.  For next_entry a non-zero return keeps the
 * iteration going; for the others a non-zero return is propagated to
 * the caller of hist_entry_iter__add() as an error.
 */
struct hist_iter_ops {
	/* called first, before any entry is added */
	int (*prepare_entry)(struct hist_entry_iter *iter,
			     struct addr_location *al);
	/* called once, right after prepare_entry succeeded */
	int (*add_single_entry)(struct hist_entry_iter *iter,
				struct addr_location *al);
	/* loop condition: non-zero while more entries remain */
	int (*next_entry)(struct hist_entry_iter *iter,
			  struct addr_location *al);
	/* called for every iteration that next_entry allowed */
	int (*add_next_entry)(struct hist_entry_iter *iter,
			      struct addr_location *al);
	/* called last, on the success and the error path */
	int (*finish_entry)(struct hist_entry_iter *iter,
			    struct addr_location *al);
};
/*
 * State carried through one hist_entry_iter__add() call; the callbacks
 * in 'ops' read and update it.
 */
struct hist_entry_iter {
/* NOTE(review): presumably the number of entries to iterate; not used in the visible code - confirm */
int total;
/* running index; the cumulative add_next callback uses it as the fill level of its entry cache */
int curr;
/* NOTE(review): not referenced in the visible code - confirm semantics against the callers */
bool hide_unresolved;
/* set by hist_entry_iter__add() from its arguments */
struct perf_evsel *evsel;
struct perf_sample *sample;
/* entry produced by the last add_* callback; NULL suppresses add_entry_cb */
struct hist_entry *he;
/* passed by address to sample__resolve_callchain() */
struct symbol *parent;
/* callback-private data; the cumulative finish callback zfree()s it */
void *priv;
/* callback table driving the iteration */
const struct hist_iter_ops *ops;
/*
 * user-defined callback function (optional); 'single' is true for the
 * call that follows add_single_entry and false for calls that follow
 * add_next_entry, 'arg' is forwarded from hist_entry_iter__add()
 */
int (*add_entry_cb)(struct hist_entry_iter *iter,
struct addr_location *al, bool single, void *arg);
};
/* Predefined callback tables, one per iteration mode; assign one to hist_entry_iter.ops. */
extern const struct hist_iter_ops hist_iter_normal;
extern const struct hist_iter_ops hist_iter_branch;
extern const struct hist_iter_ops hist_iter_mem;
extern const struct hist_iter_ops hist_iter_cumulative;
struct hist_entry *__hists__add_entry(struct hists *hists, struct hist_entry *__hists__add_entry(struct hists *hists,
struct addr_location *al, struct addr_location *al,
struct symbol *parent, struct symbol *parent,
struct branch_info *bi, struct branch_info *bi,
struct mem_info *mi, u64 period, struct mem_info *mi, u64 period,
u64 weight, u64 transaction); u64 weight, u64 transaction,
bool sample_self);
/*
 * Run the callbacks in iter->ops for one sample.  'arg' is handed
 * unchanged to iter->add_entry_cb.  Returns 0 on success or the first
 * non-zero value returned by callchain resolution or by a callback.
 */
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__transaction_len(void); int hist_entry__transaction_len(void);
...@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists); ...@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists);
void hists__reset_stats(struct hists *hists); void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists, u32 type); void hists__inc_nr_events(struct hists *hists, u32 type);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
void events_stats__inc(struct events_stats *stats, u32 type); void events_stats__inc(struct events_stats *stats, u32 type);
size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
...@@ -192,6 +231,7 @@ enum { ...@@ -192,6 +231,7 @@ enum {
PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US, PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES, PERF_HPP__SAMPLES,
PERF_HPP__PERIOD, PERF_HPP__PERIOD,
...@@ -200,7 +240,11 @@ enum { ...@@ -200,7 +240,11 @@ enum {
void perf_hpp__init(void); void perf_hpp__init(void);
void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_register(struct perf_hpp_fmt *format);
void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
void perf_hpp__column_enable(unsigned col); void perf_hpp__column_enable(unsigned col);
void perf_hpp__column_disable(unsigned col);
void perf_hpp__cancel_cumulate(void);
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
void perf_hpp__setup_output_field(void); void perf_hpp__setup_output_field(void);
void perf_hpp__reset_output_field(void); void perf_hpp__reset_output_field(void);
...@@ -218,6 +262,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); ...@@ -218,6 +262,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent); hpp_snprint_fn print_fn, bool fmt_percent);
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent);
static inline void advance_hpp(struct perf_hpp *hpp, int inc) static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{ {
......
...@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = { ...@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"), DIM(PERF_HPP__PERIOD, "period"),
}; };
...@@ -1581,6 +1582,9 @@ void reset_output_field(void) ...@@ -1581,6 +1582,9 @@ void reset_output_field(void)
sort__has_sym = 0; sort__has_sym = 0;
sort__has_dso = 0; sort__has_dso = 0;
field_order = NULL;
sort_order = NULL;
reset_dimensions(); reset_dimensions();
perf_hpp__reset_output_field(); perf_hpp__reset_output_field();
} }
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "parse-options.h" #include "parse-options.h"
#include "parse-events.h" #include "parse-events.h"
#include "hist.h"
#include "thread.h" #include "thread.h"
extern regex_t parent_regex; extern regex_t parent_regex;
...@@ -82,6 +82,7 @@ struct hist_entry { ...@@ -82,6 +82,7 @@ struct hist_entry {
struct list_head head; struct list_head head;
} pairs; } pairs;
struct he_stat stat; struct he_stat stat;
struct he_stat *stat_acc;
struct map_symbol ms; struct map_symbol ms;
struct thread *thread; struct thread *thread;
struct comm *comm; struct comm *comm;
...@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair, ...@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair,
list_add_tail(&pair->pairs.node, &he->pairs.head); list_add_tail(&pair->pairs.node, &he->pairs.head);
} }
/*
 * Return the share of the hists' total period that this entry accounts
 * for, as a percentage.  When symbol_conf.cumulate_callchain is set the
 * accumulated period (he->stat_acc) is used instead of the entry's own
 * period.  Returns 0 when the total period is 0.
 */
static inline float hist_entry__get_percent_limit(struct hist_entry *he)
{
	u64 total = hists__total_period(he->hists);
	u64 entry_period;

	/* Avoid dividing by zero on empty hists. */
	if (unlikely(total == 0))
		return 0;

	entry_period = symbol_conf.cumulate_callchain ? he->stat_acc->period
						      : he->stat.period;

	return entry_period * 100.0 / total;
}
enum sort_mode { enum sort_mode {
SORT_MODE__NORMAL, SORT_MODE__NORMAL,
SORT_MODE__BRANCH, SORT_MODE__BRANCH,
......
...@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries; ...@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries;
char **vmlinux_path; char **vmlinux_path;
struct symbol_conf symbol_conf = { struct symbol_conf symbol_conf = {
.use_modules = true, .use_modules = true,
.try_vmlinux_path = true, .try_vmlinux_path = true,
.annotate_src = true, .annotate_src = true,
.demangle = true, .demangle = true,
.symfs = "", .cumulate_callchain = true,
.symfs = "",
}; };
static enum dso_binary_type binary_type_symtab[] = { static enum dso_binary_type binary_type_symtab[] = {
......
...@@ -109,6 +109,7 @@ struct symbol_conf { ...@@ -109,6 +109,7 @@ struct symbol_conf {
show_nr_samples, show_nr_samples,
show_total_period, show_total_period,
use_callchain, use_callchain,
cumulate_callchain,
exclude_other, exclude_other,
show_cpu_utilization, show_cpu_utilization,
initialized, initialized,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment