Commit 9b261365 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core

Pull perf/core improvements and fixes from Jiri Olsa:

  * Add support to accumulate hist periods (Namhyung Kim)
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e450f90e 0506aecc
......@@ -111,7 +111,7 @@ OPTIONS
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort key not specified in -F will be appended
......@@ -163,6 +163,11 @@ OPTIONS
Default: fractal,0.5,callee,function.
--children::
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires that callchains are recorded.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
......
......@@ -119,7 +119,7 @@ Default is to monitor all CPUS.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort key not specified in --field will be appended
......@@ -161,6 +161,12 @@ Default is to monitor all CPUS.
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g.
--children::
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires the -g/--call-graph option
to be enabled.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
......
......@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
......
......@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
true);
if (he == NULL)
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);
return ret;
}
......
......@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction)
{
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
transaction) != NULL)
transaction, true) != NULL)
return 0;
return -ENOMEM;
}
......
......@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL);
return 0;
}
if (!strcmp(var, "report.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb);
}
......@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
*/
if (he->stat.nr_events == 1)
rep->nr_entries++;
/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
{
struct symbol *parent = NULL;
struct hist_entry *he;
struct mem_info *mi, *mx;
uint64_t cost;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
int err = 0;
struct report *rep = arg;
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
struct mem_info *mi;
struct branch_info *bi;
mi = sample__resolve_mem(sample, al);
if (!mi)
return -ENOMEM;
report__inc_stats(rep, he);
if (rep->hide_unresolved && !al->sym)
if (!ui__has_annotation())
return 0;
cost = sample->weight;
if (!cost)
cost = 1;
/*
* must pass period=weight in order to get the correct
* sorting from hists__collapse_resort() which is solely
* based on periods. We want sorting be done on nr_events * weight
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
cost, cost, 0);
if (!he)
return -ENOMEM;
if (ui__has_annotation()) {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
if (err)
goto out;
mx = he->mem_info;
err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
err = hist_entry__append_callchain(he, sample);
out:
return err;
}
static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
struct symbol *parent = NULL;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
bi = sample__resolve_bstack(sample, al);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
err = -ENOMEM;
/* overwrite the 'al' to branch-to info */
al->map = bi[i].to.map;
al->sym = bi[i].to.sym;
al->addr = bi[i].to.addr;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
1, 1, 0);
if (he) {
if (ui__has_annotation()) {
bx = he->branch_info;
err = addr_map_symbol__inc_samples(&bx->from,
evsel->idx);
if (err)
goto out;
err = addr_map_symbol__inc_samples(&bx->to,
evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
} else
} else if (rep->mem_mode) {
mi = he->mem_info;
err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
if (err)
goto out;
}
err = 0;
out:
free(bi);
return err;
}
static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
struct addr_location *al, struct perf_sample *sample)
{
struct symbol *parent = NULL;
struct hist_entry *he;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
if (he == NULL)
return -ENOMEM;
err = hist_entry__append_callchain(he, sample);
if (err)
goto out;
if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
report__inc_stats(rep, he);
} else if (symbol_conf.cumulate_callchain) {
if (single)
err = hist_entry__inc_addr_samples(he, evsel->idx,
al->addr);
} else {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}
out:
return err;
}
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
{
struct report *rep = container_of(tool, struct report, tool);
struct addr_location al;
struct hist_entry_iter iter = {
.hide_unresolved = rep->hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
......@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
if (sort__mode == SORT_MODE__BRANCH) {
ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding lbr entry, skipping event\n");
} else if (rep->mem_mode == 1) {
ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding mem entry, skipping event\n");
} else {
if (al.map != NULL)
al.map->dso->hit = 1;
if (sort__mode == SORT_MODE__BRANCH)
iter.ops = &hist_iter_branch;
else if (rep->mem_mode)
iter.ops = &hist_iter_mem;
else if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
if (al.map != NULL)
al.map->dso->hit = 1;
ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
ret = report__add_hist_entry(rep, evsel, &al, sample);
if (ret < 0)
pr_debug("problem incrementing symbol period, skipping event\n");
}
return ret;
}
......@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
}
}
if (symbol_conf.cumulate_callchain) {
/* Silently ignore if callchain is missing */
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
}
if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
......@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
......@@ -804,8 +719,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (branch_mode == -1 && has_br_stack)
if (branch_mode == -1 && has_br_stack) {
sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}
if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
......@@ -813,6 +730,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
goto error;
}
sort__mode = SORT_MODE__MEMORY;
symbol_conf.cumulate_callchain = false;
}
if (setup_sorting() < 0) {
......
......@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
int err = 0;
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
......
......@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
pthread_mutex_unlock(&notes->lock);
/*
* This function is now called with he->hists->lock held.
* Release it before going to sleep.
*/
pthread_mutex_unlock(&he->hists->lock);
if (err == -ERANGE && !he->ms.map->erange_warned)
ui__warn_map_erange(he->ms.map, sym, ip);
else if (err == -ENOMEM) {
......@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
sym->name);
sleep(1);
}
pthread_mutex_lock(&he->hists->lock);
}
static void perf_top__show_details(struct perf_top *top)
......@@ -238,27 +246,6 @@ static void perf_top__show_details(struct perf_top *top)
pthread_mutex_unlock(&notes->lock);
}
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample)
{
struct hist_entry *he;
pthread_mutex_lock(&evsel->hists.lock);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
pthread_mutex_unlock(&evsel->hists.lock);
if (he == NULL)
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
evsel->hists.stats.nr_non_filtered_samples++;
return he;
}
static void perf_top__print_sym_table(struct perf_top *top)
{
char bf[160];
......@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
return 0;
}
/*
 * Per-entry callback used by perf top when adding hist entries through a
 * hist_entry_iter.
 *
 * @iter:   iterator; its ->he and ->evsel identify the entry just added
 * @al:     resolved location of the sample
 * @single: annotation samples are only recorded when this is true
 * @arg:    the struct perf_top passed as the iterator argument
 *
 * Always returns 0.
 */
static int hist_iter__top_callback(struct hist_entry_iter *iter,
				   struct addr_location *al, bool single,
				   void *arg)
{
	struct perf_top *top = arg;
	u64 ip;

	/* Nothing to record unless sorting by symbol on a 'single' entry. */
	if (!sort__has_sym || !single)
		return 0;

	ip = al->addr;
	if (al->map)
		ip = al->map->unmap_ip(al->map, ip);

	perf_top__record_precise_ip(top, iter->he, iter->evsel->idx, ip);
	return 0;
}
static void perf_event__process_sample(struct perf_tool *tool,
const union perf_event *event,
struct perf_evsel *evsel,
......@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
struct machine *machine)
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
struct symbol *parent = NULL;
u64 ip = sample->ip;
struct addr_location al;
int err;
......@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
}
if (al.sym == NULL || !al.sym->ignore) {
struct hist_entry *he;
struct hist_entry_iter iter = {
.add_entry_cb = hist_iter__top_callback,
};
err = sample__resolve_callchain(sample, &parent, evsel, &al,
top->max_stack);
if (err)
return;
if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
he = perf_evsel__add_hist_entry(evsel, &al, sample);
if (he == NULL) {
pr_err("Problem incrementing symbol period, skipping event\n");
return;
}
pthread_mutex_lock(&evsel->hists.lock);
err = hist_entry__append_callchain(he, sample);
if (err)
return;
err = hist_entry_iter__add(&iter, &al, evsel, sample,
top->max_stack, top);
if (err < 0)
pr_err("Problem incrementing symbol period, skipping event\n");
if (sort__has_sym)
perf_top__record_precise_ip(top, he, evsel->idx, ip);
pthread_mutex_unlock(&evsel->hists.lock);
}
return;
......@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb)
if (!strcmp(var, "top.call-graph"))
return record_parse_callchain(value, &top->record_opts);
if (!strcmp(var, "top.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb);
}
......@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK(0, "call-graph", &top.record_opts,
"mode[,dump_size]", record_callchain_help,
&parse_callchain_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
......@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
top.sym_evsel = perf_evlist__first(top.evlist);
if (!symbol_conf.use_callchain) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
......
......@@ -139,6 +139,10 @@ static struct test {
.desc = "Test output sorting of hist entries",
.func = test__hists_output,
},
{
.desc = "Test cumulation of child hist entries",
.func = test__hists_cumulate,
},
{
.func = NULL,
},
......
......@@ -12,9 +12,9 @@ static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
{ FAKE_PID_PERF1, "perf" },
{ FAKE_PID_PERF2, "perf" },
{ FAKE_PID_BASH, "bash" },
};
static struct {
......@@ -22,15 +22,15 @@ static struct {
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
{ FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
{ FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
{ FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
{ FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
{ FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
{ FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
};
struct fake_sym {
......@@ -40,27 +40,30 @@ struct fake_sym {
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
};
static struct {
......@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines)
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.len = FAKE_MAP_LENGTH,
.pgoff = 0ULL,
},
};
......@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists)
he = rb_entry(node, struct hist_entry, rb_node);
if (!he->filtered) {
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
i, thread__comm_str(he->thread), he->thread->tid,
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
he->ms.sym->name, he->stat.period,
he->stat_acc ? he->stat_acc->period : 0);
}
i++;
......
......@@ -4,6 +4,34 @@
struct machine;
struct machines;
/* Process ids of the three fake processes. */
#define FAKE_PID_PERF1  100
#define FAKE_PID_PERF2  200
#define FAKE_PID_BASH   300

/*
 * Start addresses of the fake mappings.  The perf and bash binaries use
 * the same start address; they are mapped for different pids.
 */
#define FAKE_MAP_PERF    0x400000
#define FAKE_MAP_BASH    0x400000
#define FAKE_MAP_LIBC    0x500000
#define FAKE_MAP_KERNEL  0xf00000
#define FAKE_MAP_LENGTH  0x100000

/* Offset of each fake symbol inside its mapping, and the symbol length. */
#define FAKE_SYM_OFFSET1  700
#define FAKE_SYM_OFFSET2  800
#define FAKE_SYM_OFFSET3  900
#define FAKE_SYM_LENGTH   100

/*
 * Sample/callchain addresses: map start + symbol offset.
 *
 * Every expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions (e.g. "2 * FAKE_IP_PERF_MAIN" or
 * "FAKE_IP_PERF_MAIN % FAKE_MAP_LENGTH").
 */
#define FAKE_IP_PERF_MAIN		(FAKE_MAP_PERF + FAKE_SYM_OFFSET1)
#define FAKE_IP_PERF_RUN_COMMAND	(FAKE_MAP_PERF + FAKE_SYM_OFFSET2)
#define FAKE_IP_PERF_CMD_RECORD		(FAKE_MAP_PERF + FAKE_SYM_OFFSET3)
#define FAKE_IP_BASH_MAIN		(FAKE_MAP_BASH + FAKE_SYM_OFFSET1)
#define FAKE_IP_BASH_XMALLOC		(FAKE_MAP_BASH + FAKE_SYM_OFFSET2)
#define FAKE_IP_BASH_XFREE		(FAKE_MAP_BASH + FAKE_SYM_OFFSET3)
#define FAKE_IP_LIBC_MALLOC		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET1)
#define FAKE_IP_LIBC_FREE		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET2)
#define FAKE_IP_LIBC_REALLOC		(FAKE_MAP_LIBC + FAKE_SYM_OFFSET3)
#define FAKE_IP_KERNEL_SCHEDULE		(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1)
#define FAKE_IP_KERNEL_PAGE_FAULT	(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2)
#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN	(FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3)
/*
* The setup_fake_machine() provides a test environment which consists
* of 3 processes that have 3 mappings and in turn, have 3 symbols
......@@ -13,7 +41,7 @@ struct machines;
* ............. ............. ...................
* perf: 100 perf main
* perf: 100 perf run_command
* perf: 100 perf comd_record
* perf: 100 perf cmd_record
* perf: 100 libc malloc
* perf: 100 libc free
* perf: 100 libc realloc
......@@ -22,7 +50,7 @@ struct machines;
* perf: 100 [kernel] sys_perf_event_open
* perf: 200 perf main
* perf: 200 perf run_command
* perf: 200 perf comd_record
* perf: 200 perf cmd_record
* perf: 200 libc malloc
* perf: 200 libc free
* perf: 200 libc realloc
......
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "util/parse-events.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
/*
 * One fake sample used by this test.
 *
 * Only pid and ip are given in the static table; thread, map and sym are
 * written by add_hist_entries() from the resolved addr_location after the
 * sample has been added.
 */
struct sample {
u32 pid;	/* copied into both perf_sample.pid and perf_sample.tid */
u64 ip;		/* sampled instruction address (a FAKE_IP_* value) */
struct thread *thread;	/* al.thread after resolution */
struct map *map;	/* al.map after resolution */
struct symbol *sym;	/* al.sym after resolution */
};
/*
 * Samples fed to add_hist_entries(), one row per sample.  Row i is paired
 * with fake_callchains[i], so the two tables must keep the same order and
 * length.
 *
 * For the numbers, see hists_common.c (the FAKE_* constants).
 */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [libc] malloc() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [libc] free() */
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
/* perf [perf] main() */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [kernel] page_fault() */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [kernel] page_fault() */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
/*
 * Callchains for the fake samples; row i belongs to fake_samples[i].
 *
 * Each row will be cast to struct ip_callchain, which has all 64 bit
 * entries of nr and ips[]: the first element is the number of addresses
 * that follow, and the addresses are listed starting from the sampled
 * function and ending at main (see the per-row comments).
 */
static u64 fake_callchains[][10] = {
/* schedule => run_command => main */
{ 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_PERF_MAIN, },
/* cmd_record => run_command => main */
{ 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* malloc => cmd_record => run_command => main */
{ 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
FAKE_IP_PERF_MAIN, },
/* free => cmd_record => run_command => main */
{ 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_PERF_MAIN, },
/* page_fault => sys_perf_event_open => run_command => main */
{ 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
/* main */
{ 1, FAKE_IP_BASH_MAIN, },
/* xmalloc => malloc => xmalloc => malloc => xmalloc => main */
{ 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
/* page_fault => malloc => main */
{ 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
};
static int add_hist_entries(struct hists *hists, struct machine *machine)
{
struct addr_location al;
struct perf_evsel *evsel = hists_to_evsel(hists);
struct perf_sample sample = { .period = 1000, };
size_t i;
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
const union perf_event event = {
.header = {
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.hide_unresolved = false,
};
if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
sample.callchain = (struct ip_callchain *)fake_callchains[i];
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
}
return TEST_OK;
out:
pr_debug("Not enough memory for adding a hist entry\n");
return TEST_FAIL;
}
/*
 * Remove and free every hist entry of @hists.
 *
 * Entries are taken from the output tree one by one; each is also unlinked
 * from the input tree it lives in (the collapsed tree when
 * sort__need_collapse is set, hists->entries_in otherwise) before being
 * freed.
 */
static void del_hist_entries(struct hists *hists)
{
	struct rb_root *in_tree = sort__need_collapse ?
					&hists->entries_collapsed :
					hists->entries_in;
	struct rb_root *out_tree = &hists->entries;
	struct rb_node *nd;

	while ((nd = rb_first(out_tree)) != NULL) {
		struct hist_entry *entry = rb_entry(nd, struct hist_entry,
						    rb_node);

		rb_erase(nd, out_tree);
		rb_erase(&entry->rb_node_in, in_tree);
		hist_entry__free(entry);
	}
}
/* Signature shared by the individual test cases in this file. */
typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);

/*
 * Field accessors for a hist entry pointer (he) and a callchain list
 * pointer (cl).  The argument is parenthesized so that any pointer
 * expression (e.g. "&entry" or "array + i") can be passed safely.
 */
#define COMM(he)  (thread__comm_str((he)->thread))
#define DSO(he)   ((he)->ms.map->dso->short_name)
#define SYM(he)   ((he)->ms.sym->name)
#define CPU(he)   ((he)->cpu)
#define PID(he)   ((he)->thread->tid)
#define DEPTH(he) ((he)->callchain->max_depth)
#define CDSO(cl)  ((cl)->ms.map->dso->short_name)
#define CSYM(cl)  ((cl)->ms.sym->name)
/*
 * Expected values for one hist entry, in output order.
 * do_test() compares these against the resorted entries.
 */
struct result {
u64 children;	/* expected he->stat_acc->period; only checked when cumulate_callchain is set */
u64 self;	/* expected he->stat.period */
const char *comm;	/* expected thread comm string */
const char *dso;	/* expected DSO short name */
const char *sym;	/* expected symbol name */
};
/*
 * Expected callchain of one hist entry; do_test() uses element i for the
 * i-th hist entry when symbol_conf.use_callchain is set.
 */
struct callchain_result {
u64 nr;	/* number of valid elements in node[] */
struct {
const char *dso;	/* expected DSO short name of the callchain entry */
const char *sym;	/* expected symbol name of the callchain entry */
} node[10];
};
/*
 * Resort @hists and compare the output entries against the expectations.
 *
 * @hists:              hists already populated by add_hist_entries()
 * @expected:           expected entries, in output order
 * @nr_expected:        number of elements in @expected
 * @expected_callchain: expected callchain per entry (same order as
 *                      @expected); only used when
 *                      symbol_conf.use_callchain is set
 * @nr_callchain:       number of elements in @expected_callchain
 *
 * Returns 0 when everything matches.  Any failed TEST_ASSERT_VAL() returns
 * from this function immediately.
 */
static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
		   struct callchain_result *expected_callchain, size_t nr_callchain)
{
	char buf[32];
	size_t i, c;
	struct hist_entry *he;
	struct rb_root *root;
	struct rb_root *chain_root;
	struct rb_node *node;
	struct callchain_node *cnode;
	struct callchain_list *clist;

	/*
	 * adding and deleting hist entries must be done outside of this
	 * function since TEST_ASSERT_VAL() returns in case of failure.
	 */
	hists__collapse_resort(hists, NULL);
	hists__output_resort(hists);

	if (verbose > 2) {
		pr_info("use callchain: %d, cumulate callchain: %d\n",
			symbol_conf.use_callchain,
			symbol_conf.cumulate_callchain);
		print_hists_out(hists);
	}

	root = &hists->entries;
	for (node = rb_first(root), i = 0;
	     node && (he = rb_entry(node, struct hist_entry, rb_node));
	     node = rb_next(node), i++) {
		/* i and c are size_t, hence %zu */
		scnprintf(buf, sizeof(buf), "Invalid hist entry #%zu", i);

		TEST_ASSERT_VAL("Incorrect number of hist entry",
				i < nr_expected);
		TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
				!strcmp(COMM(he), expected[i].comm) &&
				!strcmp(DSO(he), expected[i].dso) &&
				!strcmp(SYM(he), expected[i].sym));

		if (symbol_conf.cumulate_callchain)
			TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);

		if (!symbol_conf.use_callchain)
			continue;

		/*
		 * expected_callchain[] is indexed in lockstep with
		 * expected[]; make sure element i exists before reading it.
		 */
		TEST_ASSERT_VAL("Incorrect number of callchain entry",
				i < nr_callchain);

		/* check callchain entries */
		chain_root = &he->callchain->node.rb_root;

		/* rb_first() returns NULL for an empty tree */
		TEST_ASSERT_VAL("callchains expected",
				!RB_EMPTY_ROOT(chain_root));
		cnode = rb_entry(rb_first(chain_root), struct callchain_node,
				 rb_node);

		c = 0;
		list_for_each_entry(clist, &cnode->val, list) {
			scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zu/%zu", i, c);

			TEST_ASSERT_VAL("Incorrect number of callchain entry",
					c < expected_callchain[i].nr);
			TEST_ASSERT_VAL(buf,
				!strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
				!strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
			c++;
		}
		/* TODO: handle multiple child nodes properly */
		TEST_ASSERT_VAL("Incorrect number of callchain entry",
				c <= expected_callchain[i].nr);
	}
	TEST_ASSERT_VAL("Incorrect number of hist entry",
			i == nr_expected);
	TEST_ASSERT_VAL("Incorrect number of callchain entry",
			!symbol_conf.use_callchain || nr_expected == nr_callchain);
	return 0;
}
/* NO callchain + NO children */
static int test1(struct perf_evsel *evsel, struct machine *machine)
{
	/*
	 * expected output:
	 *
	 * Overhead  Command  Shared Object          Symbol
	 * ========  =======  =============  ==============
	 *   20.00%     perf  perf           [.] main
	 *   10.00%     bash  [kernel]       [k] page_fault
	 *   10.00%     bash  bash           [.] main
	 *   10.00%     bash  bash           [.] xmalloc
	 *   10.00%     perf  [kernel]       [k] page_fault
	 *   10.00%     perf  [kernel]       [k] schedule
	 *   10.00%     perf  libc           [.] free
	 *   10.00%     perf  libc           [.] malloc
	 *   10.00%     perf  perf           [.] cmd_record
	 */
	struct result expected[] = {
		{ 0, 2000, "perf", "perf",     "main" },
		{ 0, 1000, "bash", "[kernel]", "page_fault" },
		{ 0, 1000, "bash", "bash",     "main" },
		{ 0, 1000, "bash", "bash",     "xmalloc" },
		{ 0, 1000, "perf", "[kernel]", "page_fault" },
		{ 0, 1000, "perf", "[kernel]", "schedule" },
		{ 0, 1000, "perf", "libc",     "free" },
		{ 0, 1000, "perf", "libc",     "malloc" },
		{ 0, 1000, "perf", "perf",     "cmd_record" },
	};
	struct hists *hists = &evsel->hists;
	int ret;

	symbol_conf.use_callchain = false;
	symbol_conf.cumulate_callchain = false;

	setup_sorting();
	callchain_register_param(&callchain_param);

	/* only compare results when all samples were added */
	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);

	/* cleanup runs on both success and failure */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/* callchain + NO children */
static int test2(struct perf_evsel *evsel, struct machine *machine)
{
	/*
	 * expected output:
	 *
	 * Overhead  Command  Shared Object          Symbol
	 * ========  =======  =============  ==============
	 *   20.00%     perf  perf           [.] main
	 *              |
	 *              --- main
	 *
	 *   10.00%     bash  [kernel]       [k] page_fault
	 *              |
	 *              --- page_fault
	 *                  malloc
	 *                  main
	 *
	 *   10.00%     bash  bash           [.] main
	 *              |
	 *              --- main
	 *
	 *   10.00%     bash  bash           [.] xmalloc
	 *              |
	 *              --- xmalloc
	 *                  malloc
	 *                  xmalloc     <--- NOTE: there's a cycle
	 *                  malloc
	 *                  xmalloc
	 *                  main
	 *
	 *   10.00%     perf  [kernel]       [k] page_fault
	 *              |
	 *              --- page_fault
	 *                  sys_perf_event_open
	 *                  run_command
	 *                  main
	 *
	 *   10.00%     perf  [kernel]       [k] schedule
	 *              |
	 *              --- schedule
	 *                  run_command
	 *                  main
	 *
	 *   10.00%     perf  libc           [.] free
	 *              |
	 *              --- free
	 *                  cmd_record
	 *                  run_command
	 *                  main
	 *
	 *   10.00%     perf  libc           [.] malloc
	 *              |
	 *              --- malloc
	 *                  cmd_record
	 *                  run_command
	 *                  main
	 *
	 *   10.00%     perf  perf           [.] cmd_record
	 *              |
	 *              --- cmd_record
	 *                  run_command
	 *                  main
	 *
	 */
	struct result expected[] = {
		{ 0, 2000, "perf", "perf",     "main" },
		{ 0, 1000, "bash", "[kernel]", "page_fault" },
		{ 0, 1000, "bash", "bash",     "main" },
		{ 0, 1000, "bash", "bash",     "xmalloc" },
		{ 0, 1000, "perf", "[kernel]", "page_fault" },
		{ 0, 1000, "perf", "[kernel]", "schedule" },
		{ 0, 1000, "perf", "libc",     "free" },
		{ 0, 1000, "perf", "libc",     "malloc" },
		{ 0, 1000, "perf", "perf",     "cmd_record" },
	};
	/* one callchain per entry of expected[], in the same order */
	struct callchain_result expected_callchain[] = {
		{
			1, {	{ "perf",     "main" }, },
		},
		{
			3, {	{ "[kernel]", "page_fault" },
				{ "libc",     "malloc" },
				{ "bash",     "main" }, },
		},
		{
			1, {	{ "bash",     "main" }, },
		},
		{
			6, {	{ "bash",     "xmalloc" },
				{ "libc",     "malloc" },
				{ "bash",     "xmalloc" },
				{ "libc",     "malloc" },
				{ "bash",     "xmalloc" },
				{ "bash",     "main" }, },
		},
		{
			4, {	{ "[kernel]", "page_fault" },
				{ "[kernel]", "sys_perf_event_open" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			3, {	{ "[kernel]", "schedule" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "libc",     "free" },
				{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "libc",     "malloc" },
				{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			3, {	{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
	};
	struct hists *hists = &evsel->hists;
	int ret;

	symbol_conf.use_callchain = true;
	symbol_conf.cumulate_callchain = false;

	setup_sorting();
	callchain_register_param(&callchain_param);

	/* only compare results when all samples were added */
	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, expected, ARRAY_SIZE(expected),
			      expected_callchain,
			      ARRAY_SIZE(expected_callchain));

	/* cleanup runs on both success and failure */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/* NO callchain + children */
static int test3(struct perf_evsel *evsel, struct machine *machine)
{
	/*
	 * expected output:
	 *
	 * Children      Self  Command  Shared Object                   Symbol
	 * ========  ========  =======  =============  =======================
	 *   70.00%    20.00%     perf  perf           [.] main
	 *   50.00%     0.00%     perf  perf           [.] run_command
	 *   30.00%    10.00%     bash  bash           [.] main
	 *   30.00%    10.00%     perf  perf           [.] cmd_record
	 *   20.00%     0.00%     bash  libc           [.] malloc
	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
	 *   10.00%    10.00%     perf  libc           [.] free
	 *   10.00%    10.00%     perf  libc           [.] malloc
	 *   10.00%    10.00%     bash  bash           [.] xmalloc
	 */
	struct result expected[] = {
		{ 7000, 2000, "perf", "perf",     "main" },
		{ 5000,    0, "perf", "perf",     "run_command" },
		{ 3000, 1000, "bash", "bash",     "main" },
		{ 3000, 1000, "perf", "perf",     "cmd_record" },
		{ 2000,    0, "bash", "libc",     "malloc" },
		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
		{ 1000, 1000, "perf", "[kernel]", "schedule" },
		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
		{ 1000, 1000, "perf", "libc",     "free" },
		{ 1000, 1000, "perf", "libc",     "malloc" },
		{ 1000, 1000, "bash", "bash",     "xmalloc" },
	};
	struct hists *hists = &evsel->hists;
	int ret;

	symbol_conf.use_callchain = false;
	symbol_conf.cumulate_callchain = true;

	setup_sorting();
	callchain_register_param(&callchain_param);

	/* only compare results when all samples were added */
	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);

	/* cleanup runs on both success and failure */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * test4: callchain output and children (cumulated) periods both enabled.
 *
 * Turns on symbol_conf.use_callchain and symbol_conf.cumulate_callchain,
 * adds the fake samples and hands both the per-entry table and the
 * per-entry callchain table to do_test().
 */
static int test4(struct perf_evsel *evsel, struct machine *machine)
{
	struct hists *hists = &evsel->hists;
	int ret;
	/*
	 * expected output:
	 *
	 * Children      Self  Command  Shared Object                   Symbol
	 * ========  ========  =======  =============  =======================
	 *   70.00%    20.00%     perf  perf           [.] main
	 *              |
	 *              --- main
	 *
	 *   50.00%     0.00%     perf  perf           [.] run_command
	 *              |
	 *              --- run_command
	 *                  main
	 *
	 *   30.00%    10.00%     bash  bash           [.] main
	 *              |
	 *              --- main
	 *
	 *   30.00%    10.00%     perf  perf           [.] cmd_record
	 *              |
	 *              --- cmd_record
	 *                  run_command
	 *                  main
	 *
	 *   20.00%     0.00%     bash  libc           [.] malloc
	 *              |
	 *              --- malloc
	 *                 |
	 *                 |--50.00%-- xmalloc
	 *                 |           main
	 *                  --50.00%-- main
	 *
	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
	 *              |
	 *              --- page_fault
	 *                  malloc
	 *                  main
	 *
	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
	 *              |
	 *              --- schedule
	 *                  run_command
	 *                  main
	 *
	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
	 *              |
	 *              --- sys_perf_event_open
	 *                  run_command
	 *                  main
	 *
	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
	 *              |
	 *              --- page_fault
	 *                  sys_perf_event_open
	 *                  run_command
	 *                  main
	 *
	 *   10.00%    10.00%     perf  libc           [.] free
	 *              |
	 *              --- free
	 *                  cmd_record
	 *                  run_command
	 *                  main
	 *
	 *   10.00%    10.00%     perf  libc           [.] malloc
	 *              |
	 *              --- malloc
	 *                  cmd_record
	 *                  run_command
	 *                  main
	 *
	 *   10.00%    10.00%     bash  bash           [.] xmalloc
	 *              |
	 *              --- xmalloc
	 *                  malloc
	 *                  xmalloc     <--- NOTE: there's a cycle
	 *                  malloc
	 *                  xmalloc
	 *                  main
	 *
	 */
	/* NOTE(review): fields look like { children, self, comm, dso, sym } - confirm against struct result */
	struct result expected[] = {
		{ 7000, 2000, "perf", "perf",     "main" },
		{ 5000,    0, "perf", "perf",     "run_command" },
		{ 3000, 1000, "bash", "bash",     "main" },
		{ 3000, 1000, "perf", "perf",     "cmd_record" },
		{ 2000,    0, "bash", "libc",     "malloc" },
		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
		{ 1000, 1000, "perf", "[kernel]", "schedule" },
		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
		{ 1000, 1000, "perf", "libc",     "free" },
		{ 1000, 1000, "perf", "libc",     "malloc" },
		{ 1000, 1000, "bash", "bash",     "xmalloc" },
	};
	/*
	 * One callchain per entry of expected[], in the same order: a node
	 * count followed by that many { dso, symbol } pairs.
	 */
	struct callchain_result expected_callchain[] = {
		{
			1, {	{ "perf",     "main" }, },
		},
		{
			2, {	{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			1, {	{ "bash",     "main" }, },
		},
		{
			3, {	{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "libc",     "malloc" },
				{ "bash",     "xmalloc" },
				{ "bash",     "main" },
				{ "bash",     "main" }, },
		},
		{
			3, {	{ "[kernel]", "page_fault" },
				{ "libc",     "malloc" },
				{ "bash",     "main" }, },
		},
		{
			3, {	{ "[kernel]", "schedule" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			3, {	{ "[kernel]", "sys_perf_event_open" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "[kernel]", "page_fault" },
				{ "[kernel]", "sys_perf_event_open" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "libc",     "free" },
				{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			4, {	{ "libc",     "malloc" },
				{ "perf",     "cmd_record" },
				{ "perf",     "run_command" },
				{ "perf",     "main" }, },
		},
		{
			6, {	{ "bash",     "xmalloc" },
				{ "libc",     "malloc" },
				{ "bash",     "xmalloc" },
				{ "libc",     "malloc" },
				{ "bash",     "xmalloc" },
				{ "bash",     "main" }, },
		},
	};

	symbol_conf.use_callchain = true;
	symbol_conf.cumulate_callchain = true;

	setup_sorting();
	callchain_register_param(&callchain_param);

	ret = add_hist_entries(hists, machine);
	if (ret >= 0)
		ret = do_test(hists, expected, ARRAY_SIZE(expected),
			      expected_callchain,
			      ARRAY_SIZE(expected_callchain));

	/* teardown runs whether or not the steps above succeeded */
	del_hist_entries(hists);
	reset_output_field();
	return ret;
}
/*
 * Entry point of the hists "cumulate" test.
 *
 * Creates an evlist with a "cpu-clock" event, builds the fake machine and
 * runs test1..test4 in order against the first evsel, stopping at the first
 * test case that returns a negative value.
 *
 * Returns the result of the last test case that ran, the parse_events()
 * error if event parsing failed, or TEST_FAIL if the fake machine could not
 * be set up.
 */
int test__hists_cumulate(void)
{
	int err = TEST_FAIL;
	struct machines machines;
	struct machine *machine;
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = perf_evlist__new();
	size_t i;
	test_fn_t testcases[] = {
		test1,
		test2,
		test3,
		test4,
	};

	TEST_ASSERT_VAL("No memory", evlist);

	/*
	 * Initialize before the first "goto out": the common exit path calls
	 * machines__exit() and must never see an uninitialized struct.
	 */
	machines__init(&machines);

	err = parse_events(evlist, "cpu-clock");
	if (err)
		goto out;

	/*
	 * parse_events() left err at 0; make sure a failure to set up the
	 * fake machine is not reported as a passing test.
	 */
	err = TEST_FAIL;

	/* setup threads/dso/map/symbols also */
	machine = setup_fake_machine(&machines);
	if (!machine)
		goto out;

	if (verbose > 1)
		machine__fprintf(machine, stderr);

	evsel = perf_evlist__first(evlist);

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		err = testcases[i](evsel, machine);
		if (err < 0)
			break;
	}

out:
	/* tear down everything */
	perf_evlist__delete(evlist);
	machines__exit(&machines);

	return err;
}
......@@ -21,33 +21,33 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = 100, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [kernel] page_fault() */
{ .pid = 200, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .pid = 300, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
static int add_hist_entries(struct perf_evlist *evlist,
struct machine *machine __maybe_unused)
{
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
struct perf_sample sample = { .period = 100, };
size_t i;
/*
......@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
/* make sure it has no filter at first */
evsel->hists.thread_filter = NULL;
......@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 100, 1, 0);
if (he == NULL)
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
}
......
......@@ -21,41 +21,41 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
};
static struct sample fake_samples[][5] = {
{
/* perf [perf] run_command() */
{ .pid = 100, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [kernel] page_fault() */
{ .pid = 100, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* perf [kernel] sys_perf_event_open() */
{ .pid = 200, .ip = 0xf0000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
/* bash [libc] free() */
{ .pid = 300, .ip = 0x50000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
},
{
/* perf [libc] free() */
{ .pid = 200, .ip = 0x50000 + 800, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
/* bash [bash] xfree() */
{ .pid = 300, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
/* bash [libc] realloc() */
{ .pid = 300, .ip = 0x50000 + 900, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
},
};
......@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
struct perf_sample sample = { .period = 1, };
size_t i = 0, k;
/*
......@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0);
NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
......@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0);
NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
......
......@@ -22,31 +22,31 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, },
{ .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 700, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 900, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [libc] malloc() */
{ .cpu = 1, .pid = 100, .ip = 0x50000 + 700, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [libc] free() */
{ .cpu = 2, .pid = 100, .ip = 0x50000 + 800, },
{ .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
/* perf [perf] main() */
{ .cpu = 2, .pid = 200, .ip = 0x40000 + 700, },
{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [kernel] page_fault() */
{ .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, },
{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .cpu = 3, .pid = 300, .ip = 0x40000 + 700, },
{ .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .cpu = 0, .pid = 300, .ip = 0x40000 + 800, },
{ .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [kernel] page_fault() */
{ .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, },
{ .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
static int add_hist_entries(struct hists *hists, struct machine *machine)
{
struct addr_location al;
struct hist_entry *he;
struct perf_evsel *evsel = hists_to_evsel(hists);
struct perf_sample sample = { .period = 100, };
size_t i;
......@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
sample.cpu = fake_samples[i].cpu;
sample.pid = fake_samples[i].pid;
......@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
&sample) < 0)
goto out;
he = __hists__add_entry(hists, &al, NULL, NULL, NULL,
sample.period, 1, 0);
if (he == NULL)
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
......
......@@ -45,6 +45,7 @@ int test__hists_filter(void);
int test__mmap_thread_lookup(void);
int test__thread_mg_share(void);
int test__hists_output(void);
int test__hists_cumulate(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
......
......@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
static void hist_browser__update_nr_entries(struct hist_browser *hb);
static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt);
static bool hist_browser__has_filter(struct hist_browser *hb)
......@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
struct hists *hists = browser->hists;
for (nd = rb_first(&hists->entries);
(nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL;
(nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
nd = rb_next(nd)) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
hist_entry__set_folding(he, unfold);
......@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
__hpp__slsmg_color_printf, true); \
}
/*
 * Generate the accumulated-period getter and color callback for one column.
 *
 * Expands to:
 *  - __hpp_get_acc_<_field>(): returns he->stat_acc-><_field>.
 *  - hist_browser__hpp_color_<_type>(): when symbol_conf.cumulate_callchain
 *    is not set, formats "N/A" right-aligned in 8 columns into hpp->buf,
 *    prints it with slsmg_printf() and returns the scnprintf() result;
 *    otherwise defers to __hpp__fmt() with the getter above.
 *
 * The getter dereferences he->stat_acc unconditionally; the callback only
 * hands it to __hpp__fmt() on the cumulate_callchain path.
 */
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int \
hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
if (!symbol_conf.cumulate_callchain) { \
int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \
slsmg_printf("%s", hpp->buf); \
\
return ret; \
} \
return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \
__hpp__slsmg_color_printf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
void hist_browser__init_hpp(void)
{
......@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void)
hist_browser__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
hist_browser__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
hist_browser__hpp_color_overhead_acc;
}
static int hist_browser__show_entry(struct hist_browser *browser,
......@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(h->hists);
float percent = 0.0;
float percent;
if (h->filtered)
continue;
if (total)
percent = h->stat.period * 100.0 / total;
percent = hist_entry__get_percent_limit(h);
if (percent < hb->min_pcnt)
continue;
......@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
}
static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt)
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
float percent = hist_entry__get_percent_limit(h);
if (!h->filtered && percent >= min_pcnt)
return nd;
......@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
}
static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt)
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
float percent = hist_entry__get_percent_limit(h);
if (!h->filtered && percent >= min_pcnt)
return nd;
......@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
switch (whence) {
case SEEK_SET:
nd = hists__filter_entries(rb_first(browser->entries),
hb->hists, hb->min_pcnt);
hb->min_pcnt);
break;
case SEEK_CUR:
nd = browser->top;
goto do_offset;
case SEEK_END:
nd = hists__filter_prev_entries(rb_last(browser->entries),
hb->hists, hb->min_pcnt);
hb->min_pcnt);
first = false;
break;
default:
......@@ -913,8 +924,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
break;
}
}
nd = hists__filter_entries(rb_next(nd), hb->hists,
hb->min_pcnt);
nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
if (nd == NULL)
break;
--offset;
......@@ -947,7 +957,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
}
}
nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
nd = hists__filter_prev_entries(rb_prev(nd),
hb->min_pcnt);
if (nd == NULL)
break;
......@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
{
struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
browser->hists,
browser->min_pcnt);
int printed = 0;
......@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
printed += hist_browser__fprintf_entry(browser, h, fp);
nd = hists__filter_entries(rb_next(nd), browser->hists,
browser->min_pcnt);
nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
}
return printed;
......@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
return;
}
while ((nd = hists__filter_entries(nd, hb->hists,
hb->min_pcnt)) != NULL) {
while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
nr_entries++;
nd = rb_next(nd);
}
......
......@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
__percent_color_snprintf, true); \
}
/*
 * Generate the accumulated-period getter and GTK color callback for one
 * column.
 *
 * Expands to:
 *  - he_get_acc_<_field>(): returns he->stat_acc-><_field>.
 *  - perf_gtk__hpp_color_<_type>(): formats the value through
 *    __hpp__fmt_acc() using the " %6.2f%%" format and
 *    __percent_color_snprintf as the print function.
 */
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
__percent_color_snprintf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN
......@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
perf_gtk__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
perf_gtk__hpp_color_overhead_acc;
}
static void callchain_list__sym_name(struct callchain_list *cl,
......@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (perf_hpp__should_skip(fmt))
continue;
/*
* XXX no way to determine where symcol column is..
* Just use last column for now.
*/
if (perf_hpp__is_sort_entry(fmt))
sym_col = col_idx;
fmt->header(fmt, &hpp, hists_to_evsel(hists));
gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
......@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
GtkTreeIter iter;
u64 total = hists__total_period(h->hists);
float percent = 0.0;
float percent;
if (h->filtered)
continue;
if (total)
percent = h->stat.period * 100.0 / total;
percent = hist_entry__get_percent_limit(h);
if (percent < min_pcnt)
continue;
......@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (symbol_conf.use_callchain && sort__has_sym) {
if (callchain_param.mode == CHAIN_GRAPH_REL)
total = h->stat.period;
total = symbol_conf.cumulate_callchain ?
h->stat_acc->period : h->stat.period;
perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
sym_col, total);
......
......@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
return ret;
}
/*
 * Format an accumulated ("children") column.
 *
 * When symbol_conf.cumulate_callchain is not set there is no accumulated
 * stat to show, so "N/A" is written right-aligned in a field of 8 columns
 * (fmt_percent) or 12 columns (!fmt_percent).  Otherwise the call is
 * forwarded unchanged to __hpp__fmt().
 *
 * Returns the number of characters stored in hpp->buf on the "N/A" path
 * (or a negative value if snprintf() itself failed), else whatever
 * __hpp__fmt() returns.
 */
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
		   hpp_field_fn get_field, const char *fmt,
		   hpp_snprint_fn print_fn, bool fmt_percent)
{
	if (!symbol_conf.cumulate_callchain) {
		int width = fmt_percent ? 8 : 12;
		int ret = snprintf(hpp->buf, hpp->size, "%*s", width, "N/A");

		/*
		 * snprintf() reports the length the output would have had
		 * without truncation.  Clamp it to what was actually stored
		 * so the result never exceeds the space left in the buffer.
		 */
		if (ret >= 0 && (size_t)ret >= hpp->size)
			ret = hpp->size ? (int)(hpp->size - 1) : 0;

		return ret;
	}

	return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
}
static int field_cmp(u64 field_a, u64 field_b)
{
if (field_a > field_b)
......@@ -160,6 +172,24 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret;
}
/*
 * Compare two entries on an accumulated field.
 *
 * Yields 0 when symbol_conf.cumulate_callchain is off.  Otherwise the
 * field values decide; on a tie the difference of the callchain max_depth
 * values (b minus a) is used so that a caller is put above its callee when
 * both have the same period.
 */
static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
			   hpp_field_fn get_field)
{
	s64 order;

	if (!symbol_conf.cumulate_callchain)
		return 0;

	order = field_cmp(get_field(a), get_field(b));
	if (order == 0) {
		/* equal periods: fall back to callchain depth */
		order = b->callchain->max_depth - a->callchain->max_depth;
	}

	return order;
}
#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \
......@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
return __hpp__sort(a, b, he_get_##_field); \
}
/*
 * Generate the accumulated-period getter and the color callback for one
 * column.
 *
 * Expands to:
 *  - he_get_acc_<_field>(): returns he->stat_acc-><_field>.
 *  - hpp__color_<_type>(): formats the value through __hpp__fmt_acc() with
 *    the " %6.2f%%" format and hpp_color_scnprintf as the print function.
 */
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
hpp_color_scnprintf, true); \
}
/*
 * Generate hpp__entry_<_type>(), the plain (non-color) entry callback for an
 * accumulated percentage column.
 *
 * The format is " %.2f" when symbol_conf.field_sep is set and " %6.2f%%"
 * otherwise; the value is printed through __hpp__fmt_acc() with
 * hpp_entry_scnprintf.  The expansion refers to he_get_acc_<_field>(), which
 * __HPP_COLOR_ACC_PERCENT_FN defines for the same _field.
 */
#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \
hpp_entry_scnprintf, true); \
}
/*
 * Generate hpp__sort_<_type>(), which compares two entries on the
 * accumulated <_field> by calling __hpp__sort_acc() with the
 * he_get_acc_<_field>() getter.
 */
#define __HPP_SORT_ACC_FN(_type, _field) \
static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
{ \
return __hpp__sort_acc(a, b, he_get_acc_##_field); \
}
#define __HPP_ENTRY_RAW_FN(_type, _field) \
static u64 he_get_raw_##_field(struct hist_entry *he) \
{ \
......@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \
__HPP_ENTRY_PERCENT_FN(_type, _field) \
__HPP_SORT_FN(_type, _field)
/*
 * Emit the full set of callbacks for an accumulated percentage column:
 * header, width, color, entry and sort, by expanding the five helper macros
 * below with the given type name, header string, stat field and widths.
 */
#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
__HPP_SORT_ACC_FN(_type, _field)
#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_ENTRY_RAW_FN(_type, _field) \
__HPP_SORT_RAW_FN(_type, _field)
__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
HPP_RAW_FNS(period, "Period", period, 12, 12)
......@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused,
.sort = hpp__sort_ ## _name, \
}
/*
 * Initializer for a perf_hpp__format[] slot describing an accumulated
 * column: header/width/color/entry/sort point at the hpp__*_<_name>
 * functions, while cmp and collapse are both set to hpp__nop_cmp.
 */
#define HPP__COLOR_ACC_PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
.width = hpp__width_ ## _name, \
.color = hpp__color_ ## _name, \
.entry = hpp__entry_ ## _name, \
.cmp = hpp__nop_cmp, \
.collapse = hpp__nop_cmp, \
.sort = hpp__sort_ ## _name, \
}
#define HPP__PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
......@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS(overhead_us),
HPP__COLOR_PRINT_FNS(overhead_guest_sys),
HPP__COLOR_PRINT_FNS(overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
HPP__PRINT_FNS(samples),
HPP__PRINT_FNS(period)
};
......@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list);
#undef HPP__COLOR_PRINT_FNS
#undef HPP__COLOR_ACC_PRINT_FNS
#undef HPP__PRINT_FNS
#undef HPP_PERCENT_FNS
#undef HPP_PERCENT_ACC_FNS
#undef HPP_RAW_FNS
#undef __HPP_HEADER_FN
#undef __HPP_WIDTH_FN
#undef __HPP_COLOR_PERCENT_FN
#undef __HPP_ENTRY_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
#undef __HPP_ENTRY_ACC_PERCENT_FN
#undef __HPP_ENTRY_RAW_FN
#undef __HPP_SORT_FN
#undef __HPP_SORT_ACC_FN
#undef __HPP_SORT_RAW_FN
void perf_hpp__init(void)
......@@ -361,6 +447,13 @@ void perf_hpp__init(void)
if (field_order)
return;
if (symbol_conf.cumulate_callchain) {
perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
perf_hpp__format[PERF_HPP__OVERHEAD].header =
hpp__header_overhead_self;
}
perf_hpp__column_enable(PERF_HPP__OVERHEAD);
if (symbol_conf.show_cpu_utilization) {
......@@ -383,6 +476,12 @@ void perf_hpp__init(void)
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list))
list_add(list, &perf_hpp__sort_list);
if (symbol_conf.cumulate_callchain) {
list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
if (list_empty(list))
list_add(list, &perf_hpp__sort_list);
}
}
void perf_hpp__column_register(struct perf_hpp_fmt *format)
......@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
list_add_tail(&format->list, &perf_hpp__list);
}
/*
 * Remove @format from the list it is linked on through its ->list member
 * (perf_hpp__column_register() links it onto perf_hpp__list).
 */
void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
{
	struct list_head *node = &format->list;

	list_del(node);
}
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
{
list_add_tail(&format->sort_list, &perf_hpp__sort_list);
......@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col)
perf_hpp__column_register(&perf_hpp__format[col]);
}
void perf_hpp__column_disable(unsigned col)
{
BUG_ON(col >= PERF_HPP__MAX_INDEX);
perf_hpp__column_unregister(&perf_hpp__format[col]);
}
/*
 * Undo the default-column setup done for cumulate mode: drop the
 * PERF_HPP__OVERHEAD_ACC column and restore the regular header callback of
 * the PERF_HPP__OVERHEAD column.  Does nothing when field_order is set.
 */
void perf_hpp__cancel_cumulate(void)
{
	if (!field_order) {
		perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
		perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
	}
}
void perf_hpp__setup_output_field(void)
{
struct perf_hpp_fmt *fmt;
......
......@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
{
switch (callchain_param.mode) {
case CHAIN_GRAPH_REL:
return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period,
return callchain__fprintf_graph(fp, &he->sorted_chain,
symbol_conf.cumulate_callchain ?
he->stat_acc->period : he->stat.period,
left_margin);
break;
case CHAIN_GRAPH_ABS:
......@@ -461,12 +463,12 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
float percent = h->stat.period * 100.0 /
hists->stats.total_period;
float percent;
if (h->filtered)
continue;
percent = hist_entry__get_percent_limit(h);
if (percent < min_pcnt)
continue;
......
......@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
if (sample->callchain == NULL)
return 0;
if (symbol_conf.use_callchain || sort__has_parent) {
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
sort__has_parent) {
return machine__resolve_callchain(al->machine, evsel, al->thread,
sample, parent, al, max_stack);
}
......@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
return 0;
return callchain_append(he->callchain, &callchain_cursor, sample->period);
}
/*
 * Fill @al from one callchain cursor @node.
 *
 * Copies map and symbol from @node and sets al->addr to the map-relative
 * address when the node has a map, or to the raw ip otherwise.  It then
 * derives al->cpumode and al->level from whether the map belongs to the
 * machine's kernel maps and whether the machine is the host.
 *
 * Returns 0 when the node has no symbol and @hide_unresolved is set,
 * 1 otherwise (cpumode/level are left untouched when the node has neither
 * symbol nor map).
 */
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
			bool hide_unresolved)
{
	al->map = node->map;
	al->sym = node->sym;
	al->addr = node->map ? node->map->map_ip(node->map, node->ip) : node->ip;

	if (al->sym == NULL) {
		if (hide_unresolved)
			return 0;
		/* nothing to classify without a map */
		if (al->map == NULL)
			return 1;
	}

	if (al->map->groups == &al->machine->kmaps) {
		/* kernel side: host kernel vs. guest kernel */
		bool host = machine__is_host(al->machine);

		al->cpumode = host ? PERF_RECORD_MISC_KERNEL :
				     PERF_RECORD_MISC_GUEST_KERNEL;
		al->level = host ? 'k' : 'g';
	} else if (machine__is_host(al->machine)) {
		al->cpumode = PERF_RECORD_MISC_USER;
		al->level = '.';
	} else if (perf_guest) {
		al->cpumode = PERF_RECORD_MISC_GUEST_USER;
		al->level = 'u';
	} else {
		al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
		al->level = 'H';
	}

	return 1;
}
......@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
struct perf_evsel *evsel, struct addr_location *al,
int max_stack);
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved);
extern const char record_callchain_help[];
int parse_callchain_report_opt(const char *arg);
/*
 * Make @dest a copy of @src that starts at @src's current position:
 * the whole struct is copied, then ->first is set to @src's ->curr and
 * ->nr is reduced by @src's ->pos.
 */
static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
					     struct callchain_cursor *src)
{
	*dest = *src;

	dest->nr = src->nr - src->pos;
	dest->first = src->curr;
}
#endif /* __PERF_CALLCHAIN_H */
......@@ -4,6 +4,7 @@
#include "session.h"
#include "sort.h"
#include "evsel.h"
#include "annotate.h"
#include <math.h>
static bool hists__filter_entry_by_dso(struct hists *hists,
......@@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
return true;
he_stat__decay(&he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__decay(he->stat_acc);
diff = prev_period - he->stat.period;
......@@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
* histogram, sorted on item, collects periods
*/
static struct hist_entry *hist_entry__new(struct hist_entry *template)
static struct hist_entry *hist_entry__new(struct hist_entry *template,
bool sample_self)
{
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
size_t callchain_size = 0;
struct hist_entry *he;
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
callchain_size = sizeof(struct callchain_root);
he = zalloc(sizeof(*he) + callchain_size);
if (he != NULL) {
*he = *template;
if (symbol_conf.cumulate_callchain) {
he->stat_acc = malloc(sizeof(he->stat));
if (he->stat_acc == NULL) {
free(he);
return NULL;
}
memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
if (!sample_self)
memset(&he->stat, 0, sizeof(he->stat));
}
if (he->ms.map)
he->ms.map->referenced = true;
......@@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
*/
he->branch_info = malloc(sizeof(*he->branch_info));
if (he->branch_info == NULL) {
free(he->stat_acc);
free(he);
return NULL;
}
......@@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry,
struct addr_location *al)
struct addr_location *al,
bool sample_self)
{
struct rb_node **p;
struct rb_node *parent = NULL;
......@@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
cmp = hist_entry__cmp(he, entry);
if (!cmp) {
he_stat__add_period(&he->stat, period, weight);
if (sample_self)
he_stat__add_period(&he->stat, period, weight);
if (symbol_conf.cumulate_callchain)
he_stat__add_period(he->stat_acc, period, weight);
/*
* This mem info was allocated from sample__resolve_mem
......@@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
p = &(*p)->rb_right;
}
he = hist_entry__new(entry);
he = hist_entry__new(entry, sample_self);
if (!he)
return NULL;
rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, hists->entries_in);
out:
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
if (sample_self)
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
if (symbol_conf.cumulate_callchain)
he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
return he;
}
......@@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *sym_parent,
struct branch_info *bi,
struct mem_info *mi,
u64 period, u64 weight, u64 transaction)
u64 period, u64 weight, u64 transaction,
bool sample_self)
{
struct hist_entry entry = {
.thread = al->thread,
......@@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction,
};
return add_hist_entry(hists, &entry, al);
return add_hist_entry(hists, &entry, al, sample_self);
}
/*
 * next_entry callback for iterators that add exactly one entry per sample
 * (used by hist_iter_mem and hist_iter_normal).  It always returns 0, so
 * the "while (next_entry)" loop in hist_entry_iter__add() never runs.
 */
static int
iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
struct addr_location *al __maybe_unused)
{
return 0;
}
/*
 * add_next_entry callback for iterators that add exactly one entry per
 * sample (used by hist_iter_mem and hist_iter_normal).  Does nothing and
 * reports success.
 */
static int
iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
struct addr_location *al __maybe_unused)
{
return 0;
}
/*
 * prepare_entry callback for memory samples: resolve the sample's memory
 * info and park it in iter->priv for iter_add_single_mem_entry().
 *
 * Returns 0 on success, -ENOMEM when sample__resolve_mem() yields NULL.
 */
static int
iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct mem_info *mem = sample__resolve_mem(iter->sample, al);

	if (!mem)
		return -ENOMEM;

	iter->priv = mem;
	return 0;
}
/*
 * add_single_entry callback for memory samples: add one hist entry that
 * carries the mem_info stored in iter->priv by the prepare step.
 *
 * Returns 0 on success, -EINVAL when no mem_info was prepared, -ENOMEM
 * when the hist entry could not be added.
 */
static int
iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct mem_info *mem = iter->priv;
	struct hist_entry *entry;
	u64 cost;

	if (!mem)
		return -EINVAL;

	/* A sample without a weight still counts as one unit of cost. */
	cost = iter->sample->weight ? iter->sample->weight : 1;

	/*
	 * The cost is handed over as both period and weight:
	 * hists__collapse_resort() sorts solely on periods, and we want
	 * the sort to be on nr_events * weight.  Passing period=weight
	 * achieves that indirectly through he_stat__add_period().
	 */
	entry = __hists__add_entry(&iter->evsel->hists, al, iter->parent,
				   NULL, mem, cost, cost, 0, true);
	if (!entry)
		return -ENOMEM;

	iter->he = entry;
	return 0;
}
/*
 * finish_entry callback for memory samples: account the sample and attach
 * its callchain to the entry added earlier, then reset the iterator state.
 *
 * Returns -EINVAL when no entry was added, otherwise the result of
 * hist_entry__append_callchain().
 */
static int
iter_finish_mem_entry(struct hist_entry_iter *iter,
		      struct addr_location *al __maybe_unused)
{
	struct hist_entry *entry = iter->he;
	int ret;

	if (entry) {
		hists__inc_nr_samples(&iter->evsel->hists, entry->filtered);
		ret = hist_entry__append_callchain(entry, iter->sample);
	} else {
		ret = -EINVAL;
	}

	/*
	 * iter->priv (the mem_info) is deliberately not freed here: it was
	 * either already freed in add_hist_entry() or handed over to a new
	 * hist entry by hist_entry__new().
	 */
	iter->priv = NULL;
	iter->he = NULL;

	return ret;
}
static int
iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
struct branch_info *bi;
struct perf_sample *sample = iter->sample;
bi = sample__resolve_bstack(sample, al);
if (!bi)
return -ENOMEM;
iter->curr = 0;
iter->total = sample->branch_stack->nr;
iter->priv = bi;
return 0;
}
/*
 * add_single_entry callback for branch samples.  No entry is added at this
 * stage; the branch entries are added one by one through
 * iter_add_next_branch_entry().
 *
 * Note: iter is written below even though it is annotated __maybe_unused.
 */
static int
iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
struct addr_location *al __maybe_unused)
{
/*
 * hist_entry_iter__add() only invokes add_entry_cb when iter->he is
 * non-NULL, so clearing it here skips the callback for this step.
 */
iter->he = NULL;
return 0;
}
/*
 * next_entry callback for branch samples: load the target ("to") side of
 * the current branch into @al.
 *
 * Returns 1 when @al was filled in, 0 when there is no branch info or all
 * branches have been visited.
 */
static int
iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct branch_info *branches = iter->priv;
	struct branch_info *cur;

	if (!branches || iter->curr >= iter->total)
		return 0;

	cur = &branches[iter->curr];

	al->map = cur->to.map;
	al->sym = cur->to.sym;
	al->addr = cur->to.addr;

	return 1;
}
static int
iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
struct branch_info *bi;
struct perf_evsel *evsel = iter->evsel;
struct hist_entry *he = NULL;
int i = iter->curr;
int err = 0;
bi = iter->priv;
if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
goto out;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
1, 1, 0, true);
if (he == NULL)
return -ENOMEM;
hists__inc_nr_samples(&evsel->hists, he->filtered);
out:
iter->he = he;
iter->curr++;
return err;
}
/*
 * finish_entry callback for branch samples: release the branch_info array
 * held in iter->priv and clear iter->he.
 *
 * Returns 0 when every branch was visited (curr reached total), -1
 * otherwise.
 */
static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
			 struct addr_location *al __maybe_unused)
{
	zfree(&iter->priv);
	iter->he = NULL;

	if (iter->curr < iter->total)
		return -1;

	return 0;
}
/*
 * prepare_entry callback for plain samples (hist_iter_normal): there is
 * nothing to set up, so it just reports success.
 */
static int
iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
struct addr_location *al __maybe_unused)
{
return 0;
}
/*
 * add_single_entry callback for plain samples: add one hist entry using
 * the sample's period, weight and transaction, and remember it in iter->he.
 *
 * Returns 0 on success, -ENOMEM when the entry could not be added.
 */
static int
iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
{
	struct perf_sample *sample = iter->sample;
	struct hist_entry *entry;

	entry = __hists__add_entry(&iter->evsel->hists, al, iter->parent,
				   NULL, NULL, sample->period, sample->weight,
				   sample->transaction, true);
	if (!entry)
		return -ENOMEM;

	iter->he = entry;
	return 0;
}
/*
 * finish_entry callback for plain samples: account the sample and attach
 * its callchain to the entry recorded in iter->he, clearing iter->he.
 *
 * Returns 0 when no entry was added, otherwise the result of
 * hist_entry__append_callchain().
 */
static int
iter_finish_normal_entry(struct hist_entry_iter *iter,
			 struct addr_location *al __maybe_unused)
{
	struct hist_entry *entry = iter->he;

	if (!entry)
		return 0;

	iter->he = NULL;

	hists__inc_nr_samples(&iter->evsel->hists, entry->filtered);

	return hist_entry__append_callchain(entry, iter->sample);
}
/*
 * prepare_entry callback for cumulative (--children) accounting: rewind
 * the global callchain cursor and allocate the per-sample cache of hist
 * entries that have already been accounted.
 *
 * The cache is stored in iter->priv and iter->curr counts its used slots.
 * It is released by iter_finish_cumulative_entry().
 *
 * Returns 0 on success, -ENOMEM when the cache cannot be allocated.
 */
static int
iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
			      struct addr_location *al __maybe_unused)
{
	struct hist_entry **he_cache;
	size_t depth = PERF_MAX_STACK_DEPTH;

	callchain_cursor_commit(&callchain_cursor);

	/*
	 * One slot is used by the sample's own entry and at most one more
	 * per callchain node.  Never allocate less than the historical
	 * PERF_MAX_STACK_DEPTH + 1, but grow when the committed cursor
	 * reports more nodes, because the add callbacks store into
	 * he_cache[iter->curr++] without a bounds check.
	 * NOTE(review): assumes callchain_cursor.nr is the node count of the
	 * resolved chain - confirm against the cursor implementation.
	 */
	if (callchain_cursor.nr > depth)
		depth = callchain_cursor.nr;

	/*
	 * This is for detecting cycles or recursions so that they're
	 * cumulated only one time to prevent entries more than 100%
	 * overhead.
	 */
	he_cache = malloc(sizeof(*he_cache) * (depth + 1));
	if (he_cache == NULL)
		return -ENOMEM;

	iter->priv = he_cache;
	iter->curr = 0;

	return 0;
}
/*
 * add_single_entry callback for cumulative accounting: add the entry for
 * the sampled location itself (sample_self == true), record it in the
 * duplicate-detection cache and attach the whole callchain to it.
 *
 * Returns 0 on success, -ENOMEM when the entry could not be added.
 */
static int
iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
				 struct addr_location *al)
{
	struct hists *hists = &iter->evsel->hists;
	struct perf_sample *sample = iter->sample;
	struct hist_entry **cache = iter->priv;
	struct hist_entry *entry;

	entry = __hists__add_entry(hists, al, iter->parent, NULL, NULL,
				   sample->period, sample->weight,
				   sample->transaction, true);
	if (!entry)
		return -ENOMEM;

	iter->he = entry;
	cache[iter->curr] = entry;
	iter->curr++;

	callchain_append(entry->callchain, &callchain_cursor, sample->period);

	/*
	 * callchain_append() advanced the cursor to the end, so rewind it
	 * before the per-node iteration starts.
	 */
	callchain_cursor_commit(&callchain_cursor);

	hists__inc_nr_samples(hists, entry->filtered);

	return 0;
}
static int
iter_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
{
struct callchain_cursor_node *node;
node = callchain_cursor_current(&callchain_cursor);
if (node == NULL)
return 0;
return fill_callchain_info(al, node, iter->hide_unresolved);
}
/*
 * add_next_entry callback for cumulative accounting: add an entry for the
 * callchain node described by @al, unless an equal entry was already
 * accounted for this sample.
 *
 * The entry is added with sample_self == false.  The part of the callchain
 * starting at the current node is appended to the new entry.
 *
 * Returns 0 on success or when the node is a duplicate, -ENOMEM when the
 * entry could not be added.
 */
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
{
struct perf_evsel *evsel = iter->evsel;
struct perf_sample *sample = iter->sample;
struct hist_entry **he_cache = iter->priv;
struct hist_entry *he;
/* Scratch entry built from @al, used only for the duplicate comparison. */
struct hist_entry he_tmp = {
.cpu = al->cpu,
.thread = al->thread,
.comm = thread__comm(al->thread),
.ip = al->addr,
.ms = {
.map = al->map,
.sym = al->sym,
},
.parent = iter->parent,
};
int i;
struct callchain_cursor cursor;
/*
 * Snapshot before advancing: the local copy then starts at the node
 * being processed, while the global cursor moves on to the next node
 * (this happens even when the node turns out to be a duplicate).
 */
callchain_cursor_snapshot(&cursor, &callchain_cursor);
callchain_cursor_advance(&callchain_cursor);
/*
 * Check if there are duplicate entries in the callchain.
 * It's possible that it has cycles or recursive calls.
 */
for (i = 0; i < iter->curr; i++) {
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
return 0;
}
}
he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction, false);
if (he == NULL)
return -ENOMEM;
iter->he = he;
/* Remember the entry so later nodes of this sample can detect it. */
he_cache[iter->curr++] = he;
callchain_append(he->callchain, &cursor, sample->period);
return 0;
}
/*
 * finish_entry callback for cumulative accounting: drop the per-sample
 * entry cache held in iter->priv and clear iter->he.  Always returns 0.
 */
static int
iter_finish_cumulative_entry(struct hist_entry_iter *iter,
			     struct addr_location *al __maybe_unused)
{
	iter->he = NULL;
	zfree(&iter->priv);

	return 0;
}
/*
 * Iterator for memory samples: one entry per sample, carrying the
 * mem_info resolved in the prepare step.  The next/add_next steps are
 * no-ops.
 */
const struct hist_iter_ops hist_iter_mem = {
.prepare_entry = iter_prepare_mem_entry,
.add_single_entry = iter_add_single_mem_entry,
.next_entry = iter_next_nop_entry,
.add_next_entry = iter_add_next_nop_entry,
.finish_entry = iter_finish_mem_entry,
};
/*
 * Iterator for branch samples: the single-entry step adds nothing, and
 * one entry per resolved branch is added through next/add_next.
 */
const struct hist_iter_ops hist_iter_branch = {
.prepare_entry = iter_prepare_branch_entry,
.add_single_entry = iter_add_single_branch_entry,
.next_entry = iter_next_branch_entry,
.add_next_entry = iter_add_next_branch_entry,
.finish_entry = iter_finish_branch_entry,
};
/*
 * Iterator for plain samples: one entry per sample, with the callchain
 * appended in the finish step.  The next/add_next steps are no-ops.
 */
const struct hist_iter_ops hist_iter_normal = {
.prepare_entry = iter_prepare_normal_entry,
.add_single_entry = iter_add_single_normal_entry,
.next_entry = iter_next_nop_entry,
.add_next_entry = iter_add_next_nop_entry,
.finish_entry = iter_finish_normal_entry,
};
/*
 * Iterator for cumulative (--children) accounting: one entry for the
 * sampled location itself, then one entry per callchain node that was not
 * already accounted for the same sample.
 */
const struct hist_iter_ops hist_iter_cumulative = {
.prepare_entry = iter_prepare_cumulative_entry,
.add_single_entry = iter_add_single_cumulative_entry,
.next_entry = iter_next_cumulative_entry,
.add_next_entry = iter_add_next_cumulative_entry,
.finish_entry = iter_finish_cumulative_entry,
};
/*
 * Feed one sample through the iterator described by iter->ops.
 *
 * The callchain is resolved first; a failure there is returned directly
 * and none of the ops are invoked.  Otherwise the sequence is
 * prepare_entry, add_single_entry, then add_next_entry for as long as
 * next_entry returns non-zero.  After each add step, iter->add_entry_cb
 * (if set) is called when iter->he is non-NULL, with "single" true for
 * the first entry and false for the following ones.  The first error
 * stops the sequence.
 *
 * finish_entry always runs once prepare_entry has been attempted; its
 * result is returned only when no earlier step failed.
 */
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
			 struct perf_evsel *evsel, struct perf_sample *sample,
			 int max_stack_depth, void *arg)
{
	int ret, finish_ret;

	ret = sample__resolve_callchain(sample, &iter->parent, evsel, al,
					max_stack_depth);
	if (ret)
		return ret;

	iter->evsel = evsel;
	iter->sample = sample;

	ret = iter->ops->prepare_entry(iter, al);
	if (!ret)
		ret = iter->ops->add_single_entry(iter, al);
	if (!ret && iter->he && iter->add_entry_cb)
		ret = iter->add_entry_cb(iter, al, true, arg);

	/* next_entry is not consulted again once any step has failed. */
	while (!ret && iter->ops->next_entry(iter, al)) {
		ret = iter->ops->add_next_entry(iter, al);
		if (!ret && iter->he && iter->add_entry_cb)
			ret = iter->add_entry_cb(iter, al, false, arg);
	}

	finish_ret = iter->ops->finish_entry(iter, al);

	return ret ? ret : finish_ret;
}
int64_t
......@@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he)
{
zfree(&he->branch_info);
zfree(&he->mem_info);
zfree(&he->stat_acc);
free_srcline(he->srcline);
free(he);
}
......@@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
if (!cmp) {
he_stat__add_stat(&iter->stat, &he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__add_stat(iter->stat_acc, he->stat_acc);
if (symbol_conf.use_callchain) {
callchain_cursor_reset(&callchain_cursor);
......@@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type)
events_stats__inc(&hists->stats, type);
}
/*
 * Account one PERF_RECORD_SAMPLE event in @hists.  Samples whose entry is
 * not filtered are additionally counted in nr_non_filtered_samples.
 */
void hists__inc_nr_samples(struct hists *hists, bool filtered)
{
	events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);

	if (filtered)
		return;

	hists->stats.nr_non_filtered_samples++;
}
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
......@@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
p = &(*p)->rb_right;
}
he = hist_entry__new(pair);
he = hist_entry__new(pair, true);
if (he) {
memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists;
......
......@@ -96,12 +96,50 @@ struct hists {
u16 col_len[HISTC_NR_COLS];
};
struct hist_entry_iter;
/*
 * Callbacks driving hist_entry_iter__add().  They are invoked in the order
 * listed: prepare_entry, add_single_entry, then add_next_entry repeatedly
 * while next_entry returns non-zero, and finally finish_entry.  Except for
 * next_entry, a non-zero return value is treated as an error.
 */
struct hist_iter_ops {
int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
};
/*
 * Per-sample state shared between hist_entry_iter__add() and the
 * hist_iter_ops callbacks.
 */
struct hist_entry_iter {
/* branch iterator: number of branches in the sample's branch stack */
int total;
/* branch iterator: index of the current branch; cumulative iterator:
 * number of entries stored in the he_cache kept in priv */
int curr;
/* skip branches / callchain nodes that could not be resolved */
bool hide_unresolved;
/* set by hist_entry_iter__add() from its arguments */
struct perf_evsel *evsel;
struct perf_sample *sample;
/* entry added by the latest add step; NULL suppresses add_entry_cb */
struct hist_entry *he;
/* filled in by sample__resolve_callchain() */
struct symbol *parent;
/* iterator-private data: mem_info, branch_info array or he_cache */
void *priv;
const struct hist_iter_ops *ops;
/*
 * user-defined callback function (optional): called after an add step
 * that left iter->he non-NULL; "single" is true for add_single_entry
 * and false for add_next_entry, "arg" is passed through unchanged
 */
int (*add_entry_cb)(struct hist_entry_iter *iter,
struct addr_location *al, bool single, void *arg);
};
extern const struct hist_iter_ops hist_iter_normal;
extern const struct hist_iter_ops hist_iter_branch;
extern const struct hist_iter_ops hist_iter_mem;
extern const struct hist_iter_ops hist_iter_cumulative;
/*
 * Add (or merge into an existing) hist entry for @al in @hists.
 *
 * @sample_self selects whether @period/@weight are accounted in the
 * entry's own stat; when symbol_conf.cumulate_callchain is set they are
 * accounted in the accumulated stat (stat_acc) in either case.
 *
 * Returns the entry, or NULL when a new entry could not be allocated.
 */
struct hist_entry *__hists__add_entry(struct hists *hists,
				      struct addr_location *al,
				      struct symbol *parent,
				      struct branch_info *bi,
				      struct mem_info *mi, u64 period,
				      u64 weight, u64 transaction,
				      bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__transaction_len(void);
......@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists);
void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists, u32 type);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
void events_stats__inc(struct events_stats *stats, u32 type);
size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
......@@ -192,6 +231,7 @@ enum {
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
......@@ -200,7 +240,11 @@ enum {
void perf_hpp__init(void);
void perf_hpp__column_register(struct perf_hpp_fmt *format);
void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
void perf_hpp__column_enable(unsigned col);
void perf_hpp__column_disable(unsigned col);
void perf_hpp__cancel_cumulate(void);
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
void perf_hpp__setup_output_field(void);
void perf_hpp__reset_output_field(void);
......@@ -218,6 +262,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent);
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent);
static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{
......
......@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
};
......@@ -1581,6 +1582,9 @@ void reset_output_field(void)
sort__has_sym = 0;
sort__has_dso = 0;
field_order = NULL;
sort_order = NULL;
reset_dimensions();
perf_hpp__reset_output_field();
}
......@@ -20,7 +20,7 @@
#include "parse-options.h"
#include "parse-events.h"
#include "hist.h"
#include "thread.h"
extern regex_t parent_regex;
......@@ -82,6 +82,7 @@ struct hist_entry {
struct list_head head;
} pairs;
struct he_stat stat;
struct he_stat *stat_acc;
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
......@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair,
list_add_tail(&pair->pairs.node, &he->pairs.head);
}
/*
 * Return the share of @he in its hists' total period, as a percentage.
 * With symbol_conf.cumulate_callchain the accumulated period (stat_acc)
 * is used instead of the entry's own period.  Yields 0 when the total
 * period is 0.
 */
static inline float hist_entry__get_percent_limit(struct hist_entry *he)
{
	u64 total = hists__total_period(he->hists);
	u64 period;

	if (unlikely(total == 0))
		return 0;

	if (symbol_conf.cumulate_callchain)
		period = he->stat_acc->period;
	else
		period = he->stat.period;

	return period * 100.0 / total;
}
enum sort_mode {
SORT_MODE__NORMAL,
SORT_MODE__BRANCH,
......
......@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries;
char **vmlinux_path;
/*
 * Default symbol configuration.  Callchain accumulation
 * (cumulate_callchain) is enabled by default; members not listed here
 * are zero-initialized.
 */
struct symbol_conf symbol_conf = {
	.use_modules		= true,
	.try_vmlinux_path	= true,
	.annotate_src		= true,
	.demangle		= true,
	.cumulate_callchain	= true,
	.symfs			= "",
};
static enum dso_binary_type binary_type_symtab[] = {
......
......@@ -109,6 +109,7 @@ struct symbol_conf {
show_nr_samples,
show_total_period,
use_callchain,
cumulate_callchain,
exclude_other,
show_cpu_utilization,
initialized,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment