Commit 9b261365 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core

Pull perf/core improvements and fixes from Jiri Olsa:

  * Add support to accumulate hist periods (Namhyung Kim)
Signed-off-by: default avatarJiri Olsa <jolsa@kernel.org>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents e450f90e 0506aecc
......@@ -111,7 +111,7 @@ OPTIONS
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in -F will be appended
......@@ -163,6 +163,11 @@ OPTIONS
Default: fractal,0.5,callee,function.
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
......
......@@ -119,7 +119,7 @@ Default is to monitor all CPUS.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in --field will be appended
......@@ -161,6 +161,12 @@ Default is to monitor all CPUS.
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g.
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
enabled.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
......
......@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
......
......@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
true);
if (he == NULL)
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);
return ret;
}
......
......@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction)
{
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
transaction) != NULL)
transaction, true) != NULL)
return 0;
return -ENOMEM;
}
......
......@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL);
return 0;
}
if (!strcmp(var, "report.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb);
}
......@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
*/
if (he->stat.nr_events == 1)
rep->nr_entries++;
/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
{
struct symbol *parent = NULL;
struct hist_entry *he;
struct mem_info *mi, *mx;
uint64_t cost;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
int err = 0;
struct report *rep = arg;
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
struct mem_info *mi;
struct branch_info *bi;
mi = sample__resolve_mem(sample, al);
if (!mi)
return -ENOMEM;
report__inc_stats(rep, he);
if (rep->hide_unresolved && !al->sym)
if (!ui__has_annotation())
return 0;
cost = sample->weight;
if (!cost)
cost = 1;
/*
* must pass period=weight in order to get the correct
* sorting from hists__collapse_resort() which is solely
* based on periods. We want sorting be done on nr_events * weight
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
cost, cost, 0);
if (!he)
return -ENOMEM;
if (ui__has_annotation()) {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
if (err)
goto out;
mx = he->mem_info;
err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
err = hist_entry__append_callchain(he, sample);
out:
return err;
}
static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
struct symbol *parent = NULL;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
bi = sample__resolve_bstack(sample, al);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
err = -ENOMEM;
/* overwrite the 'al' to branch-to info */
al->map = bi[i].to.map;
al->sym = bi[i].to.sym;
al->addr = bi[i].to.addr;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
1, 1, 0);
if (he) {
if (ui__has_annotation()) {
bx = he->branch_info;
err = addr_map_symbol__inc_samples(&bx->from,
evsel->idx);
if (err)
goto out;
err = addr_map_symbol__inc_samples(&bx->to,
evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
} else
} else if (rep->mem_mode) {
mi = he->mem_info;
err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
if (err)
goto out;
}
err = 0;
out:
free(bi);
return err;
}
static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
struct addr_location *al, struct perf_sample *sample)
{
struct symbol *parent = NULL;
struct hist_entry *he;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
if (err)
return err;
he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
if (he == NULL)
return -ENOMEM;
err = hist_entry__append_callchain(he, sample);
if (err)
goto out;
if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
report__inc_stats(rep, he);
} else if (symbol_conf.cumulate_callchain) {
if (single)
err = hist_entry__inc_addr_samples(he, evsel->idx,
al->addr);
} else {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}
out:
return err;
}
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
{
struct report *rep = container_of(tool, struct report, tool);
struct addr_location al;
struct hist_entry_iter iter = {
.hide_unresolved = rep->hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
......@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
if (sort__mode == SORT_MODE__BRANCH) {
ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding lbr entry, skipping event\n");
} else if (rep->mem_mode == 1) {
ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding mem entry, skipping event\n");
} else {
if (al.map != NULL)
al.map->dso->hit = 1;
if (sort__mode == SORT_MODE__BRANCH)
iter.ops = &hist_iter_branch;
else if (rep->mem_mode)
iter.ops = &hist_iter_mem;
else if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
if (al.map != NULL)
al.map->dso->hit = 1;
ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
ret = report__add_hist_entry(rep, evsel, &al, sample);
if (ret < 0)
pr_debug("problem incrementing symbol period, skipping event\n");
}
return ret;
}
......@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
}
}
if (symbol_conf.cumulate_callchain) {
/* Silently ignore if callchain is missing */
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
}
if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
......@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
......@@ -804,8 +719,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (branch_mode == -1 && has_br_stack)
if (branch_mode == -1 && has_br_stack) {
sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}
if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
......@@ -813,6 +730,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
goto error;
}
sort__mode = SORT_MODE__MEMORY;
symbol_conf.cumulate_callchain = false;
}
if (setup_sorting() < 0) {
......
......@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
int err = 0;
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
......
......@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
pthread_mutex_unlock(&notes->lock);
/*
* This function is now called with he->hists->lock held.
* Release it before going to sleep.
*/
pthread_mutex_unlock(&he->hists->lock);
if (err == -ERANGE && !he->ms.map->erange_warned)
ui__warn_map_erange(he->ms.map, sym, ip);
else if (err == -ENOMEM) {
......@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
sym->name);
sleep(1);
}
pthread_mutex_lock(&he->hists->lock);
}
static void perf_top__show_details(struct perf_top *top)
......@@ -238,27 +246,6 @@ static void perf_top__show_details(struct perf_top *top)
pthread_mutex_unlock(&notes->lock);
}
static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample)
{
struct hist_entry *he;
pthread_mutex_lock(&evsel->hists.lock);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
pthread_mutex_unlock(&evsel->hists.lock);
if (he == NULL)
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
evsel->hists.stats.nr_non_filtered_samples++;
return he;
}
static void perf_top__print_sym_table(struct perf_top *top)
{
char bf[160];
......@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
return 0;
}
static int hist_iter__top_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
{
struct perf_top *top = arg;
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
if (sort__has_sym && single) {
u64 ip = al->addr;
if (al->map)
ip = al->map->unmap_ip(al->map, ip);
perf_top__record_precise_ip(top, he, evsel->idx, ip);
}
return 0;
}
static void perf_event__process_sample(struct perf_tool *tool,
const union perf_event *event,
struct perf_evsel *evsel,
......@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
struct machine *machine)
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
struct symbol *parent = NULL;
u64 ip = sample->ip;
struct addr_location al;
int err;
......@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
}
if (al.sym == NULL || !al.sym->ignore) {
struct hist_entry *he;
struct hist_entry_iter iter = {
.add_entry_cb = hist_iter__top_callback,
};
err = sample__resolve_callchain(sample, &parent, evsel, &al,
top->max_stack);
if (err)
return;
if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;
he = perf_evsel__add_hist_entry(evsel, &al, sample);
if (he == NULL) {
pr_err("Problem incrementing symbol period, skipping event\n");
return;
}
pthread_mutex_lock(&evsel->hists.lock);
err = hist_entry__append_callchain(he, sample);
if (err)
return;
err = hist_entry_iter__add(&iter, &al, evsel, sample,
top->max_stack, top);
if (err < 0)
pr_err("Problem incrementing symbol period, skipping event\n");
if (sort__has_sym)
perf_top__record_precise_ip(top, he, evsel->idx, ip);
pthread_mutex_unlock(&evsel->hists.lock);
}
return;
......@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb)
if (!strcmp(var, "top.call-graph"))
return record_parse_callchain(value, &top->record_opts);
if (!strcmp(var, "top.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}
return perf_default_config(var, value, cb);
}
......@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK(0, "call-graph", &top.record_opts,
"mode[,dump_size]", record_callchain_help,
&parse_callchain_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
......@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
top.sym_evsel = perf_evlist__first(top.evlist);
if (!symbol_conf.use_callchain) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
......
......@@ -139,6 +139,10 @@ static struct test {
.desc = "Test output sorting of hist entries",
.func = test__hists_output,
},
{
.desc = "Test cumulation of child hist entries",
.func = test__hists_cumulate,
},
{
.func = NULL,
},
......
......@@ -12,9 +12,9 @@ static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
{ FAKE_PID_PERF1, "perf" },
{ FAKE_PID_PERF2, "perf" },
{ FAKE_PID_BASH, "bash" },
};
static struct {
......@@ -22,15 +22,15 @@ static struct {
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
{ FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
{ FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
{ FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
{ FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
{ FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
{ FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
{ FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
};
struct fake_sym {
......@@ -40,27 +40,30 @@ struct fake_sym {
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
};
static struct {
......@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines)
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.len = FAKE_MAP_LENGTH,
.pgoff = 0ULL,
},
};
......@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists)
he = rb_entry(node, struct hist_entry, rb_node);
if (!he->filtered) {
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
i, thread__comm_str(he->thread), he->thread->tid,
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
he->ms.sym->name, he->stat.period,
he->stat_acc ? he->stat_acc->period : 0);
}
i++;
......
......@@ -4,6 +4,34 @@
struct machine;
struct machines;
#define FAKE_PID_PERF1 100
#define FAKE_PID_PERF2 200
#define FAKE_PID_BASH 300
#define FAKE_MAP_PERF 0x400000
#define FAKE_MAP_BASH 0x400000
#define FAKE_MAP_LIBC 0x500000
#define FAKE_MAP_KERNEL 0xf00000
#define FAKE_MAP_LENGTH 0x100000
#define FAKE_SYM_OFFSET1 700
#define FAKE_SYM_OFFSET2 800
#define FAKE_SYM_OFFSET3 900
#define FAKE_SYM_LENGTH 100
#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1
#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2
#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3
#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1
#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2
#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3
#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
/*
* The setup_fake_machine() provides a test environment which consists
* of 3 processes that have 3 mappings and in turn, have 3 symbols
......@@ -13,7 +41,7 @@ struct machines;
* ............. ............. ...................
* perf: 100 perf main
* perf: 100 perf run_command
* perf: 100 perf comd_record
* perf: 100 perf cmd_record
* perf: 100 libc malloc
* perf: 100 libc free
* perf: 100 libc realloc
......@@ -22,7 +50,7 @@ struct machines;
* perf: 100 [kernel] sys_perf_event_open
* perf: 200 perf main
* perf: 200 perf run_command
* perf: 200 perf comd_record
* perf: 200 perf cmd_record
* perf: 200 libc malloc
* perf: 200 libc free
* perf: 200 libc realloc
......
This diff is collapsed.
......@@ -21,33 +21,33 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = 100, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [kernel] page_fault() */
{ .pid = 200, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .pid = 300, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
static int add_hist_entries(struct perf_evlist *evlist,
struct machine *machine __maybe_unused)
{
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
struct perf_sample sample = { .period = 100, };
size_t i;
/*
......@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
/* make sure it has no filter at first */
evsel->hists.thread_filter = NULL;
......@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 100, 1, 0);
if (he == NULL)
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
}
......
......@@ -21,41 +21,41 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
};
static struct sample fake_samples[][5] = {
{
/* perf [perf] run_command() */
{ .pid = 100, .ip = 0x40000 + 800, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [kernel] page_fault() */
{ .pid = 100, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* perf [kernel] sys_perf_event_open() */
{ .pid = 200, .ip = 0xf0000 + 900, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
/* bash [libc] free() */
{ .pid = 300, .ip = 0x50000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
},
{
/* perf [libc] free() */
{ .pid = 200, .ip = 0x50000 + 800, },
{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
/* bash [bash] xfee() */
{ .pid = 300, .ip = 0x40000 + 900, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
/* bash [libc] realloc() */
{ .pid = 300, .ip = 0x50000 + 900, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
{ .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
},
};
......@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
struct perf_sample sample = { .period = 1, };
size_t i = 0, k;
/*
......@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0);
NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
......@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 1, 1, 0);
NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
......
......@@ -22,31 +22,31 @@ struct sample {
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, },
{ .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 700, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
{ .cpu = 1, .pid = 100, .ip = 0x40000 + 900, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* perf [libc] malloc() */
{ .cpu = 1, .pid = 100, .ip = 0x50000 + 700, },
{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [libc] free() */
{ .cpu = 2, .pid = 100, .ip = 0x50000 + 800, },
{ .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
/* perf [perf] main() */
{ .cpu = 2, .pid = 200, .ip = 0x40000 + 700, },
{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [kernel] page_fault() */
{ .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, },
{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* bash [bash] main() */
{ .cpu = 3, .pid = 300, .ip = 0x40000 + 700, },
{ .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
/* bash [bash] xmalloc() */
{ .cpu = 0, .pid = 300, .ip = 0x40000 + 800, },
{ .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [kernel] page_fault() */
{ .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, },
{ .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
};
static int add_hist_entries(struct hists *hists, struct machine *machine)
{
struct addr_location al;
struct hist_entry *he;
struct perf_evsel *evsel = hists_to_evsel(hists);
struct perf_sample sample = { .period = 100, };
size_t i;
......@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
.misc = PERF_RECORD_MISC_USER,
},
};
struct hist_entry_iter iter = {
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
sample.cpu = fake_samples[i].cpu;
sample.pid = fake_samples[i].pid;
......@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
&sample) < 0)
goto out;
he = __hists__add_entry(hists, &al, NULL, NULL, NULL,
sample.period, 1, 0);
if (he == NULL)
if (hist_entry_iter__add(&iter, &al, evsel, &sample,
PERF_MAX_STACK_DEPTH, NULL) < 0)
goto out;
fake_samples[i].thread = al.thread;
......
......@@ -45,6 +45,7 @@ int test__hists_filter(void);
int test__mmap_thread_lookup(void);
int test__thread_mg_share(void);
int test__hists_output(void);
int test__hists_cumulate(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
......
......@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
static void hist_browser__update_nr_entries(struct hist_browser *hb);
static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt);
static bool hist_browser__has_filter(struct hist_browser *hb)
......@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
struct hists *hists = browser->hists;
for (nd = rb_first(&hists->entries);
(nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL;
(nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
nd = rb_next(nd)) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
hist_entry__set_folding(he, unfold);
......@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
__hpp__slsmg_color_printf, true); \
}
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int \
hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
if (!symbol_conf.cumulate_callchain) { \
int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \
slsmg_printf("%s", hpp->buf); \
\
return ret; \
} \
return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \
__hpp__slsmg_color_printf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
void hist_browser__init_hpp(void)
{
......@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void)
hist_browser__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
hist_browser__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
hist_browser__hpp_color_overhead_acc;
}
static int hist_browser__show_entry(struct hist_browser *browser,
......@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(h->hists);
float percent = 0.0;
float percent;
if (h->filtered)
continue;
if (total)
percent = h->stat.period * 100.0 / total;
percent = hist_entry__get_percent_limit(h);
if (percent < hb->min_pcnt)
continue;
......@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
}
static struct rb_node *hists__filter_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt)
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
float percent = hist_entry__get_percent_limit(h);
if (!h->filtered && percent >= min_pcnt)
return nd;
......@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
}
static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
struct hists *hists,
float min_pcnt)
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
u64 total = hists__total_period(hists);
float percent = 0.0;
if (total)
percent = h->stat.period * 100.0 / total;
float percent = hist_entry__get_percent_limit(h);
if (!h->filtered && percent >= min_pcnt)
return nd;
......@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
switch (whence) {
case SEEK_SET:
nd = hists__filter_entries(rb_first(browser->entries),
hb->hists, hb->min_pcnt);
hb->min_pcnt);
break;
case SEEK_CUR:
nd = browser->top;
goto do_offset;
case SEEK_END:
nd = hists__filter_prev_entries(rb_last(browser->entries),
hb->hists, hb->min_pcnt);
hb->min_pcnt);
first = false;
break;
default:
......@@ -913,8 +924,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
break;
}
}
nd = hists__filter_entries(rb_next(nd), hb->hists,
hb->min_pcnt);
nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
if (nd == NULL)
break;
--offset;
......@@ -947,7 +957,7 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
}
}
nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
nd = hists__filter_prev_entries(rb_prev(nd),
hb->min_pcnt);
if (nd == NULL)
break;
......@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
{
struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
browser->hists,
browser->min_pcnt);
int printed = 0;
......@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
printed += hist_browser__fprintf_entry(browser, h, fp);
nd = hists__filter_entries(rb_next(nd), browser->hists,
browser->min_pcnt);
nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
}
return printed;
......@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
return;
}
while ((nd = hists__filter_entries(nd, hb->hists,
hb->min_pcnt)) != NULL) {
while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
nr_entries++;
nd = rb_next(nd);
}
......
......@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
__percent_color_snprintf, true); \
}
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
__percent_color_snprintf, true); \
}
__HPP_COLOR_PERCENT_FN(overhead, period)
__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
#undef __HPP_COLOR_PERCENT_FN
......@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_guest_sys;
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
perf_gtk__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
perf_gtk__hpp_color_overhead_acc;
}
static void callchain_list__sym_name(struct callchain_list *cl,
......@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (perf_hpp__should_skip(fmt))
continue;
/*
* XXX no way to determine where symcol column is..
* Just use last column for now.
*/
if (perf_hpp__is_sort_entry(fmt))
sym_col = col_idx;
fmt->header(fmt, &hpp, hists_to_evsel(hists));
gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
......@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
GtkTreeIter iter;
u64 total = hists__total_period(h->hists);
float percent = 0.0;
float percent;
if (h->filtered)
continue;
if (total)
percent = h->stat.period * 100.0 / total;
percent = hist_entry__get_percent_limit(h);
if (percent < min_pcnt)
continue;
......@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
if (symbol_conf.use_callchain && sort__has_sym) {
if (callchain_param.mode == CHAIN_GRAPH_REL)
total = h->stat.period;
total = symbol_conf.cumulate_callchain ?
h->stat_acc->period : h->stat.period;
perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
sym_col, total);
......
......@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
return ret;
}
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent)
{
if (!symbol_conf.cumulate_callchain) {
return snprintf(hpp->buf, hpp->size, "%*s",
fmt_percent ? 8 : 12, "N/A");
}
return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
}
static int field_cmp(u64 field_a, u64 field_b)
{
if (field_a > field_b)
......@@ -160,6 +172,24 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret;
}
static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
hpp_field_fn get_field)
{
s64 ret = 0;
if (symbol_conf.cumulate_callchain) {
/*
* Put caller above callee when they have equal period.
*/
ret = field_cmp(get_field(a), get_field(b));
if (ret)
return ret;
ret = b->callchain->max_depth - a->callchain->max_depth;
}
return ret;
}
#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, \
......@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
return __hpp__sort(a, b, he_get_##_field); \
}
#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
static u64 he_get_acc_##_field(struct hist_entry *he) \
{ \
return he->stat_acc->_field; \
} \
\
static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
hpp_color_scnprintf, true); \
}
#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \
hpp_entry_scnprintf, true); \
}
#define __HPP_SORT_ACC_FN(_type, _field) \
static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
{ \
return __hpp__sort_acc(a, b, he_get_acc_##_field); \
}
#define __HPP_ENTRY_RAW_FN(_type, _field) \
static u64 he_get_raw_##_field(struct hist_entry *he) \
{ \
......@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \
__HPP_ENTRY_PERCENT_FN(_type, _field) \
__HPP_SORT_FN(_type, _field)
#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
__HPP_SORT_ACC_FN(_type, _field)
#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
__HPP_ENTRY_RAW_FN(_type, _field) \
__HPP_SORT_RAW_FN(_type, _field)
__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
HPP_RAW_FNS(period, "Period", period, 12, 12)
......@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused,
.sort = hpp__sort_ ## _name, \
}
#define HPP__COLOR_ACC_PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
.width = hpp__width_ ## _name, \
.color = hpp__color_ ## _name, \
.entry = hpp__entry_ ## _name, \
.cmp = hpp__nop_cmp, \
.collapse = hpp__nop_cmp, \
.sort = hpp__sort_ ## _name, \
}
#define HPP__PRINT_FNS(_name) \
{ \
.header = hpp__header_ ## _name, \
......@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS(overhead_us),
HPP__COLOR_PRINT_FNS(overhead_guest_sys),
HPP__COLOR_PRINT_FNS(overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
HPP__PRINT_FNS(samples),
HPP__PRINT_FNS(period)
};
......@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list);
#undef HPP__COLOR_PRINT_FNS
#undef HPP__COLOR_ACC_PRINT_FNS
#undef HPP__PRINT_FNS
#undef HPP_PERCENT_FNS
#undef HPP_PERCENT_ACC_FNS
#undef HPP_RAW_FNS
#undef __HPP_HEADER_FN
#undef __HPP_WIDTH_FN
#undef __HPP_COLOR_PERCENT_FN
#undef __HPP_ENTRY_PERCENT_FN
#undef __HPP_COLOR_ACC_PERCENT_FN
#undef __HPP_ENTRY_ACC_PERCENT_FN
#undef __HPP_ENTRY_RAW_FN
#undef __HPP_SORT_FN
#undef __HPP_SORT_ACC_FN
#undef __HPP_SORT_RAW_FN
void perf_hpp__init(void)
......@@ -361,6 +447,13 @@ void perf_hpp__init(void)
if (field_order)
return;
if (symbol_conf.cumulate_callchain) {
perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
perf_hpp__format[PERF_HPP__OVERHEAD].header =
hpp__header_overhead_self;
}
perf_hpp__column_enable(PERF_HPP__OVERHEAD);
if (symbol_conf.show_cpu_utilization) {
......@@ -383,6 +476,12 @@ void perf_hpp__init(void)
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list))
list_add(list, &perf_hpp__sort_list);
if (symbol_conf.cumulate_callchain) {
list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
if (list_empty(list))
list_add(list, &perf_hpp__sort_list);
}
}
void perf_hpp__column_register(struct perf_hpp_fmt *format)
......@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
list_add_tail(&format->list, &perf_hpp__list);
}
void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
{
list_del(&format->list);
}
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
{
list_add_tail(&format->sort_list, &perf_hpp__sort_list);
......@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col)
perf_hpp__column_register(&perf_hpp__format[col]);
}
void perf_hpp__column_disable(unsigned col)
{
BUG_ON(col >= PERF_HPP__MAX_INDEX);
perf_hpp__column_unregister(&perf_hpp__format[col]);
}
void perf_hpp__cancel_cumulate(void)
{
if (field_order)
return;
perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
}
void perf_hpp__setup_output_field(void)
{
struct perf_hpp_fmt *fmt;
......
......@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
{
switch (callchain_param.mode) {
case CHAIN_GRAPH_REL:
return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period,
return callchain__fprintf_graph(fp, &he->sorted_chain,
symbol_conf.cumulate_callchain ?
he->stat_acc->period : he->stat.period,
left_margin);
break;
case CHAIN_GRAPH_ABS:
......@@ -461,12 +463,12 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
float percent = h->stat.period * 100.0 /
hists->stats.total_period;
float percent;
if (h->filtered)
continue;
percent = hist_entry__get_percent_limit(h);
if (percent < min_pcnt)
continue;
......
......@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
if (sample->callchain == NULL)
return 0;
if (symbol_conf.use_callchain || sort__has_parent) {
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
sort__has_parent) {
return machine__resolve_callchain(al->machine, evsel, al->thread,
sample, parent, al, max_stack);
}
......@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
return 0;
return callchain_append(he->callchain, &callchain_cursor, sample->period);
}
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved)
{
al->map = node->map;
al->sym = node->sym;
if (node->map)
al->addr = node->map->map_ip(node->map, node->ip);
else
al->addr = node->ip;
if (al->sym == NULL) {
if (hide_unresolved)
return 0;
if (al->map == NULL)
goto out;
}
if (al->map->groups == &al->machine->kmaps) {
if (machine__is_host(al->machine)) {
al->cpumode = PERF_RECORD_MISC_KERNEL;
al->level = 'k';
} else {
al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
al->level = 'g';
}
} else {
if (machine__is_host(al->machine)) {
al->cpumode = PERF_RECORD_MISC_USER;
al->level = '.';
} else if (perf_guest) {
al->cpumode = PERF_RECORD_MISC_GUEST_USER;
al->level = 'u';
} else {
al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
al->level = 'H';
}
}
out:
return 1;
}
......@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
struct perf_evsel *evsel, struct addr_location *al,
int max_stack);
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved);
extern const char record_callchain_help[];
int parse_callchain_report_opt(const char *arg);
static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
struct callchain_cursor *src)
{
*dest = *src;
dest->first = src->curr;
dest->nr -= src->pos;
}
#endif /* __PERF_CALLCHAIN_H */
This diff is collapsed.
......@@ -96,12 +96,50 @@ struct hists {
u16 col_len[HISTC_NR_COLS];
};
struct hist_entry_iter;
struct hist_iter_ops {
int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
};
struct hist_entry_iter {
int total;
int curr;
bool hide_unresolved;
struct perf_evsel *evsel;
struct perf_sample *sample;
struct hist_entry *he;
struct symbol *parent;
void *priv;
const struct hist_iter_ops *ops;
/* user-defined callback function (optional) */
int (*add_entry_cb)(struct hist_entry_iter *iter,
struct addr_location *al, bool single, void *arg);
};
extern const struct hist_iter_ops hist_iter_normal;
extern const struct hist_iter_ops hist_iter_branch;
extern const struct hist_iter_ops hist_iter_mem;
extern const struct hist_iter_ops hist_iter_cumulative;
struct hist_entry *__hists__add_entry(struct hists *hists,
struct addr_location *al,
struct symbol *parent,
struct branch_info *bi,
struct mem_info *mi, u64 period,
u64 weight, u64 transaction);
u64 weight, u64 transaction,
bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__transaction_len(void);
......@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists);
void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists, u32 type);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
void events_stats__inc(struct events_stats *stats, u32 type);
size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
......@@ -192,6 +231,7 @@ enum {
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
......@@ -200,7 +240,11 @@ enum {
void perf_hpp__init(void);
void perf_hpp__column_register(struct perf_hpp_fmt *format);
void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
void perf_hpp__column_enable(unsigned col);
void perf_hpp__column_disable(unsigned col);
void perf_hpp__cancel_cumulate(void);
void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
void perf_hpp__setup_output_field(void);
void perf_hpp__reset_output_field(void);
......@@ -218,6 +262,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent);
int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt,
hpp_snprint_fn print_fn, bool fmt_percent);
static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{
......
......@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
};
......@@ -1581,6 +1582,9 @@ void reset_output_field(void)
sort__has_sym = 0;
sort__has_dso = 0;
field_order = NULL;
sort_order = NULL;
reset_dimensions();
perf_hpp__reset_output_field();
}
......@@ -20,7 +20,7 @@
#include "parse-options.h"
#include "parse-events.h"
#include "hist.h"
#include "thread.h"
extern regex_t parent_regex;
......@@ -82,6 +82,7 @@ struct hist_entry {
struct list_head head;
} pairs;
struct he_stat stat;
struct he_stat *stat_acc;
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
......@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair,
list_add_tail(&pair->pairs.node, &he->pairs.head);
}
static inline float hist_entry__get_percent_limit(struct hist_entry *he)
{
u64 period = he->stat.period;
u64 total_period = hists__total_period(he->hists);
if (unlikely(total_period == 0))
return 0;
if (symbol_conf.cumulate_callchain)
period = he->stat_acc->period;
return period * 100.0 / total_period;
}
enum sort_mode {
SORT_MODE__NORMAL,
SORT_MODE__BRANCH,
......
......@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries;
char **vmlinux_path;
struct symbol_conf symbol_conf = {
.use_modules = true,
.try_vmlinux_path = true,
.annotate_src = true,
.demangle = true,
.symfs = "",
.use_modules = true,
.try_vmlinux_path = true,
.annotate_src = true,
.demangle = true,
.cumulate_callchain = true,
.symfs = "",
};
static enum dso_binary_type binary_type_symtab[] = {
......
......@@ -109,6 +109,7 @@ struct symbol_conf {
show_nr_samples,
show_total_period,
use_callchain,
cumulate_callchain,
exclude_other,
show_cpu_utilization,
initialized,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment