Commit 1354ac6a authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - IPC and cycle accounting in 'perf annotate'. (Andi Kleen)

  - Display cycles in branch sort mode in 'perf report'. (Andi Kleen)

  - Add total time column to 'perf trace' syscall stats summary. (Milian Woff)

Infrastructure changes:

  - PMU helpers to use in Intel PT. (Adrian Hunter)

  - Fix perf-with-kcore script not to split args with spaces. (Adrian Hunter)

  - Add empty Build files for some more architectures. (Ben Hutchings)

  - Move 'perf stat' config variables to a struct to allow using some
    of its functions in more places. (Jiri Olsa)

  - Add DWARF register names for 'xtensa' arch. (Max Filippov)

  - Implement BPF programs attached to uprobes. (Wang Nan)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents b6b6c18f 141b2d31
......@@ -243,6 +243,7 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
TRACE_EVENT_FL_KPROBE_BIT,
TRACE_EVENT_FL_UPROBE_BIT,
};
/*
......@@ -257,6 +258,7 @@ enum {
* USE_CALL_FILTER - For trace internal events, don't use file filter
* TRACEPOINT - Event is a tracepoint
* KPROBE - Event is a kprobe
* UPROBE - Event is a uprobe
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
......@@ -267,8 +269,11 @@ enum {
TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
struct trace_event_call {
struct list_head list;
struct trace_event_class *class;
......@@ -542,7 +547,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
event_triggers_post_call(file, tt);
}
#ifdef CONFIG_BPF_SYSCALL
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
......
......@@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
if (event->tp_event->prog)
return -EEXIST;
if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
/* bpf programs can only be attached to kprobes */
if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
/* bpf programs can only be attached to u/kprobes */
return -EINVAL;
prog = bpf_prog_get(prog_fd);
......
......@@ -434,7 +434,7 @@ config UPROBE_EVENT
config BPF_EVENTS
depends on BPF_SYSCALL
depends on KPROBE_EVENT
depends on KPROBE_EVENT || UPROBE_EVENT
bool
default y
help
......
......@@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
{
struct trace_event_call *call = &tu->tp.call;
struct uprobe_trace_entry_head *entry;
struct bpf_prog *prog = call->prog;
struct hlist_head *head;
void *data;
int size, esize;
int rctx;
if (prog && !trace_call_bpf(prog, regs))
return;
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
......@@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return -ENODEV;
}
call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
ret = trace_add_event_call(call);
......
......@@ -109,6 +109,7 @@ OPTIONS
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
- cycles: Cycles in basic block
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
......
......@@ -208,6 +208,27 @@ Default is to monitor all CPUS.
This option sets the time out limit. The default value is 500 ms.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters.
For a full list of modifiers please see the perf record manpage.
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
INTERACTIVE PROMPTING KEYS
--------------------------
......
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
libperf-$(CONFIG_DWARF) += dwarf-regs.o
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (c) 2015 Cadence Design Systems Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include <dwarf-regs.h>
#define XTENSA_MAX_REGS 16
const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
};
const char *get_arch_regstr(unsigned int n)
{
return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
}
......@@ -187,6 +187,7 @@ static void hists__find_annotations(struct hists *hists,
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
zfree(&notes->src->cycles_hist);
zfree(&notes->src);
}
}
......
......@@ -53,6 +53,7 @@ struct report {
bool mem_mode;
bool header;
bool header_only;
bool nonany_branch_mode;
int max_stack;
struct perf_read_values show_threads_values;
const char *pretty_printing_style;
......@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
if (!ui__has_annotation())
return 0;
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
rep->nonany_branch_mode);
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
......@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
/* ??? handle more cases than just ANY? */
if (!(perf_evlist__combined_branch_type(session->evlist) &
PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
return 0;
}
......
This diff is collapsed.
......@@ -40,6 +40,7 @@
#include "util/xyarray.h"
#include "util/sort.h"
#include "util/intlist.h"
#include "util/parse-branch-options.h"
#include "arch/common.h"
#include "util/debug.h"
......@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, he, evsel->idx, ip);
}
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
return 0;
}
......@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"don't try to adjust column width, use these fixed values"),
OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
"branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_END()
};
const char * const top_usage[] = {
......
......@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, "\n");
printed += fprintf(fp, " syscall calls min avg max stddev\n");
printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
printed += fprintf(fp, " syscall calls total min avg max stddev\n");
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
/* each int_node is a syscall */
while (inode) {
......@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
sc = &trace->syscalls.table[inode->i];
printed += fprintf(fp, " %-15s", sc->name);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
n, min, avg);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
n, avg * n, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
......
......@@ -50,7 +50,7 @@ copy_kcore()
fi
rm -f perf.data.junk
("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null &
("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
PERF_PID=$!
# Need to make sure that perf has started
......@@ -160,18 +160,18 @@ record()
echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
fi
if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
fi
if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
fi
if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
true
elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
true
elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
......@@ -193,8 +193,8 @@ record()
mkdir "$PERF_DATA_DIR"
echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*"
"$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true
echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
"$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
exit 1
......@@ -209,8 +209,8 @@ subcommand()
{
find_perf
check_buildid_cache_permissions
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*"
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $*
echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
"$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
}
if [ "$1" = "fix_buildid_cache_permissions" ] ; then
......@@ -234,7 +234,7 @@ fi
case "$PERF_SUB_COMMAND" in
"record")
while [ "$1" != "--" ] ; do
PERF_OPTIONS+="$1 "
PERF_OPTIONS+=("$1")
shift || break
done
if [ "$1" != "--" ] ; then
......@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
usage
fi
shift
record $*
record "$@"
;;
"script")
subcommand $*
subcommand "$@"
;;
"report")
subcommand $*
subcommand "$@"
;;
"inject")
subcommand $*
subcommand "$@"
;;
*)
usage
......
......@@ -16,6 +16,9 @@ struct disasm_line_samples {
u64 nr;
};
#define IPC_WIDTH 6
#define CYCLES_WIDTH 6
struct browser_disasm_line {
struct rb_node rb_node;
u32 idx;
......@@ -53,6 +56,7 @@ struct annotate_browser {
int max_jump_sources;
int nr_jumps;
bool searching_backwards;
bool have_cycles;
u8 addr_width;
u8 jumps_width;
u8 target_width;
......@@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
return ui_browser__set_color(&browser->b, color);
}
static int annotate_browser__pcnt_width(struct annotate_browser *ab)
{
int w = 7 * ab->nr_events;
if (ab->have_cycles)
w += IPC_WIDTH + CYCLES_WIDTH;
return w;
}
static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
......@@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
(!current_entry || (browser->use_navkeypressed &&
!browser->navkeypressed)));
int width = browser->width, printed;
int i, pcnt_width = 7 * ab->nr_events;
int i, pcnt_width = annotate_browser__pcnt_width(ab);
double percent_max = 0.0;
char bf[256];
......@@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
}
if (dl->offset != -1 && percent_max != 0.0) {
for (i = 0; i < ab->nr_events; i++) {
ui_browser__set_percent_color(browser,
bdl->samples[i].percent,
current_entry);
if (annotate_browser__opts.show_total_period)
slsmg_printf("%6" PRIu64 " ",
bdl->samples[i].nr);
else
slsmg_printf("%6.2f ", bdl->samples[i].percent);
if (percent_max != 0.0) {
for (i = 0; i < ab->nr_events; i++) {
ui_browser__set_percent_color(browser,
bdl->samples[i].percent,
current_entry);
if (annotate_browser__opts.show_total_period)
slsmg_printf("%6" PRIu64 " ",
bdl->samples[i].nr);
else
slsmg_printf("%6.2f ", bdl->samples[i].percent);
}
} else {
slsmg_write_nstring(" ", 7 * ab->nr_events);
}
} else {
ui_browser__set_percent_color(browser, 0, current_entry);
slsmg_write_nstring(" ", pcnt_width);
slsmg_write_nstring(" ", 7 * ab->nr_events);
}
if (ab->have_cycles) {
if (dl->ipc)
slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc);
else
slsmg_write_nstring(" ", IPC_WIDTH);
if (dl->cycles)
slsmg_printf("%*" PRIu64 " ",
CYCLES_WIDTH - 1, dl->cycles);
else
slsmg_write_nstring(" ", CYCLES_WIDTH);
}
SLsmg_write_char(' ');
......@@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
unsigned int from, to;
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
u8 pcnt_width = 7;
u8 pcnt_width = annotate_browser__pcnt_width(ab);
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
......@@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
to = (u64)btarget->idx;
}
pcnt_width *= ab->nr_events;
ui_browser__set_color(browser, HE_COLORSET_CODE);
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
from, to);
......@@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
int ret = ui_browser__list_head_refresh(browser);
int pcnt_width;
pcnt_width = 7 * ab->nr_events;
int pcnt_width = annotate_browser__pcnt_width(ab);
if (annotate_browser__opts.jump_arrows)
annotate_browser__draw_current_jump(browser);
......@@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
max_percent = bpos->samples[i].percent;
}
if (max_percent < 0.01) {
if (max_percent < 0.01 && pos->ipc == 0) {
RB_CLEAR_NODE(&bpos->rb_node);
continue;
}
......@@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
return map_symbol__tui_annotate(&he->ms, evsel, hbt);
}
static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
{
unsigned n_insn = 0;
u64 offset;
for (offset = start; offset <= end; offset++) {
if (browser->offsets[offset])
n_insn++;
}
return n_insn;
}
static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
struct cyc_hist *ch)
{
unsigned n_insn;
u64 offset;
n_insn = count_insn(browser, start, end);
if (n_insn && ch->num && ch->cycles) {
float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
/* Hide data when there are too many overlaps. */
if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
return;
for (offset = start; offset <= end; offset++) {
struct disasm_line *dl = browser->offsets[offset];
if (dl)
dl->ipc = ipc;
}
}
}
/*
* This should probably be in util/annotate.c to share with the tty
* annotate, but right now we need the per byte offsets arrays,
* which are only here.
*/
static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
struct symbol *sym)
{
u64 offset;
struct annotation *notes = symbol__annotation(sym);
if (!notes->src || !notes->src->cycles_hist)
return;
pthread_mutex_lock(&notes->lock);
for (offset = 0; offset < size; ++offset) {
struct cyc_hist *ch;
ch = &notes->src->cycles_hist[offset];
if (ch && ch->cycles) {
struct disasm_line *dl;
if (ch->have_start)
count_and_fill(browser, ch->start, offset, ch);
dl = browser->offsets[offset];
if (dl && ch->num_aggr)
dl->cycles = ch->cycles_aggr / ch->num_aggr;
browser->have_cycles = true;
}
}
pthread_mutex_unlock(&notes->lock);
}
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
size_t size)
{
......@@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
}
annotate_browser__mark_jump_targets(&browser, size);
annotate__compute_ipc(&browser, size, sym);
browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
browser.max_addr_width = hex_width(sym->end);
......
......@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
return 0;
}
/* The cycles histogram is lazily allocated. */
static int symbol__alloc_hist_cycles(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
const size_t size = symbol__size(sym);
notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
if (notes->src->cycles_hist == NULL)
return -1;
return 0;
}
void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
pthread_mutex_lock(&notes->lock);
if (notes->src != NULL)
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
if (notes->src->cycles_hist)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
}
pthread_mutex_unlock(&notes->lock);
}
static int __symbol__account_cycles(struct annotation *notes,
u64 start,
unsigned offset, unsigned cycles,
unsigned have_start)
{
struct cyc_hist *ch;
ch = notes->src->cycles_hist;
/*
* For now we can only account one basic block per
* final jump. But multiple could be overlapping.
* Always account the longest one. So when
* a shorter one has been already seen throw it away.
*
* We separately always account the full cycles.
*/
ch[offset].num_aggr++;
ch[offset].cycles_aggr += cycles;
if (!have_start && ch[offset].have_start)
return 0;
if (ch[offset].num) {
if (have_start && (!ch[offset].have_start ||
ch[offset].start > start)) {
ch[offset].have_start = 0;
ch[offset].cycles = 0;
ch[offset].num = 0;
if (ch[offset].reset < 0xffff)
ch[offset].reset++;
} else if (have_start &&
ch[offset].start < start)
return 0;
}
ch[offset].have_start = have_start;
ch[offset].start = start;
ch[offset].cycles += cycles;
ch[offset].num++;
return 0;
}
static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
struct annotation *notes, int evidx, u64 addr)
{
......@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
return 0;
}
static struct annotation *symbol__get_annotation(struct symbol *sym)
static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
{
struct annotation *notes = symbol__annotation(sym);
......@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
if (symbol__alloc_hist(sym) < 0)
return NULL;
}
if (!notes->src->cycles_hist && cycles) {
if (symbol__alloc_hist_cycles(sym) < 0)
return NULL;
}
return notes;
}
......@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
if (sym == NULL)
return 0;
notes = symbol__get_annotation(sym);
notes = symbol__get_annotation(sym, false);
if (notes == NULL)
return -ENOMEM;
return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
}
static int symbol__account_cycles(u64 addr, u64 start,
struct symbol *sym, unsigned cycles)
{
struct annotation *notes;
unsigned offset;
if (sym == NULL)
return 0;
notes = symbol__get_annotation(sym, true);
if (notes == NULL)
return -ENOMEM;
if (addr < sym->start || addr >= sym->end)
return -ERANGE;
if (start) {
if (start < sym->start || start >= sym->end)
return -ERANGE;
if (start >= addr)
start = 0;
}
offset = addr - sym->start;
return __symbol__account_cycles(notes,
start ? start - sym->start : 0,
offset, cycles,
!!start);
}
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
unsigned cycles)
{
unsigned long saddr = 0;
int err;
if (!cycles)
return 0;
/*
* Only set start when IPC can be computed. We can only
* compute it when the basic block is completely in a single
* function.
* Special case the case when the jump is elsewhere, but
* it starts on the function start.
*/
if (start &&
(start->sym == ams->sym ||
(ams->sym &&
start->addr == ams->sym->start + ams->map->start)))
saddr = start->al_addr;
if (saddr == 0)
pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
ams->addr,
start ? start->addr : 0,
ams->sym ? ams->sym->start + ams->map->start : 0,
saddr);
err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
if (err)
pr_debug2("account_cycles failed %d\n", err);
return err;
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
{
return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
......
......@@ -59,6 +59,8 @@ struct disasm_line {
char *name;
struct ins *ins;
int line_nr;
float ipc;
u64 cycles;
struct ins_operands ops;
};
......@@ -79,6 +81,17 @@ struct sym_hist {
u64 addr[0];
};
struct cyc_hist {
u64 start;
u64 cycles;
u64 cycles_aggr;
u32 num;
u32 num_aggr;
u8 have_start;
/* 1 byte padding */
u16 reset;
};
struct source_line_samples {
double percent;
double percent_sum;
......@@ -97,6 +110,7 @@ struct source_line {
* @histogram: Array of addr hit histograms per event being monitored
* @lines: If 'print_lines' is specified, per source code line percentages
* @source: source parsed from a disassembler like objdump -dS
* @cyc_hist: Average cycles per basic block
*
* lines is allocated, percentages calculated and all sorted by percentage
* when the annotation is about to be presented, so the percentages are for
......@@ -109,6 +123,7 @@ struct annotated_source {
struct source_line *lines;
int nr_histograms;
int sizeof_sym_hist;
struct cyc_hist *cycles_hist;
struct sym_hist histograms[0];
};
......@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
struct addr_map_symbol *start,
unsigned cycles);
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym);
......
......@@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
struct itrace_synth_opts *synth_opts = opt->value;
const char *p;
char *endptr;
bool period_type_set = false;
synth_opts->set = true;
......@@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'i':
synth_opts->period_type =
PERF_ITRACE_PERIOD_INSTRUCTIONS;
period_type_set = true;
break;
case 't':
synth_opts->period_type =
PERF_ITRACE_PERIOD_TICKS;
period_type_set = true;
break;
case 'm':
synth_opts->period *= 1000;
......@@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
goto out_err;
synth_opts->period_type =
PERF_ITRACE_PERIOD_NANOSECS;
period_type_set = true;
break;
case '\0':
goto out;
......@@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
out:
if (synth_opts->instructions) {
if (!synth_opts->period_type)
if (!period_type_set)
synth_opts->period_type =
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
if (!synth_opts->period)
......
......@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
return ret;
}
int veprintf(int level, int var, const char *fmt, va_list args)
{
return _eprintf(level, var, fmt, args);
}
int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
......
......@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);
......
......@@ -134,7 +134,8 @@ struct branch_flags {
u64 predicted:1;
u64 in_tx:1;
u64 abort:1;
u64 reserved:60;
u64 cycles:16;
u64 reserved:44;
};
struct branch_entry {
......
......@@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
return __perf_evlist__combined_sample_type(evlist);
}
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
u64 branch_type = 0;
evlist__for_each(evlist, evsel)
branch_type |= evsel->attr.branch_sample_type;
return branch_type;
}
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
......
......@@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
u64 perf_evlist__read_format(struct perf_evlist *evlist);
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
......
......@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
1, 1, 0, true);
1, bi->flags.cycles ? bi->flags.cycles : 1,
0, true);
if (he == NULL)
return -ENOMEM;
......@@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other)
return 0;
}
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode)
{
struct branch_info *bi;
/* If we have branch cycles always annotate them. */
if (bs && bs->nr && bs->entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
if (bi) {
struct addr_map_symbol *prev = NULL;
/*
* Ignore errors, still want to process the
* other entries.
*
* For non standard branch modes always
* force no IPC (prev == NULL)
*
* Note that perf stores branches reversed from
* program order!
*/
for (i = bs->nr - 1; i >= 0; i--) {
addr_map_symbol__account_cycles(&bi[i].from,
nonany_branch_mode ? NULL : prev,
bi[i].flags.cycles);
prev = &bi[i].to;
}
free(bi);
}
}
}
size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
{
......
......@@ -47,6 +47,7 @@ enum hist_column {
HISTC_MEM_SNOOP,
HISTC_MEM_DCACHELINE,
HISTC_TRANSACTION,
HISTC_CYCLES,
HISTC_NR_COLS, /* Last entry */
};
......@@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
unsigned int hists__sort_list_width(struct hists *hists);
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode);
struct option;
int parse_filter_percentage(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused);
......
......@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
* Maximum error index indent, we will cut
* the event string if it's bigger.
*/
int max_err_idx = 10;
int max_err_idx = 13;
/*
* Let's be specific with the message when
......
......@@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
}
static struct perf_pmu_format *
pmu_find_format(struct list_head *formats, char *name)
pmu_find_format(struct list_head *formats, const char *name)
{
struct perf_pmu_format *format;
......@@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name)
return NULL;
}
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
{
struct perf_pmu_format *format = pmu_find_format(formats, name);
__u64 bits = 0;
int fbit;
if (!format)
return 0;
for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
bits |= 1ULL << fbit;
return bits;
}
/*
* Sets value based on the format definition (format parameter)
* and unformated value (value parameter).
......@@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
}
}
static __u64 pmu_format_max_value(const unsigned long *format)
{
int w;
w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
if (!w)
return 0;
if (w < 64)
return (1ULL << w) - 1;
return -1;
}
/*
* Term is a string term, and might be a param-term. Try to look up it's value
* in the remaining terms.
......@@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
{
struct perf_pmu_format *format;
__u64 *vp;
__u64 val;
__u64 val, max_val;
/*
* If this is a parameter we've already used for parameterized-eval,
......@@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
} else
return -EINVAL;
max_val = pmu_format_max_value(format->bits);
if (val > max_val) {
if (err) {
err->idx = term->err_val;
if (asprintf(&err->str,
"value too big for format, maximum is %llu",
(unsigned long long)max_val) < 0)
err->str = strdup("value too big for format");
return -EINVAL;
}
/*
* Assume we don't care if !err, in which case the value will be
* silently truncated.
*/
}
pmu_format_value(format->bits, val, vp, zero);
return 0;
}
......
......@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *error);
__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
struct perf_pmu_info *info);
struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
......
......@@ -106,6 +106,8 @@ struct variable_list {
struct strlist *vars; /* Available variables */
};
struct map;
/* Command string to events */
extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev);
......
......@@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
i, sample->branch_stack->entries[i].from,
sample->branch_stack->entries[i].to);
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &sample->branch_stack->entries[i];
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
i, e->from, e->to,
e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved);
}
}
static void regs_dump__printf(u64 mask, u64 *regs)
......
......@@ -9,7 +9,7 @@ regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char default_sort_order[] = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
......@@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
}
static int64_t
sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->branch_info->flags.cycles -
right->branch_info->flags.cycles;
}
static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
if (he->branch_info->flags.cycles == 0)
return repsep_snprintf(bf, size, "%-*s", width, "-");
return repsep_snprintf(bf, size, "%-*hd", width,
he->branch_info->flags.cycles);
}
struct sort_entry sort_cycles = {
.se_header = "Basic Block Cycles",
.se_cmp = sort__cycles_cmp,
.se_snprintf = hist_entry__cycles_snprintf,
.se_width_idx = HISTC_CYCLES,
};
/* --sort daddr_sym */
static int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
......@@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
DIM(SORT_ABORT, "abort", sort_abort),
DIM(SORT_CYCLES, "cycles", sort_cycles),
};
#undef DIM
......
......@@ -185,6 +185,7 @@ enum sort_type {
SORT_MISPREDICT,
SORT_ABORT,
SORT_IN_TX,
SORT_CYCLES,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
......
......@@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
perf_evsel__reset_counts(evsel);
}
}
static void zero_per_pkg(struct perf_evsel *counter)
{
if (counter->per_pkg_mask)
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
unsigned long *mask = counter->per_pkg_mask;
struct cpu_map *cpus = perf_evsel__cpus(counter);
int s;
*skip = false;
if (!counter->per_pkg)
return 0;
if (cpu_map__empty(cpus))
return 0;
if (!mask) {
mask = zalloc(MAX_NR_CPUS);
if (!mask)
return -ENOMEM;
counter->per_pkg_mask = mask;
}
s = cpu_map__get_socket(cpus, cpu);
if (s < 0)
return -1;
*skip = test_and_set_bit(s, mask) == 1;
return 0;
}
static int
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
int cpu, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
if (check_per_pkg(evsel, cpu, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
if (skip)
count = &zero;
switch (config->aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
if (!evsel->snapshot)
perf_evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if (config->aggr_mode == AGGR_NONE)
perf_stat__update_shadow_stats(evsel, count->values, cpu);
break;
case AGGR_GLOBAL:
aggr->val += count->val;
if (config->scale) {
aggr->ena += count->ena;
aggr->run += count->run;
}
default:
break;
}
return 0;
}
static int process_counter_maps(struct perf_stat_config *config,
struct perf_evsel *counter)
{
int nthreads = thread_map__nr(counter->threads);
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;
if (counter->system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (process_counter_values(config, counter, cpu, thread,
perf_counts(counter->counts, cpu, thread)))
return -1;
}
}
return 0;
}
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter)
{
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i, ret;
aggr->val = aggr->ena = aggr->run = 0;
init_stats(ps->res_stats);
if (counter->per_pkg)
zero_per_pkg(counter);
ret = process_counter_maps(config, counter);
if (ret)
return ret;
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
if (!counter->snapshot)
perf_evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
if (verbose) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter), count[0], count[1], count[2]);
}
/*
* Save the full runtime - to allow normalization during printout:
*/
perf_stat__update_shadow_stats(counter, count, 0);
return 0;
}
......@@ -50,6 +50,13 @@ struct perf_counts {
struct xyarray *values;
};
struct perf_stat_config {
enum aggr_mode aggr_mode;
bool scale;
FILE *output;
unsigned int interval;
};
static inline struct perf_counts_values*
perf_counts(struct perf_counts *counts, int cpu, int thread)
{
......@@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct perf_evlist *evlist);
void perf_evlist__reset_stats(struct perf_evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment