Commit bea95c15 authored by Ingo Molnar

Merge branch 'perf/hw-branch-sampling' into perf/core

Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents f9b4eeb8 24bff2dc
...@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event) ...@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
{ {
int err; int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) { switch (event->attr.type) {
case PERF_TYPE_RAW: case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
......
...@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event) ...@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0; int err = 0;
atomic_t *active_events = &armpmu->active_events; atomic_t *active_events = &armpmu->active_events;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (armpmu->map_event(event) == -ENOENT) if (armpmu->map_event(event) == -ENOENT)
return -ENOENT; return -ENOENT;
......
...@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event) ...@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
{ {
int err = 0; int err = 0;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) { switch (event->attr.type) {
case PERF_TYPE_RAW: case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
......
...@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event) ...@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu) if (!ppmu)
return -ENOENT; return -ENOENT;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) { switch (event->attr.type) {
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
ev = event->attr.config; ev = event->attr.config;
......
...@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event) ...@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
{ {
int err; int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) { switch (event->attr.type) {
case PERF_TYPE_RAW: case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE: case PERF_TYPE_HW_CACHE:
......
...@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event) ...@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0) if (atomic_read(&nmi_active) < 0)
return -ENODEV; return -ENODEV;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (attr->type) { switch (attr->type) {
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events) if (attr->config >= sparc_pmu->max_events)
......
...@@ -56,6 +56,13 @@ ...@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7 #define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
......
...@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) ...@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0; return 0;
} }
/*
 * Check that the user-supplied branch_sample_type can coexist with the
 * LBR setup implied by precise_ip > 1: ALL taken branches must be
 * captured, at exactly the priv levels the event itself measures.
 *
 * Returns 1 when the two are compatible, 0 otherwise.
 */
static inline int precise_br_compat(struct perf_event *event)
{
	u64 requested = event->attr.branch_sample_type;
	u64 expected = 0;

	/* anything short of "all branches" is incompatible */
	if (!(requested & PERF_SAMPLE_BRANCH_ANY))
		return 0;

	/* priv levels the event is measuring */
	if (!event->attr.exclude_kernel)
		expected |= PERF_SAMPLE_BRANCH_KERNEL;
	if (!event->attr.exclude_user)
		expected |= PERF_SAMPLE_BRANCH_USER;

	/*
	 * compare user/kernel bits only: PERF_SAMPLE_BRANCH_HV is
	 * ignored, it is not supported on x86
	 */
	requested &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;

	return requested == expected;
}
int x86_pmu_hw_config(struct perf_event *event) int x86_pmu_hw_config(struct perf_event *event)
{ {
if (event->attr.precise_ip) { if (event->attr.precise_ip) {
...@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) ...@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise) if (event->attr.precise_ip > precise)
return -EOPNOTSUPP; return -EOPNOTSUPP;
/*
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
if (event->attr.precise_ip > 1) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
if (!precise_br_compat(event))
return -EOPNOTSUPP;
/* branch_sample_type is compatible */
} else {
/*
* user did not specify branch_sample_type
*
* For PEBS fixups, we capture all
* the branches at the priv level of the
* event.
*/
*br_type = PERF_SAMPLE_BRANCH_ANY;
if (!event->attr.exclude_user)
*br_type |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
}
}
} }
/* /*
...@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) ...@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */ /* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE; event->hw.extra_reg.idx = EXTRA_REG_NONE;
/* mark not used */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event); return x86_pmu.hw_config(event);
} }
...@@ -1607,6 +1671,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { ...@@ -1607,6 +1671,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL, NULL,
}; };
/*
 * Forward a branch stack flush request to the model-specific
 * x86_pmu callback; do nothing when no callback is installed.
 */
static void x86_pmu_flush_branch_stack(void)
{
	if (!x86_pmu.flush_branch_stack)
		return;

	x86_pmu.flush_branch_stack();
}
static struct pmu pmu = { static struct pmu pmu = {
.pmu_enable = x86_pmu_enable, .pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable, .pmu_disable = x86_pmu_disable,
...@@ -1626,6 +1696,7 @@ static struct pmu pmu = { ...@@ -1626,6 +1696,7 @@ static struct pmu pmu = {
.commit_txn = x86_pmu_commit_txn, .commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx, .event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
}; };
void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
......
...@@ -33,6 +33,7 @@ enum extra_reg_type { ...@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */ EXTRA_REG_MAX /* number of entries needed */
}; };
...@@ -130,6 +131,8 @@ struct cpu_hw_events { ...@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context; void *lbr_context;
struct perf_branch_stack lbr_stack; struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;
/* /*
* Intel host/guest exclude bits * Intel host/guest exclude bits
...@@ -344,6 +347,7 @@ struct x86_pmu { ...@@ -344,6 +347,7 @@ struct x86_pmu {
void (*cpu_starting)(int cpu); void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu); void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu); void (*cpu_dead)(int cpu);
void (*flush_branch_stack)(void);
/* /*
* Intel Arch Perfmon v2+ * Intel Arch Perfmon v2+
...@@ -365,6 +369,8 @@ struct x86_pmu { ...@@ -365,6 +369,8 @@ struct x86_pmu {
*/ */
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */ int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
/* /*
* Extra registers for events * Extra registers for events
...@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint; ...@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained; extern struct event_constraint unconstrained;
/*
 * Classify an instruction pointer as kernel or user.
 *
 * Without CONFIG_X86_32 an address is treated as a kernel address when
 * it is negative as a signed long (top bit set); with CONFIG_X86_32 it
 * is a kernel address when it lies above PAGE_OFFSET.
 */
static inline bool kernel_ip(unsigned long ip)
{
#ifndef CONFIG_X86_32
	return (long)ip < 0;
#else
	return ip > PAGE_OFFSET;
#endif
}
#ifdef CONFIG_CPU_SUP_AMD #ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void); int amd_pmu_init(void);
...@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); ...@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void); void intel_pmu_lbr_init_atom(void);
void intel_pmu_lbr_init_snb(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
int p4_pmu_init(void); int p4_pmu_init(void);
int p6_pmu_init(void); int p6_pmu_init(void);
......
...@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) ...@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret) if (ret)
return ret; return ret;
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (event->attr.exclude_host && event->attr.exclude_guest) if (event->attr.exclude_host && event->attr.exclude_guest)
/* /*
* When HO == GO == 1 the hardware treats that as GO == HO == 0 * When HO == GO == 1 the hardware treats that as GO == HO == 0
......
...@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids ...@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
}, },
}; };
/*
 * Tell whether the event needs the LBR, which is the case when:
 *  - the user explicitly requested branch sampling, or
 *  - branch sampling is used implicitly to correct PEBS skid
 *    (pebs_trap capability and precise_ip > 1).
 */
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
	return has_branch_stack(event) ||
	       (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1);
}
static void intel_pmu_disable_all(void) static void intel_pmu_disable_all(void)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
...@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) ...@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
/*
* must disable before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_disable(event);
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc); intel_pmu_disable_fixed(hwc);
return; return;
...@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) ...@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config); intel_pmu_enable_bts(hwc->config);
return; return;
} }
/*
* must be enabled before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_enable(event);
if (event->attr.exclude_host) if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
...@@ -1058,6 +1084,9 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -1058,6 +1084,9 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
data.period = event->hw.last_period; data.period = event->hw.last_period;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, regs)) if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0); x86_pmu_stop(event, 0);
} }
...@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) ...@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/ */
static struct event_constraint * static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event) struct perf_event *event,
struct hw_perf_event_extra *reg)
{ {
struct event_constraint *c = &emptyconstraint; struct event_constraint *c = &emptyconstraint;
struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era; struct er_account *era;
unsigned long flags; unsigned long flags;
int orig_idx = reg->idx; int orig_idx = reg->idx;
/* already allocated shared msr */ /* already allocated shared msr */
if (reg->alloc) if (reg->alloc)
return &unconstrained; return NULL; /* call x86_get_event_constraint() */
again: again:
era = &cpuc->shared_regs->regs[reg->idx]; era = &cpuc->shared_regs->regs[reg->idx];
...@@ -1157,14 +1186,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, ...@@ -1157,14 +1186,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
reg->alloc = 1; reg->alloc = 1;
/* /*
* All events using extra_reg are unconstrained. * need to call x86_get_event_constraint()
* Avoids calling x86_get_event_constraints() * to check if associated event has constraints
*
* Must revisit if extra_reg controlling events
* ever have constraints. Worst case we go through
* the regular event constraint table.
*/ */
c = &unconstrained; c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) { } else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags); raw_spin_unlock_irqrestore(&era->lock, flags);
goto again; goto again;
...@@ -1201,11 +1226,23 @@ static struct event_constraint * ...@@ -1201,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc, intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event) struct perf_event *event)
{ {
struct event_constraint *c = NULL; struct event_constraint *c = NULL, *d;
struct hw_perf_event_extra *xreg, *breg;
if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
c = __intel_shared_reg_get_constraints(cpuc, event);
xreg = &event->hw.extra_reg;
if (xreg->idx != EXTRA_REG_NONE) {
c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
if (c == &emptyconstraint)
return c;
}
breg = &event->hw.branch_reg;
if (breg->idx != EXTRA_REG_NONE) {
d = __intel_shared_reg_get_constraints(cpuc, event, breg);
if (d == &emptyconstraint) {
__intel_shared_reg_put_constraints(cpuc, xreg);
c = d;
}
}
return c; return c;
} }
...@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, ...@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg; reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE) if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg); __intel_shared_reg_put_constraints(cpuc, reg);
reg = &event->hw.branch_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
} }
static void intel_put_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
...@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event) ...@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->hw.config = alt_config; event->hw.config = alt_config;
} }
if (intel_pmu_needs_lbr_smpl(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
}
if (event->attr.type != PERF_TYPE_RAW) if (event->attr.type != PERF_TYPE_RAW)
return 0; return 0;
...@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) ...@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{ {
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
if (!x86_pmu.extra_regs) if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK; return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu); cpuc->shared_regs = allocate_shared_regs(cpu);
...@@ -1455,9 +1502,12 @@ static void intel_pmu_cpu_starting(int cpu) ...@@ -1455,9 +1502,12 @@ static void intel_pmu_cpu_starting(int cpu)
*/ */
intel_pmu_lbr_reset(); intel_pmu_lbr_reset();
if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) cpuc->lbr_sel = NULL;
if (!cpuc->shared_regs)
return; return;
if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
for_each_cpu(i, topology_thread_cpumask(cpu)) { for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_shared_regs *pc; struct intel_shared_regs *pc;
...@@ -1468,9 +1518,12 @@ static void intel_pmu_cpu_starting(int cpu) ...@@ -1468,9 +1518,12 @@ static void intel_pmu_cpu_starting(int cpu)
break; break;
} }
} }
cpuc->shared_regs->core_id = core_id; cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++; cpuc->shared_regs->refcnt++;
}
if (x86_pmu.lbr_sel_map)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
} }
static void intel_pmu_cpu_dying(int cpu) static void intel_pmu_cpu_dying(int cpu)
...@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) ...@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu); fini_debug_store_on_cpu(cpu);
} }
/*
 * Intel LBR does not tag its entries with the PID of the current
 * task, so the LBR has to be flushed on context switch. For now it is
 * simply reset. Nothing is done when lbr_nr is zero.
 */
static void intel_pmu_flush_branch_stack(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	intel_pmu_lbr_reset();
}
static __initconst const struct x86_pmu intel_pmu = { static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel", .name = "Intel",
.handle_irq = intel_pmu_handle_irq, .handle_irq = intel_pmu_handle_irq,
...@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { ...@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting, .cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying, .cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs, .guest_get_msrs = intel_guest_get_msrs,
.flush_branch_stack = intel_pmu_flush_branch_stack,
}; };
static __init void intel_clovertown_quirk(void) static __init void intel_clovertown_quirk(void)
...@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void) ...@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids)); sizeof(hw_cache_event_ids));
intel_pmu_lbr_init_nhm(); intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <asm/perf_event.h> #include <asm/perf_event.h>
#include <asm/insn.h>
#include "perf_event.h" #include "perf_event.h"
...@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) ...@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx; cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_enable(event);
} }
void intel_pmu_pebs_disable(struct perf_event *event) void intel_pmu_pebs_disable(struct perf_event *event)
...@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) ...@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT; hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_disable(event);
} }
void intel_pmu_pebs_enable_all(void) void intel_pmu_pebs_enable_all(void)
...@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) ...@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0); wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
} }
#include <asm/insn.h>
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
...@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this * both formats and we don't use the other fields in this
* routine. * routine.
*/ */
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs; struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data; struct perf_sample_data data;
struct pt_regs regs; struct pt_regs regs;
...@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else else
regs.flags &= ~PERF_EFLAGS_EXACT; regs.flags &= ~PERF_EFLAGS_EXACT;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, &regs)) if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0); x86_pmu_stop(event, 0);
} }
......
This diff is collapsed.
...@@ -129,10 +129,39 @@ enum perf_event_sample_format { ...@@ -129,10 +129,39 @@ enum perf_event_sample_format {
PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_STREAM_ID = 1U << 9,
PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_RAW = 1U << 10,
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
}; };
/*
 * Values to program into perf_event_attr.branch_sample_type when
 * PERF_SAMPLE_BRANCH_STACK is set in sample_type.
 *
 * If the user does not pass priv level information via branch_sample_type,
 * the kernel uses the event's priv level. Branch and event priv levels do
 * not have to match. Branch priv level is checked for permissions.
 *
 * The branch types can be combined, however BRANCH_ANY covers all types
 * of branches and therefore it supersedes all the other types.
 */
enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
PERF_SAMPLE_BRANCH_HV)
/* /*
* The format of the data returned by read() on a perf event fd, * The format of the data returned by read() on a perf event fd,
* as specified by attr.read_format: * as specified by attr.read_format:
...@@ -163,6 +192,8 @@ enum perf_event_read_format { ...@@ -163,6 +192,8 @@ enum perf_event_read_format {
}; };
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
/* /*
* Hardware event_id to monitor via a performance monitoring event: * Hardware event_id to monitor via a performance monitoring event:
...@@ -240,6 +271,7 @@ struct perf_event_attr { ...@@ -240,6 +271,7 @@ struct perf_event_attr {
__u64 bp_len; __u64 bp_len;
__u64 config2; /* extension of config1 */ __u64 config2; /* extension of config1 */
}; };
__u64 branch_sample_type; /* enum perf_branch_sample_type */
}; };
/* /*
...@@ -458,6 +490,8 @@ enum perf_event_type { ...@@ -458,6 +490,8 @@ enum perf_event_type {
* *
* { u32 size; * { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW * char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 nr;
* { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
* }; * };
*/ */
PERF_RECORD_SAMPLE = 9, PERF_RECORD_SAMPLE = 9,
...@@ -530,12 +564,34 @@ struct perf_raw_record { ...@@ -530,12 +564,34 @@ struct perf_raw_record {
void *data; void *data;
}; };
/*
* single taken branch record layout:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
* mispred: branch target was mispredicted
* predicted: branch target was predicted
*
* support for mispred, predicted is optional. In case it
* is not supported mispred = predicted = 0.
*/
struct perf_branch_entry { struct perf_branch_entry {
__u64 from; __u64 from;
__u64 to; __u64 to;
__u64 flags; __u64 mispred:1, /* target mispredicted */
predicted:1,/* target predicted */
reserved:62;
}; };
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
*/
struct perf_branch_stack { struct perf_branch_stack {
__u64 nr; __u64 nr;
struct perf_branch_entry entries[0]; struct perf_branch_entry entries[0];
...@@ -566,7 +622,9 @@ struct hw_perf_event { ...@@ -566,7 +622,9 @@ struct hw_perf_event {
unsigned long event_base; unsigned long event_base;
int idx; int idx;
int last_cpu; int last_cpu;
struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg;
}; };
struct { /* software */ struct { /* software */
struct hrtimer hrtimer; struct hrtimer hrtimer;
...@@ -690,6 +748,11 @@ struct pmu { ...@@ -690,6 +748,11 @@ struct pmu {
* if no implementation is provided it will default to: event->hw.idx + 1. * if no implementation is provided it will default to: event->hw.idx + 1.
*/ */
int (*event_idx) (struct perf_event *event); /*optional */ int (*event_idx) (struct perf_event *event); /*optional */
/*
* flush branch stack on context-switches (needed in cpu-wide mode)
*/
void (*flush_branch_stack) (void);
}; };
/** /**
...@@ -923,7 +986,8 @@ struct perf_event_context { ...@@ -923,7 +986,8 @@ struct perf_event_context {
u64 parent_gen; u64 parent_gen;
u64 generation; u64 generation;
int pin_count; int pin_count;
int nr_cgroups; /* cgroup events present */ int nr_cgroups; /* cgroup evts */
int nr_branch_stack; /* branch_stack evt */
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };
...@@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, ...@@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
extern u64 perf_event_read_value(struct perf_event *event, extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running); u64 *enabled, u64 *running);
struct perf_sample_data { struct perf_sample_data {
u64 type; u64 type;
...@@ -1007,12 +1072,14 @@ struct perf_sample_data { ...@@ -1007,12 +1072,14 @@ struct perf_sample_data {
u64 period; u64 period;
struct perf_callchain_entry *callchain; struct perf_callchain_entry *callchain;
struct perf_raw_record *raw; struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
}; };
static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{ {
data->addr = addr; data->addr = addr;
data->raw = NULL; data->raw = NULL;
data->br_stack = NULL;
} }
extern void perf_output_sample(struct perf_output_handle *handle, extern void perf_output_sample(struct perf_output_handle *handle,
...@@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); ...@@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
# define perf_instruction_pointer(regs) instruction_pointer(regs) # define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif #endif
/*
 * Report whether the event requested branch stack sampling, i.e.
 * whether PERF_SAMPLE_BRANCH_STACK is set in its attr.sample_type.
 */
static inline bool has_branch_stack(struct perf_event *event)
{
	return (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) != 0;
}
extern int perf_output_begin(struct perf_output_handle *handle, extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size); struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_end(struct perf_output_handle *handle);
......
...@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) ...@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
PERF_FLAG_FD_OUTPUT |\ PERF_FLAG_FD_OUTPUT |\
PERF_FLAG_PID_CGROUP) PERF_FLAG_PID_CGROUP)
/*
* branch priv levels that need permission checks
*/
#define PERF_SAMPLE_BRANCH_PERM_PLM \
(PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
enum event_type_t { enum event_type_t {
EVENT_FLEXIBLE = 0x1, EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2, EVENT_PINNED = 0x2,
...@@ -130,6 +137,7 @@ enum event_type_t { ...@@ -130,6 +137,7 @@ enum event_type_t {
*/ */
struct static_key_deferred perf_sched_events __read_mostly; struct static_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly; static atomic_t nr_comm_events __read_mostly;
...@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ...@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (is_cgroup_event(event)) if (is_cgroup_event(event))
ctx->nr_cgroups++; ctx->nr_cgroups++;
if (has_branch_stack(event))
ctx->nr_branch_stack++;
list_add_rcu(&event->event_entry, &ctx->event_list); list_add_rcu(&event->event_entry, &ctx->event_list);
if (!ctx->nr_events) if (!ctx->nr_events)
perf_pmu_rotate_start(ctx->pmu); perf_pmu_rotate_start(ctx->pmu);
...@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) ...@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
cpuctx->cgrp = NULL; cpuctx->cgrp = NULL;
} }
if (has_branch_stack(event))
ctx->nr_branch_stack--;
ctx->nr_events--; ctx->nr_events--;
if (event->attr.inherit_stat) if (event->attr.inherit_stat)
ctx->nr_stat--; ctx->nr_stat--;
...@@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, ...@@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
perf_pmu_rotate_start(ctx->pmu); perf_pmu_rotate_start(ctx->pmu);
} }
/*
* When sampling the branck stack in system-wide, it may be necessary
* to flush the stack on context switch. This happens when the branch
* stack does not tag its entries with the pid of the current task.
* Otherwise it becomes impossible to associate a branch entry with a
* task. This ambiguity is more likely to appear when the branch stack
* supports priv level filtering and the user sets it to monitor only
* at the user level (which could be a useful measurement in system-wide
* mode). In that case, the risk is high of having a branch stack with
* branch from multiple tasks. Flushing may mean dropping the existing
* entries or stashing them somewhere in the PMU specific code layer.
*
* This function provides the context switch callback to the lower code
* layer. It is invoked ONLY when there is at least one system-wide context
* with at least one active event using taken branch sampling.
*/
static void perf_branch_stack_sched_in(struct task_struct *prev,
				       struct task_struct *task)
{
	struct perf_cpu_context *cpuctx;
	struct pmu *pmu;
	unsigned long flags;

	/* no need to flush branch stack if not changing task */
	if (prev == task)
		return;

	local_irq_save(flags);

	rcu_read_lock();

	list_for_each_entry_rcu(pmu, &pmus, entry) {
		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

		/*
		 * check if the context has at least one
		 * event using PERF_SAMPLE_BRANCH_STACK
		 */
		if (cpuctx->ctx.nr_branch_stack > 0
		    && pmu->flush_branch_stack) {
			/*
			 * Operate on the PMU we are iterating over: it is
			 * the one whose flush_branch_stack callback was just
			 * checked. The list cursor must not be reassigned
			 * here, otherwise the traversal of the pmus list
			 * would continue from a different entry.
			 */
			perf_ctx_lock(cpuctx, cpuctx->task_ctx);

			perf_pmu_disable(pmu);

			pmu->flush_branch_stack();

			perf_pmu_enable(pmu);

			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
		}
	}

	rcu_read_unlock();

	local_irq_restore(flags);
}
/* /*
* Called from scheduler to add the events of the current task * Called from scheduler to add the events of the current task
* with interrupts disabled. * with interrupts disabled.
...@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, ...@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
*/ */
if (atomic_read(&__get_cpu_var(perf_cgroup_events))) if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
perf_cgroup_sched_in(prev, task); perf_cgroup_sched_in(prev, task);
/* check for system-wide branch_stack events */
if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
perf_branch_stack_sched_in(prev, task);
} }
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
...@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event) ...@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
static_key_slow_dec_deferred(&perf_sched_events); static_key_slow_dec_deferred(&perf_sched_events);
} }
if (has_branch_stack(event)) {
static_key_slow_dec_deferred(&perf_sched_events);
/* is system-wide event */
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_dec(&per_cpu(perf_branch_stack_events,
event->cpu));
}
} }
if (event->rb) { if (event->rb) {
...@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, ...@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
} }
} }
} }
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
if (data->br_stack) {
size_t size;
size = data->br_stack->nr
* sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr);
perf_output_copy(handle, data->br_stack->entries, size);
} else {
/*
* we always store at least the value of nr
*/
u64 nr = 0;
perf_output_put(handle, nr);
}
}
} }
void perf_prepare_sample(struct perf_event_header *header, void perf_prepare_sample(struct perf_event_header *header,
...@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
WARN_ON_ONCE(size & (sizeof(u64)-1)); WARN_ON_ONCE(size & (sizeof(u64)-1));
header->size += size; header->size += size;
} }
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
if (data->br_stack) {
size += data->br_stack->nr
* sizeof(struct perf_branch_entry);
}
header->size += size;
}
} }
static void perf_event_output(struct perf_event *event, static void perf_event_output(struct perf_event *event,
...@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) ...@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_SOFTWARE) if (event->attr.type != PERF_TYPE_SOFTWARE)
return -ENOENT; return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event_id) { switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK: case PERF_COUNT_SW_CPU_CLOCK:
case PERF_COUNT_SW_TASK_CLOCK: case PERF_COUNT_SW_TASK_CLOCK:
...@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) ...@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_TRACEPOINT) if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -ENOENT; return -ENOENT;
/*
* no branch sampling for tracepoint events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
err = perf_trace_init(event); err = perf_trace_init(event);
if (err) if (err)
return err; return err;
...@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) ...@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
return -ENOENT; return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event); perf_swevent_init_hrtimer(event);
return 0; return 0;
...@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) ...@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
return -ENOENT; return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event); perf_swevent_init_hrtimer(event);
return 0; return 0;
...@@ -5866,6 +6003,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, ...@@ -5866,6 +6003,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
return ERR_PTR(err); return ERR_PTR(err);
} }
} }
if (has_branch_stack(event)) {
static_key_slow_inc(&perf_sched_events.key);
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_inc(&per_cpu(perf_branch_stack_events,
event->cpu));
}
} }
return event; return event;
...@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, ...@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->read_format & ~(PERF_FORMAT_MAX-1)) if (attr->read_format & ~(PERF_FORMAT_MAX-1))
return -EINVAL; return -EINVAL;
if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
u64 mask = attr->branch_sample_type;
/* only using defined bits */
if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
return -EINVAL;
/* at least one branch bit must be set */
if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
return -EINVAL;
/* kernel level capture: check permissions */
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
/* propagate priv level, when not set for branch */
if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
/* exclude_kernel checked on syscall entry */
if (!attr->exclude_kernel)
mask |= PERF_SAMPLE_BRANCH_KERNEL;
if (!attr->exclude_user)
mask |= PERF_SAMPLE_BRANCH_USER;
if (!attr->exclude_hv)
mask |= PERF_SAMPLE_BRANCH_HV;
/*
* adjust user setting (for HW filter setup)
*/
attr->branch_sample_type = mask;
}
}
out: out:
return ret; return ret;
......
...@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) ...@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
if (bp->attr.type != PERF_TYPE_BREAKPOINT) if (bp->attr.type != PERF_TYPE_BREAKPOINT)
return -ENOENT; return -ENOENT;
/*
* no branch sampling for breakpoint events
*/
if (has_branch_stack(bp))
return -EOPNOTSUPP;
err = register_perf_hw_breakpoint(bp); err = register_perf_hw_breakpoint(bp);
if (err) if (err)
return err; return err;
......
...@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha ...@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
corresponding events, i.e., they always refer to events defined earlier on the command corresponding events, i.e., they always refer to events defined earlier on the command
line. line.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more information.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters. The
following filters are defined:
- any: any type of branches
- any_call: any function call or system call
- any_ret: any function return or system call return
- ind_call: any indirect branch
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
+
The option requires at least one branch type among any, any_call, any_ret, ind_call.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-stat[1], linkperf:perf-list[1] linkperf:perf-stat[1], linkperf:perf-list[1]
...@@ -153,6 +153,16 @@ OPTIONS ...@@ -153,6 +153,16 @@ OPTIONS
information which may be very large and thus may clutter the display. information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system. It currently includes: cpu and numa topology of the host system.
-b::
--branch-stack::
Use the addresses of sampled taken branches instead of the instruction
address to build the histograms. To generate meaningful output, the
perf.data file must have been obtained using perf record -b or
perf record --branch-filter xxx where xxx is a branch filter option.
perf report is able to auto-detect whether a perf.data file contains
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-stat[1], linkperf:perf-annotate[1] linkperf:perf-stat[1], linkperf:perf-annotate[1]
...@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) ...@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (!have_tracepoints(&evsel_list->entries)) if (!have_tracepoints(&evsel_list->entries))
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
if (!rec->opts.branch_stack)
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
if (!rec->file_new) { if (!rec->file_new) {
err = perf_session__read_header(session, output); err = perf_session__read_header(session, output);
if (err < 0) if (err < 0)
...@@ -638,6 +641,90 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) ...@@ -638,6 +641,90 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
return err; return err;
} }
/* Build one branch_modes[] entry mapping filter name n to mask bit(s) m. */
#define BRANCH_OPT(n, m) \
{ .name = n, .mode = (m) }
/* Sentinel entry: a NULL name terminates the branch_modes[] table. */
#define BRANCH_END { .name = NULL }
/* One accepted --branch-filter keyword and the PERF_SAMPLE_BRANCH_* value it selects. */
struct branch_mode {
const char *name;
int mode;
};
/*
 * Keywords understood by parse_branch_stack(). The first three entries are
 * privilege-level selectors, the remaining ones are branch types. The table
 * is scanned linearly until the BRANCH_END sentinel.
 */
static const struct branch_mode branch_modes[] = {
BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
BRANCH_END
};
/*
 * Option callback shared by -b/--branch-any and -j/--branch-filter.
 *
 * @opt:   option descriptor; opt->value points at the branch_stack field
 *         of struct perf_record_opts, which is declared as an int
 * @str:   comma separated list of filter keywords (see branch_modes[]),
 *         or NULL when the option takes no argument (-b)
 * @unset: non-zero when the option was negated, in which case nothing
 *         is changed
 *
 * Returns 0 on success, -1 if a branch filter was already set, if memory
 * could not be allocated, or if an unknown keyword is found. On failure
 * the destination is left untouched.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	/*
	 * The destination is an int: accessing it through a 64-bit pointer
	 * would read and write past the end of the field.
	 */
	int *mode = (int *)opt->value;
	int mask = 0;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			/* isolate the next comma separated keyword */
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			mask |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((mask & ~ONLY_PLM) == 0)
		mask = PERF_SAMPLE_BRANCH_ANY;

	/* publish the result only once the whole list has been validated */
	*mode = mask;
error:
	free(os);
	return ret;
}
static const char * const record_usage[] = { static const char * const record_usage[] = {
"perf record [<options>] [<command>]", "perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]", "perf record [<options>] -- <command> [<options>]",
...@@ -727,6 +814,14 @@ const struct option record_options[] = { ...@@ -727,6 +814,14 @@ const struct option record_options[] = {
"monitor event in cgroup name only", "monitor event in cgroup name only",
parse_cgroups), parse_cgroups),
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
"branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_END() OPT_END()
}; };
......
...@@ -53,6 +53,82 @@ struct perf_report { ...@@ -53,6 +53,82 @@ struct perf_report {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
}; };
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
struct addr_location *al,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct symbol *parent = NULL;
int err = 0;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
if ((sort__has_parent || symbol_conf.use_callchain)
&& sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample->callchain, &parent);
if (err)
return err;
}
bi = machine__resolve_bstack(machine, al->thread,
sample->branch_stack);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_branch_entry(&evsel->hists, al, parent,
&bi[i], 1);
if (he) {
struct annotation *notes;
err = -ENOMEM;
bx = he->branch_info;
if (bx->from.sym && use_browser > 0) {
notes = symbol__annotation(bx->from.sym);
if (!notes->src
&& symbol__alloc_hist(bx->from.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->from.sym,
bx->from.map,
evsel->idx,
bx->from.al_addr);
if (err)
goto out;
}
if (bx->to.sym && use_browser > 0) {
notes = symbol__annotation(bx->to.sym);
if (!notes->src
&& symbol__alloc_hist(bx->to.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->to.sym,
bx->to.map,
evsel->idx,
bx->to.al_addr);
if (err)
goto out;
}
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
err = 0;
} else
return -ENOMEM;
}
out:
return err;
}
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al, struct addr_location *al,
struct perf_sample *sample, struct perf_sample *sample,
...@@ -126,6 +202,13 @@ static int process_sample_event(struct perf_tool *tool, ...@@ -126,6 +202,13 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0; return 0;
if (sort__branch_mode == 1) {
if (perf_report__add_branch_hist_entry(tool, &al, sample,
evsel, machine)) {
pr_debug("problem adding lbr entry, skipping event\n");
return -1;
}
} else {
if (al.map != NULL) if (al.map != NULL)
al.map->dso->hit = 1; al.map->dso->hit = 1;
...@@ -133,7 +216,7 @@ static int process_sample_event(struct perf_tool *tool, ...@@ -133,7 +216,7 @@ static int process_sample_event(struct perf_tool *tool,
pr_debug("problem incrementing symbol period, skipping event\n"); pr_debug("problem incrementing symbol period, skipping event\n");
return -1; return -1;
} }
}
return 0; return 0;
} }
...@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) ...@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
} }
} }
if (sort__branch_mode == 1) {
if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
fprintf(stderr, "selected -b but no branch data."
" Did you call perf record without"
" -b?\n");
return -1;
}
}
return 0; return 0;
} }
...@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) ...@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
{ {
int ret = -EINVAL; int ret = -EINVAL;
u64 nr_samples; u64 nr_samples;
struct perf_session *session; struct perf_session *session = rep->session;
struct perf_evsel *pos; struct perf_evsel *pos;
struct map *kernel_map; struct map *kernel_map;
struct kmap *kernel_kmap; struct kmap *kernel_kmap;
...@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) ...@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
signal(SIGINT, sig_handler); signal(SIGINT, sig_handler);
session = perf_session__new(rep->input_name, O_RDONLY,
rep->force, false, &rep->tool);
if (session == NULL)
return -ENOMEM;
rep->session = session;
if (rep->cpu_list) { if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list, ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap); rep->cpu_bitmap);
...@@ -427,9 +512,19 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) ...@@ -427,9 +512,19 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return 0; return 0;
} }
/*
 * Option callback for -b/--branch-stack: record in sort__branch_mode
 * whether the branch view was requested (1) or negated (0).
 * Always returns 0.
 */
static int
parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
{
	if (unset)
		sort__branch_mode = 0;
	else
		sort__branch_mode = 1;

	return 0;
}
int cmd_report(int argc, const char **argv, const char *prefix __used) int cmd_report(int argc, const char **argv, const char *prefix __used)
{ {
struct perf_session *session;
struct stat st; struct stat st;
bool has_br_stack = false;
int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee"; char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = { const char * const report_usage[] = {
"perf report [<options>]", "perf report [<options>]",
...@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) ...@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
OPT_BOOLEAN(0, "stdio", &report.use_stdio, OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"), "Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]", OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"), "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
" dso_from, symbol_to, symbol_from, mispredict"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"), "Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex", OPT_STRING('p', "parent", &parent_pattern, "regex",
...@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) ...@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
"Specify disassembler style (e.g. -M intel for intel syntax)"), "Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"), "Show a column with the sum of periods"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
"use branch records for histogram filling", parse_branch_mode),
OPT_END() OPT_END()
}; };
...@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) ...@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
else else
report.input_name = "perf.data"; report.input_name = "perf.data";
} }
session = perf_session__new(report.input_name, O_RDONLY,
report.force, false, &report.tool);
if (session == NULL)
return -ENOMEM;
report.session = session;
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (sort__branch_mode == -1 && has_br_stack)
sort__branch_mode = 1;
/* sort__branch_mode could be 0 if --no-branch-stack */
if (sort__branch_mode == 1) {
/*
* if no sort_order is provided, then specify
* branch-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "comm,dso_from,symbol_from,"
"dso_to,symbol_to";
}
if (strcmp(report.input_name, "-") != 0) if (strcmp(report.input_name, "-") != 0) {
setup_browser(true); setup_browser(true);
else } else {
use_browser = 0; use_browser = 0;
}
/* /*
* Only in the newt browser we are doing integrated annotation, * Only in the newt browser we are doing integrated annotation,
...@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) ...@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
} }
if (symbol__init() < 0) if (symbol__init() < 0)
return -1; goto error;
setup_sorting(report_usage, options); setup_sorting(report_usage, options);
if (parent_pattern != default_parent_pattern) { if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0) if (sort_dimension__add("parent") < 0)
return -1; goto error;
/* /*
* Only show the parent fields if we explicitly * Only show the parent fields if we explicitly
...@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) ...@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
if (argc) if (argc)
usage_with_options(report_usage, options); usage_with_options(report_usage, options);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
if (sort__branch_mode == 1) {
sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
} else {
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
}
return __cmd_report(&report); ret = __cmd_report(&report);
error:
perf_session__delete(session);
return ret;
} }
...@@ -179,6 +179,23 @@ struct ip_callchain { ...@@ -179,6 +179,23 @@ struct ip_callchain {
u64 ips[0]; u64 ips[0];
}; };
/*
 * Per-branch flag word; the three bit-fields together occupy 64 bits.
 * NOTE(review): presumably mispred/predicted report the hardware branch
 * prediction outcome -- confirm against the kernel's perf_branch_entry.
 */
struct branch_flags {
u64 mispred:1;
u64 predicted:1;
u64 reserved:62;
};
/* One sampled taken branch: source address, target address and flags. */
struct branch_entry {
u64 from;
u64 to;
struct branch_flags flags;
};
/*
 * Branch stack as found in a sample: a count followed by that many
 * branch_entry records.
 */
struct branch_stack {
u64 nr;
struct branch_entry entries[0];
};
extern bool perf_host, perf_guest; extern bool perf_host, perf_guest;
extern const char perf_version_string[]; extern const char perf_version_string[];
...@@ -205,6 +222,7 @@ struct perf_record_opts { ...@@ -205,6 +222,7 @@ struct perf_record_opts {
unsigned int freq; unsigned int freq;
unsigned int mmap_pages; unsigned int mmap_pages;
unsigned int user_freq; unsigned int user_freq;
int branch_stack;
u64 default_interval; u64 default_interval;
u64 user_interval; u64 user_interval;
const char *cpu_list; const char *cpu_list;
......
...@@ -81,6 +81,7 @@ struct perf_sample { ...@@ -81,6 +81,7 @@ struct perf_sample {
u32 raw_size; u32 raw_size;
void *raw_data; void *raw_data;
struct ip_callchain *callchain; struct ip_callchain *callchain;
struct branch_stack *branch_stack;
}; };
#define BUILD_ID_SIZE 20 #define BUILD_ID_SIZE 20
......
...@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) ...@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
attr->watermark = 0; attr->watermark = 0;
attr->wakeup_events = 1; attr->wakeup_events = 1;
} }
if (opts->branch_stack) {
attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
attr->branch_sample_type = opts->branch_stack;
}
attr->mmap = track; attr->mmap = track;
attr->comm = track; attr->comm = track;
...@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, ...@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
data->raw_data = (void *) pdata; data->raw_data = (void *) pdata;
} }
if (type & PERF_SAMPLE_BRANCH_STACK) {
u64 sz;
data->branch_stack = (struct branch_stack *)array;
array++; /* nr */
sz = data->branch_stack->nr * sizeof(struct branch_entry);
sz /= sizeof(u64);
array += sz;
}
return 0; return 0;
} }
......
...@@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used, ...@@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used,
return do_write_string(fd, buffer); return do_write_string(fd, buffer);
} }
/*
 * Writer for the HEADER_BRANCH_STACK feature. It writes nothing and
 * always returns 0; all three parameters are unused.
 */
static int write_branch_stack(int fd __used, struct perf_header *h __used,
struct perf_evlist *evlist __used)
{
return 0;
}
static void print_hostname(struct perf_header *ph, int fd, FILE *fp) static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
{ {
char *str = do_read_string(fd, ph); char *str = do_read_string(fd, ph);
...@@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) ...@@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
uint64_t id; uint64_t id;
void *buf = NULL; void *buf = NULL;
char *str; char *str;
u32 nre, sz, nr, i, j, msz; u32 nre, sz, nr, i, j;
int ret; ssize_t ret;
size_t msz;
/* number of events */ /* number of events */
ret = read(fd, &nre, sizeof(nre)); ret = read(fd, &nre, sizeof(nre));
...@@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) ...@@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
if (ph->needs_swap) if (ph->needs_swap)
sz = bswap_32(sz); sz = bswap_32(sz);
/*
* ensure it is at least to our ABI rev
*/
if (sz < (u32)sizeof(attr))
goto error;
memset(&attr, 0, sizeof(attr)); memset(&attr, 0, sizeof(attr));
/* read entire region to sync up to next field */ /* buffer to hold on file attr struct */
buf = malloc(sz); buf = malloc(sz);
if (!buf) if (!buf)
goto error; goto error;
msz = sizeof(attr); msz = sizeof(attr);
if (sz < msz) if (sz < (ssize_t)msz)
msz = sz; msz = sz;
for (i = 0 ; i < nre; i++) { for (i = 0 ; i < nre; i++) {
/*
* must read entire on-file attr struct to
* sync up with layout.
*/
ret = read(fd, buf, sz); ret = read(fd, buf, sz);
if (ret != (ssize_t)sz) if (ret != (ssize_t)sz)
goto error; goto error;
...@@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) ...@@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
free(str); free(str);
} }
/*
 * Printer for the HEADER_BRANCH_STACK feature: emits a single fixed
 * comment line to @fp. @ph and @fd are unused.
 */
static void print_branch_stack(struct perf_header *ph __used, int fd __used,
			       FILE *fp)
{
	fputs("# contains samples with branch stack\n", fp);
}
static int __event_process_build_id(struct build_id_event *bev, static int __event_process_build_id(struct build_id_event *bev,
char *filename, char *filename,
struct perf_session *session) struct perf_session *session)
...@@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { ...@@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_CMDLINE, cmdline), FEAT_OPA(HEADER_CMDLINE, cmdline),
FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
}; };
struct header_print_data { struct header_print_data {
...@@ -1804,35 +1816,101 @@ int perf_header__process_sections(struct perf_header *header, int fd, ...@@ -1804,35 +1816,101 @@ int perf_header__process_sections(struct perf_header *header, int fd,
return err; return err;
} }
static int check_magic_endian(u64 *magic, struct perf_file_header *header, static const int attr_file_abi_sizes[] = {
struct perf_header *ph) [0] = PERF_ATTR_SIZE_VER0,
[1] = PERF_ATTR_SIZE_VER1,
0,
};
/*
* In the legacy file format, the magic number is not used to encode endianness.
* hdr_sz was used to encode endianness. But given that hdr_sz can vary based
* on ABI revisions, we need to try all combinations for all endianness to
* detect the endianness.
*/
static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
{ {
int ret; uint64_t ref_size, attr_size;
int i;
/* check for legacy format */ for (i = 0 ; attr_file_abi_sizes[i]; i++) {
ret = memcmp(magic, __perf_magic1, sizeof(*magic)); ref_size = attr_file_abi_sizes[i]
if (ret == 0) { + sizeof(struct perf_file_section);
pr_debug("legacy perf.data format\n"); if (hdr_sz != ref_size) {
if (!header) attr_size = bswap_64(hdr_sz);
if (attr_size != ref_size)
continue;
ph->needs_swap = true;
}
pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
i,
ph->needs_swap);
return 0;
}
/* could not determine endianness */
return -1; return -1;
}
if (header->attr_size != sizeof(struct perf_file_attr)) { #define PERF_PIPE_HDR_VER0 16
u64 attr_size = bswap_64(header->attr_size);
if (attr_size != sizeof(struct perf_file_attr)) static const size_t attr_pipe_abi_sizes[] = {
return -1; [0] = PERF_PIPE_HDR_VER0,
0,
};
/*
* In the legacy pipe format, there is an implicit assumption that endianness
* between host recording the samples, and host parsing the samples is the
* same. This is not always the case given that the pipe output may always be
* redirected into a file and analyzed on a different machine with possibly a
* different endianness and perf_event ABI revisions in the perf tool itself.
*/
static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
{
	u64 attr_size;
	int i;

	/* the table of known pipe header sizes is zero terminated */
	for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
		if (hdr_sz != attr_pipe_abi_sizes[i]) {
			/*
			 * No match in host byte order: retry with the size
			 * byte-swapped. It must be compared against the
			 * reference ABI size; comparing it with hdr_sz
			 * itself would never match for a real header size.
			 */
			attr_size = bswap_64(hdr_sz);
			if (attr_size != attr_pipe_abi_sizes[i])
				continue;

			ph->needs_swap = true;
		}
		pr_debug("Pipe ABI%d perf.data file detected\n", i);
		return 0;
	}
	/* could not match any known pipe header size */
	return -1;
}
/* check magic number with same endianness */ static int check_magic_endian(u64 magic, uint64_t hdr_sz,
if (*magic == __perf_magic2) bool is_pipe, struct perf_header *ph)
{
int ret;
/* check for legacy format */
ret = memcmp(&magic, __perf_magic1, sizeof(magic));
if (ret == 0) {
pr_debug("legacy perf.data format\n");
if (is_pipe)
return try_all_pipe_abis(hdr_sz, ph);
return try_all_file_abis(hdr_sz, ph);
}
/*
* the new magic number serves two purposes:
* - unique number to identify actual perf.data files
* - encode endianness of file
*/
/* check magic number with one endianness */
if (magic == __perf_magic2)
return 0; return 0;
/* check magic number but opposite endianness */ /* check magic number with opposite endianness */
if (*magic != __perf_magic2_sw) if (magic != __perf_magic2_sw)
return -1; return -1;
ph->needs_swap = true; ph->needs_swap = true;
...@@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header, ...@@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header,
if (ret <= 0) if (ret <= 0)
return -1; return -1;
if (check_magic_endian(&header->magic, header, ph) < 0) if (check_magic_endian(header->magic,
header->attr_size, false, ph) < 0) {
pr_debug("magic/endian check failed\n");
return -1; return -1;
}
if (ph->needs_swap) { if (ph->needs_swap) {
mem_bswap_64(header, offsetof(struct perf_file_header, mem_bswap_64(header, offsetof(struct perf_file_header,
...@@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, ...@@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
if (ret <= 0) if (ret <= 0)
return -1; return -1;
if (check_magic_endian(&header->magic, NULL, ph) < 0) if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
return -1; pr_debug("endian/magic failed\n");
if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
return -1; return -1;
}
if (header->size != sizeof(*header)) { if (ph->needs_swap)
u64 size = bswap_64(header->size); header->size = bswap_64(header->size);
if (size != sizeof(*header)) if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
return -1; return -1;
ph->needs_swap = true;
}
return 0; return 0;
} }
...@@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) ...@@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
return 0; return 0;
} }
/*
 * Read one attribute record from @fd into @f_attr: a perf_event_attr whose
 * on-file size may be smaller than the one this tool was built with,
 * followed by the perf_file_section stored in f_attr->ids.
 *
 * @fd:     file descriptor positioned at the start of the record
 * @ph:     header state; ph->needs_swap tells whether the on-file size field
 *          must be byte-swapped before it is interpreted
 * @f_attr: destination, zeroed first so fields absent from an older on-file
 *          ABI stay 0
 *
 * Returns 0 on success, -1 on a failed read or an unusable on-file size.
 */
static int read_attr(int fd, struct perf_header *ph,
		     struct perf_file_attr *f_attr)
{
	struct perf_event_attr *attr = &f_attr->attr;
	size_t sz, left;
	size_t our_sz = sizeof(f_attr->attr);
	int ret;

	memset(f_attr, 0, sizeof(*f_attr));

	/* read minimal guaranteed structure */
	ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
	if (ret <= 0) {
		pr_debug("cannot read %d bytes of header attr\n",
			 PERF_ATTR_SIZE_VER0);
		return -1;
	}

	/* on file perf_event_attr size */
	sz = attr->size;

	if (ph->needs_swap)
		sz = bswap_32(sz);

	if (sz == 0) {
		/* assume ABI0 */
		sz = PERF_ATTR_SIZE_VER0;
	} else if (sz < PERF_ATTR_SIZE_VER0) {
		/*
		 * A non-zero size below the minimal ABI would make 'left'
		 * wrap around below and turn into a huge read past f_attr.
		 */
		pr_debug("file uses an invalid attr size (%zu bytes)\n", sz);
		return -1;
	} else if (sz > our_sz) {
		pr_debug("file uses a more recent and unsupported ABI"
			 " (%zu bytes extra)\n", sz - our_sz);
		return -1;
	}

	/* what we have not yet read and that we know about */
	left = sz - PERF_ATTR_SIZE_VER0;
	if (left) {
		/* byte pointer: arithmetic on void * is a GNU extension */
		char *ptr = (char *)attr + PERF_ATTR_SIZE_VER0;

		ret = readn(fd, ptr, left);
		if (ret <= 0) {
			pr_debug("cannot read %zu remaining bytes of header attr\n",
				 left);
			return -1;
		}
	}

	/* read perf_file_section, ids are read in caller */
	ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));

	return ret <= 0 ? -1 : 0;
}
int perf_session__read_header(struct perf_session *session, int fd) int perf_session__read_header(struct perf_session *session, int fd)
{ {
struct perf_header *header = &session->header; struct perf_header *header = &session->header;
...@@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) ...@@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
if (session->fd_pipe) if (session->fd_pipe)
return perf_header__read_pipe(session, fd); return perf_header__read_pipe(session, fd);
if (perf_file_header__read(&f_header, header, fd) < 0) { if (perf_file_header__read(&f_header, header, fd) < 0)
pr_debug("incompatible file format\n");
return -EINVAL; return -EINVAL;
}
nr_attrs = f_header.attrs.size / sizeof(f_attr); nr_attrs = f_header.attrs.size / f_header.attr_size;
lseek(fd, f_header.attrs.offset, SEEK_SET); lseek(fd, f_header.attrs.offset, SEEK_SET);
for (i = 0; i < nr_attrs; i++) { for (i = 0; i < nr_attrs; i++) {
struct perf_evsel *evsel; struct perf_evsel *evsel;
off_t tmp; off_t tmp;
if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) if (read_attr(fd, header, &f_attr) < 0)
goto out_errno; goto out_errno;
if (header->needs_swap) if (header->needs_swap)
......
...@@ -27,7 +27,7 @@ enum { ...@@ -27,7 +27,7 @@ enum {
HEADER_EVENT_DESC, HEADER_EVENT_DESC,
HEADER_CPU_TOPOLOGY, HEADER_CPU_TOPOLOGY,
HEADER_NUMA_TOPOLOGY, HEADER_NUMA_TOPOLOGY,
HEADER_BRANCH_STACK,
HEADER_LAST_FEATURE, HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256, HEADER_FEAT_BITS = 256,
}; };
......
...@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) ...@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
hists__set_col_len(hists, col, 0); hists__set_col_len(hists, col, 0);
} }
static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
{ {
u16 len;
if (h->ms.sym)
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen);
else {
const unsigned int unresolved_col_width = BITS_PER_LONG / 4; const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && if (hists__col_len(hists, dso) < unresolved_col_width &&
!symbol_conf.col_width_list_str && !symbol_conf.field_sep && !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
!symbol_conf.dso_list) !symbol_conf.dso_list)
hists__set_col_len(hists, HISTC_DSO, hists__set_col_len(hists, dso, unresolved_col_width);
unresolved_col_width); }
}
static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
{
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
u16 len;
if (h->ms.sym)
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
else
hists__set_unres_dso_col_len(hists, HISTC_DSO);
len = thread__comm_len(h->thread); len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len)) if (hists__new_col_len(hists, HISTC_COMM, len))
...@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
len = dso__name_len(h->ms.map->dso); len = dso__name_len(h->ms.map->dso);
hists__new_col_len(hists, HISTC_DSO, len); hists__new_col_len(hists, HISTC_DSO, len);
} }
if (h->branch_info) {
int symlen;
/*
* +4 accounts for '[x] ' priv level info
* +2 accounts for the 0x prefix on raw addresses
*/
if (h->branch_info->from.sym) {
symlen = (int)h->branch_info->from.sym->namelen + 4;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
symlen = dso__name_len(h->branch_info->from.map->dso);
hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
}
if (h->branch_info->to.sym) {
symlen = (int)h->branch_info->to.sym->namelen + 4;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
symlen = dso__name_len(h->branch_info->to.map->dso);
hists__new_col_len(hists, HISTC_DSO_TO, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
}
}
} }
static void hist_entry__add_cpumode_period(struct hist_entry *he, static void hist_entry__add_cpumode_period(struct hist_entry *he,
...@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) ...@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
return 0; return 0;
} }
struct hist_entry *__hists__add_entry(struct hists *hists, static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry,
struct addr_location *al, struct addr_location *al,
struct symbol *sym_parent, u64 period) u64 period)
{ {
struct rb_node **p; struct rb_node **p;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct hist_entry *he; struct hist_entry *he;
struct hist_entry entry = {
.thread = al->thread,
.ms = {
.map = al->map,
.sym = al->sym,
},
.cpu = al->cpu,
.ip = al->addr,
.level = al->level,
.period = period,
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent),
};
int cmp; int cmp;
pthread_mutex_lock(&hists->lock); pthread_mutex_lock(&hists->lock);
...@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, ...@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
parent = *p; parent = *p;
he = rb_entry(parent, struct hist_entry, rb_node_in); he = rb_entry(parent, struct hist_entry, rb_node_in);
cmp = hist_entry__cmp(&entry, he); cmp = hist_entry__cmp(entry, he);
if (!cmp) { if (!cmp) {
he->period += period; he->period += period;
...@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, ...@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
p = &(*p)->rb_right; p = &(*p)->rb_right;
} }
he = hist_entry__new(&entry); he = hist_entry__new(entry);
if (!he) if (!he)
goto out_unlock; goto out_unlock;
...@@ -252,6 +275,51 @@ struct hist_entry *__hists__add_entry(struct hists *hists, ...@@ -252,6 +275,51 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
return he; return he;
} }
/*
 * Add (or merge) a histogram entry describing one resolved branch.
 *
 * The entry template takes its map, symbol and ip from the branch target
 * (bi->to), its thread, cpu and level from @al, and keeps @bi attached as
 * branch_info. Insertion itself is delegated to add_hist_entry(), whose
 * result is returned unchanged.
 */
struct hist_entry *__hists__add_branch_entry(struct hists *self,
					     struct addr_location *al,
					     struct symbol *sym_parent,
					     struct branch_info *bi,
					     u64 period)
{
	struct hist_entry template = {
		.branch_info	= bi,
		.filtered	= symbol__parent_filter(sym_parent),
		.parent		= sym_parent,
		.period		= period,
		.level		= al->level,
		.ip		= bi->to.addr,
		.cpu		= al->cpu,
		.ms.sym		= bi->to.sym,
		.ms.map		= bi->to.map,
		.thread		= al->thread,
	};

	return add_hist_entry(self, &template, al, period);
}
/*
 * Add (or merge) a histogram entry for a plain sample.
 *
 * The entry template is filled entirely from @al (thread, map, symbol, cpu,
 * address, level) plus @sym_parent and @period. Insertion is delegated to
 * add_hist_entry(), whose result is returned unchanged.
 */
struct hist_entry *__hists__add_entry(struct hists *self,
				      struct addr_location *al,
				      struct symbol *sym_parent, u64 period)
{
	struct hist_entry template = {
		.filtered	= symbol__parent_filter(sym_parent),
		.parent		= sym_parent,
		.period		= period,
		.level		= al->level,
		.ip		= al->addr,
		.cpu		= al->cpu,
		.ms.sym		= al->sym,
		.ms.map		= al->map,
		.thread		= al->thread,
	};

	return add_hist_entry(self, &template, al, period);
}
int64_t int64_t
hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
{ {
......
...@@ -42,6 +42,11 @@ enum hist_column { ...@@ -42,6 +42,11 @@ enum hist_column {
HISTC_COMM, HISTC_COMM,
HISTC_PARENT, HISTC_PARENT,
HISTC_CPU, HISTC_CPU,
HISTC_MISPREDICT,
HISTC_SYMBOL_FROM,
HISTC_SYMBOL_TO,
HISTC_DSO_FROM,
HISTC_DSO_TO,
HISTC_NR_COLS, /* Last entry */ HISTC_NR_COLS, /* Last entry */
}; };
...@@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, ...@@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
struct hists *hists); struct hists *hists);
void hist_entry__free(struct hist_entry *); void hist_entry__free(struct hist_entry *);
struct hist_entry *__hists__add_branch_entry(struct hists *self,
struct addr_location *al,
struct symbol *sym_parent,
struct branch_info *bi,
u64 period);
void hists__output_resort(struct hists *self); void hists__output_resort(struct hists *self);
void hists__output_resort_threaded(struct hists *hists); void hists__output_resort_threaded(struct hists *hists);
void hists__collapse_resort(struct hists *self); void hists__collapse_resort(struct hists *self);
......
...@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) ...@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
self->fd = STDIN_FILENO; self->fd = STDIN_FILENO;
if (perf_session__read_header(self, self->fd) < 0) if (perf_session__read_header(self, self->fd) < 0)
pr_err("incompatible file format"); pr_err("incompatible file format (rerun with -v to learn more)");
return 0; return 0;
} }
...@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) ...@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
} }
if (perf_session__read_header(self, self->fd) < 0) { if (perf_session__read_header(self, self->fd) < 0) {
pr_err("incompatible file format"); pr_err("incompatible file format (rerun with -v to learn more)");
goto out_close; goto out_close;
} }
...@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) ...@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0; return 0;
} }
/*
 * CPU modes probed, in this order, by ip__resolve_ams() when resolving a
 * branch stack address.
 */
static const u8 cpumodes[] = {
PERF_RECORD_MISC_USER,
PERF_RECORD_MISC_KERNEL,
PERF_RECORD_MISC_GUEST_USER,
PERF_RECORD_MISC_GUEST_KERNEL
};
/* number of entries in cpumodes[] */
#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
/*
 * Resolve a branch stack address @ip into @ams (map, symbol, addresses).
 *
 * The header.misc hint cannot be used to tell whether a branch stack
 * address is user, kernel, guest or hypervisor, because branches may
 * straddle those boundaries. Each entry of cpumodes[] is therefore tried in
 * turn until one lookup yields a symbol; if none does, @ams receives
 * whatever the last lookup left in the addr_location (symbol is NULL).
 */
static void ip__resolve_ams(struct machine *self, struct thread *thread,
			    struct addr_map_symbol *ams,
			    u64 ip)
{
	struct addr_location loc;
	size_t mode;

	memset(&loc, 0, sizeof(loc));

	for (mode = 0; mode < NCPUMODES; mode++) {
		thread__find_addr_location(thread, self, cpumodes[mode],
					   MAP__FUNCTION, ip, &loc, NULL);
		/* stop at the first cpumode that yields a symbol */
		if (loc.sym)
			break;
	}

	ams->map     = loc.map;
	ams->sym     = loc.sym;
	ams->al_addr = loc.addr;
	ams->addr    = ip;
}
/*
 * Resolve every entry of branch stack @bs for thread @thr.
 *
 * Returns a calloc()ed array of bs->nr branch_info elements, with the
 * target and source of each branch resolved through ip__resolve_ams() and
 * the flags copied over, or NULL when the allocation fails. Nothing in this
 * function frees the array.
 */
struct branch_info *machine__resolve_bstack(struct machine *self,
					    struct thread *thr,
					    struct branch_stack *bs)
{
	struct branch_info *resolved = calloc(bs->nr, sizeof(struct branch_info));
	unsigned int n;

	if (resolved == NULL)
		return NULL;

	for (n = 0; n < bs->nr; n++) {
		struct branch_info *dst = &resolved[n];

		/* target first, then source, as before */
		ip__resolve_ams(self, thr, &dst->to, bs->entries[n].to);
		ip__resolve_ams(self, thr, &dst->from, bs->entries[n].from);
		dst->flags = bs->entries[n].flags;
	}

	return resolved;
}
int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
struct thread *thread, struct thread *thread,
struct ip_callchain *chain, struct ip_callchain *chain,
...@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) ...@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample)
i, sample->callchain->ips[i]); i, sample->callchain->ips[i]);
} }
static void branch_stack__printf(struct perf_sample *sample)
{
uint64_t i;
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
i, sample->branch_stack->entries[i].from,
sample->branch_stack->entries[i].to);
}
static void perf_session__print_tstamp(struct perf_session *session, static void perf_session__print_tstamp(struct perf_session *session,
union perf_event *event, union perf_event *event,
struct perf_sample *sample) struct perf_sample *sample)
...@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, ...@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
if (session->sample_type & PERF_SAMPLE_CALLCHAIN) if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
callchain__printf(sample); callchain__printf(sample);
if (session->sample_type & PERF_SAMPLE_BRANCH_STACK)
branch_stack__printf(sample);
} }
static struct machine * static struct machine *
......
...@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel ...@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel
struct ip_callchain *chain, struct ip_callchain *chain,
struct symbol **parent); struct symbol **parent);
struct branch_info *machine__resolve_bstack(struct machine *self,
struct thread *thread,
struct branch_stack *bs);
bool perf_session__has_traces(struct perf_session *self, const char *msg); bool perf_session__has_traces(struct perf_session *self, const char *msg);
void mem_bswap_64(void *src, int byte_size); void mem_bswap_64(void *src, int byte_size);
......
This diff is collapsed.
...@@ -31,11 +31,16 @@ extern const char *parent_pattern; ...@@ -31,11 +31,16 @@ extern const char *parent_pattern;
extern const char default_sort_order[]; extern const char default_sort_order[];
extern int sort__need_collapse; extern int sort__need_collapse;
extern int sort__has_parent; extern int sort__has_parent;
extern int sort__branch_mode;
extern char *field_sep; extern char *field_sep;
extern struct sort_entry sort_comm; extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso; extern struct sort_entry sort_dso;
extern struct sort_entry sort_sym; extern struct sort_entry sort_sym;
extern struct sort_entry sort_parent; extern struct sort_entry sort_parent;
extern struct sort_entry sort_dso_from;
extern struct sort_entry sort_dso_to;
extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern enum sort_type sort__first_dimension; extern enum sort_type sort__first_dimension;
/** /**
...@@ -72,6 +77,7 @@ struct hist_entry { ...@@ -72,6 +77,7 @@ struct hist_entry {
struct hist_entry *pair; struct hist_entry *pair;
struct rb_root sorted_chain; struct rb_root sorted_chain;
}; };
struct branch_info *branch_info;
struct callchain_root callchain[0]; struct callchain_root callchain[0];
}; };
...@@ -82,6 +88,11 @@ enum sort_type { ...@@ -82,6 +88,11 @@ enum sort_type {
SORT_SYM, SORT_SYM,
SORT_PARENT, SORT_PARENT,
SORT_CPU, SORT_CPU,
SORT_DSO_FROM,
SORT_DSO_TO,
SORT_SYM_FROM,
SORT_SYM_TO,
SORT_MISPREDICT,
}; };
/* /*
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include "map.h" #include "map.h"
#include "../perf.h"
#include <linux/list.h> #include <linux/list.h>
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <stdio.h> #include <stdio.h>
...@@ -96,7 +97,11 @@ struct symbol_conf { ...@@ -96,7 +97,11 @@ struct symbol_conf {
*col_width_list_str; *col_width_list_str;
struct strlist *dso_list, struct strlist *dso_list,
*comm_list, *comm_list,
*sym_list; *sym_list,
*dso_from_list,
*dso_to_list,
*sym_from_list,
*sym_to_list;
const char *symfs; const char *symfs;
}; };
...@@ -120,6 +125,19 @@ struct map_symbol { ...@@ -120,6 +125,19 @@ struct map_symbol {
bool has_children; bool has_children;
}; };
/*
 * An address together with the map and symbol it resolved to.
 * Filled by ip__resolve_ams() for each end of a sampled branch.
 */
struct addr_map_symbol {
struct map *map;	/* map reported by the address lookup */
struct symbol *sym;	/* symbol reported by the lookup, NULL if unresolved */
u64 addr;	/* address as passed in for resolution */
u64 al_addr;	/* addr_location.addr after the lookup (presumably map-relative; confirm) */
};
/* One resolved branch: both ends plus the flags of the sampled entry. */
struct branch_info {
struct addr_map_symbol from;	/* resolved branch source */
struct addr_map_symbol to;	/* resolved branch target */
struct branch_flags flags;	/* copied from the branch stack entry */
};
struct addr_location { struct addr_location {
struct thread *thread; struct thread *thread;
struct map *map; struct map *map;
......
...@@ -805,7 +805,10 @@ static struct hist_browser *hist_browser__new(struct hists *hists) ...@@ -805,7 +805,10 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
self->hists = hists; self->hists = hists;
self->b.refresh = hist_browser__refresh; self->b.refresh = hist_browser__refresh;
self->b.seek = ui_browser__hists_seek; self->b.seek = ui_browser__hists_seek;
self->b.use_navkeypressed = true, self->b.use_navkeypressed = true;
if (sort__branch_mode == 1)
self->has_symbols = sort_sym_from.list.next != NULL;
else
self->has_symbols = sort_sym.list.next != NULL; self->has_symbols = sort_sym.list.next != NULL;
} }
...@@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, ...@@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
return printed; return printed;
} }
/*
 * Free the first @n popup menu labels in @options and reset each slot to
 * NULL so a later call cannot free it twice. Does nothing when @n <= 0.
 */
static inline void free_popup_options(char **options, int n)
{
	char **slot;
	char **end;

	if (n <= 0)
		return;

	end = options + n;
	for (slot = options; slot != end; slot++) {
		free(*slot);
		*slot = NULL;
	}
}
static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
const char *helpline, const char *ev_name, const char *helpline, const char *ev_name,
bool left_exits, bool left_exits,
...@@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
{ {
struct hists *self = &evsel->hists; struct hists *self = &evsel->hists;
struct hist_browser *browser = hist_browser__new(self); struct hist_browser *browser = hist_browser__new(self);
struct branch_info *bi;
struct pstack *fstack; struct pstack *fstack;
char *options[16];
int nr_options = 0;
int key = -1; int key = -1;
if (browser == NULL) if (browser == NULL)
...@@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
ui_helpline__push(helpline); ui_helpline__push(helpline);
memset(options, 0, sizeof(options));
while (1) { while (1) {
const struct thread *thread = NULL; const struct thread *thread = NULL;
const struct dso *dso = NULL; const struct dso *dso = NULL;
char *options[16]; int choice = 0,
int nr_options = 0, choice = 0, i,
annotate = -2, zoom_dso = -2, zoom_thread = -2, annotate = -2, zoom_dso = -2, zoom_thread = -2,
browse_map = -2; annotate_f = -2, annotate_t = -2, browse_map = -2;
nr_options = 0;
key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
...@@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
thread = hist_browser__selected_thread(browser); thread = hist_browser__selected_thread(browser);
dso = browser->selection->map ? browser->selection->map->dso : NULL; dso = browser->selection->map ? browser->selection->map->dso : NULL;
} }
switch (key) { switch (key) {
case K_TAB: case K_TAB:
case K_UNTAB: case K_UNTAB:
...@@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (!browser->has_symbols) { if (!browser->has_symbols) {
ui_browser__warning(&browser->b, delay_secs * 2, ui_browser__warning(&browser->b, delay_secs * 2,
"Annotation is only available for symbolic views, " "Annotation is only available for symbolic views, "
"include \"sym\" in --sort to use it."); "include \"sym*\" in --sort to use it.");
continue; continue;
} }
...@@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (!browser->has_symbols) if (!browser->has_symbols)
goto add_exit_option; goto add_exit_option;
if (sort__branch_mode == 1) {
bi = browser->he_selection->branch_info;
if (browser->selection != NULL &&
bi &&
bi->from.sym != NULL &&
!bi->from.map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s",
bi->from.sym->name) > 0)
annotate_f = nr_options++;
if (browser->selection != NULL &&
bi &&
bi->to.sym != NULL &&
!bi->to.map->dso->annotate_warned &&
(bi->to.sym != bi->from.sym ||
bi->to.map->dso != bi->from.map->dso) &&
asprintf(&options[nr_options], "Annotate %s",
bi->to.sym->name) > 0)
annotate_t = nr_options++;
} else {
if (browser->selection != NULL && if (browser->selection != NULL &&
browser->selection->sym != NULL && browser->selection->sym != NULL &&
!browser->selection->map->dso->annotate_warned && !browser->selection->map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s", asprintf(&options[nr_options], "Annotate %s",
browser->selection->sym->name) > 0) browser->selection->sym->name) > 0)
annotate = nr_options++; annotate = nr_options++;
}
if (thread != NULL && if (thread != NULL &&
asprintf(&options[nr_options], "Zoom %s %s(%d) thread", asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
...@@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
browse_map = nr_options++; browse_map = nr_options++;
add_exit_option: add_exit_option:
options[nr_options++] = (char *)"Exit"; options[nr_options++] = (char *)"Exit";
retry_popup_menu:
choice = ui__popup_menu(nr_options, options); choice = ui__popup_menu(nr_options, options);
for (i = 0; i < nr_options - 1; ++i)
free(options[i]);
if (choice == nr_options - 1) if (choice == nr_options - 1)
break; break;
if (choice == -1) if (choice == -1) {
free_popup_options(options, nr_options - 1);
continue; continue;
}
if (choice == annotate) { if (choice == annotate || choice == annotate_t || choice == annotate_f) {
struct hist_entry *he; struct hist_entry *he;
int err; int err;
do_annotate: do_annotate:
he = hist_browser__selected_entry(browser); he = hist_browser__selected_entry(browser);
if (he == NULL) if (he == NULL)
continue; continue;
/*
* we stash the branch_info symbol + map into the ms
* so we don't have to rewrite all the annotation
* code to use branch_info.
* in branch mode, the ms struct is not used
*/
if (choice == annotate_f) {
he->ms.sym = he->branch_info->from.sym;
he->ms.map = he->branch_info->from.map;
} else if (choice == annotate_t) {
he->ms.sym = he->branch_info->to.sym;
he->ms.map = he->branch_info->to.map;
}
/* /*
* Don't let this be freed, say, by hists__decay_entry. * Don't let this be freed, say, by hists__decay_entry.
*/ */
...@@ -1024,9 +1078,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -1024,9 +1078,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
err = hist_entry__tui_annotate(he, evsel->idx, err = hist_entry__tui_annotate(he, evsel->idx,
timer, arg, delay_secs); timer, arg, delay_secs);
he->used = false; he->used = false;
/*
* offer option to annotate the other branch source or target
* (if they exist) when returning from annotate
*/
if ((err == 'q' || err == CTRL('c'))
&& annotate_t != -2 && annotate_f != -2)
goto retry_popup_menu;
ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
if (err) if (err)
ui_browser__handle_resize(&browser->b); ui_browser__handle_resize(&browser->b);
} else if (choice == browse_map) } else if (choice == browse_map)
map__browse(browser->selection->map); map__browse(browser->selection->map);
else if (choice == zoom_dso) { else if (choice == zoom_dso) {
...@@ -1072,6 +1135,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ...@@ -1072,6 +1135,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
pstack__delete(fstack); pstack__delete(fstack);
out: out:
hist_browser__delete(browser); hist_browser__delete(browser);
free_popup_options(options, nr_options - 1);
return key; return key;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment