Commit bea95c15 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'perf/hw-branch-sampling' into perf/core

Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parents f9b4eeb8 24bff2dc
......@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
{
int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
......
......@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (armpmu->map_event(event) == -ENOENT)
return -ENOENT;
......
......@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
{
int err = 0;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
......
......@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
......
......@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
{
int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:
......
......@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)
......
......@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
......
......@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}
/*
* check that branch_sample_type is compatible with
* settings needed for precise_ip > 1 which implies
* using the LBR to capture ALL taken branches at the
* priv levels of the measurement
*/
static inline int precise_br_compat(struct perf_event *event)
{
u64 m = event->attr.branch_sample_type;
u64 b = 0;
/* must capture all branches */
if (!(m & PERF_SAMPLE_BRANCH_ANY))
return 0;
m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_user)
b |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
b |= PERF_SAMPLE_BRANCH_KERNEL;
/*
* ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
*/
return m == b;
}
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
......@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
/*
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
if (event->attr.precise_ip > 1) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
if (!precise_br_compat(event))
return -EOPNOTSUPP;
/* branch_sample_type is compatible */
} else {
/*
* user did not specify branch_sample_type
*
* For PEBS fixups, we capture all
* the branches at the priv level of the
* event.
*/
*br_type = PERF_SAMPLE_BRANCH_ANY;
if (!event->attr.exclude_user)
*br_type |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
}
}
}
/*
......@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
/* mark not used */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
}
......@@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
static void x86_pmu_flush_branch_stack(void)
{
if (x86_pmu.flush_branch_stack)
x86_pmu.flush_branch_stack();
}
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
};
void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
......
......@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
......@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;
/*
* Intel host/guest exclude bits
......@@ -344,6 +347,7 @@ struct x86_pmu {
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
void (*flush_branch_stack)(void);
/*
* Intel Arch Perfmon v2+
......@@ -365,6 +369,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
......@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
......@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
void intel_pmu_lbr_init_snb(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
int p4_pmu_init(void);
int p6_pmu_init(void);
......
......@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
......
......@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
if (has_branch_stack(event))
return true;
/* implicit branch sampling to correct PEBS skid */
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
return true;
return false;
}
static void intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
/*
* must disable before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_disable(event);
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
......@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config);
return;
}
/*
* must enabled before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
......@@ -1058,6 +1084,9 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
data.period = event->hw.last_period;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
......@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
struct perf_event *event,
struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
return &unconstrained;
return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
......@@ -1157,14 +1186,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
reg->alloc = 1;
/*
* All events using extra_reg are unconstrained.
* Avoids calling x86_get_event_constraints()
*
* Must revisit if extra_reg controlling events
* ever have constraints. Worst case we go through
* the regular event constraint table.
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = &unconstrained;
c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
......@@ -1201,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct event_constraint *c = NULL;
if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
c = __intel_shared_reg_get_constraints(cpuc, event);
struct event_constraint *c = NULL, *d;
struct hw_perf_event_extra *xreg, *breg;
xreg = &event->hw.extra_reg;
if (xreg->idx != EXTRA_REG_NONE) {
c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
if (c == &emptyconstraint)
return c;
}
breg = &event->hw.branch_reg;
if (breg->idx != EXTRA_REG_NONE) {
d = __intel_shared_reg_get_constraints(cpuc, event, breg);
if (d == &emptyconstraint) {
__intel_shared_reg_put_constraints(cpuc, xreg);
c = d;
}
}
return c;
}
......@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
reg = &event->hw.branch_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
......@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->hw.config = alt_config;
}
if (intel_pmu_needs_lbr_smpl(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
}
if (event->attr.type != PERF_TYPE_RAW)
return 0;
......@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
if (!x86_pmu.extra_regs)
if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
......@@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
cpuc->lbr_sel = NULL;
if (!cpuc->shared_regs)
return;
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_shared_regs *pc;
if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_shared_regs *pc;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
cpuc->kfree_on_online = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
cpuc->kfree_on_online = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
}
}
cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++;
}
cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++;
if (x86_pmu.lbr_sel_map)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
......@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
static void intel_pmu_flush_branch_stack(void)
{
/*
* Intel LBR does not tag entries with the
* PID of the current task, then we need to
* flush it on ctxsw
* For now, we simply reset it
*/
if (x86_pmu.lbr_nr)
intel_pmu_lbr_reset();
}
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
......@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
.flush_branch_stack = intel_pmu_flush_branch_stack,
};
static __init void intel_clovertown_quirk(void)
......@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
intel_pmu_lbr_init_nhm();
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
......
......@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "perf_event.h"
......@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_enable(event);
}
void intel_pmu_pebs_disable(struct perf_event *event)
......@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_disable(event);
}
void intel_pmu_pebs_enable_all(void)
......@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
#include <asm/insn.h>
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this
* routine.
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
......@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0);
}
......
......@@ -3,6 +3,7 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>
#include "perf_event.h"
......@@ -13,6 +14,100 @@ enum {
LBR_FORMAT_EIP_FLAGS = 0x03,
};
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
*
* Hardware branch filter (not available on all CPUs)
*/
#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT 2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
#define LBR_RETURN_BIT 5 /* do not capture near returns */
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
#define LBR_FAR_BIT 8 /* do not capture far branches */
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
#define LBR_USER (1 << LBR_USER_BIT)
#define LBR_JCC (1 << LBR_JCC_BIT)
#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
#define LBR_RETURN (1 << LBR_RETURN_BIT)
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
#define LBR_FAR (1 << LBR_FAR_BIT)
#define LBR_PLM (LBR_KERNEL | LBR_USER)
#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
#define LBR_IGN 0 /* ignored */
#define LBR_ANY \
(LBR_JCC |\
LBR_REL_CALL |\
LBR_IND_CALL |\
LBR_RETURN |\
LBR_REL_JMP |\
LBR_IND_JMP |\
LBR_FAR)
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
#define for_each_branch_sample_type(x) \
for ((x) = PERF_SAMPLE_BRANCH_USER; \
(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
/*
* x86control flow change classification
* x86control flow changes include branches, interrupts, traps, faults
*/
enum {
X86_BR_NONE = 0, /* unknown */
X86_BR_USER = 1 << 0, /* branch target is user */
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
X86_BR_CALL = 1 << 2, /* call */
X86_BR_RET = 1 << 3, /* return */
X86_BR_SYSCALL = 1 << 4, /* syscall */
X86_BR_SYSRET = 1 << 5, /* syscall return */
X86_BR_INT = 1 << 6, /* sw interrupt */
X86_BR_IRET = 1 << 7, /* return from interrupt */
X86_BR_JCC = 1 << 8, /* conditional */
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANY \
(X86_BR_CALL |\
X86_BR_RET |\
X86_BR_SYSCALL |\
X86_BR_SYSRET |\
X86_BR_INT |\
X86_BR_IRET |\
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
X86_BR_IND_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
#define X86_BR_ANY_CALL \
(X86_BR_CALL |\
X86_BR_IND_CALL |\
X86_BR_SYSCALL |\
X86_BR_IRQ |\
X86_BR_INT)
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
/*
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
* otherwise it becomes near impossible to get a reliable stack.
......@@ -21,6 +116,10 @@ enum {
static void __intel_pmu_lbr_enable(void)
{
u64 debugctl;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
......@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
* Reset the LBR stack if we changed task context to
* avoid data leaks.
*/
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
intel_pmu_lbr_reset();
cpuc->lbr_context = event->ctx;
}
cpuc->br_sel = event->hw.branch_reg.reg;
cpuc->lbr_users++;
}
......@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
if (cpuc->enabled && !cpuc->lbr_users)
if (cpuc->enabled && !cpuc->lbr_users) {
__intel_pmu_lbr_disable();
/* avoid stale pointer */
cpuc->lbr_context = NULL;
}
}
void intel_pmu_lbr_enable_all(void)
......@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
__intel_pmu_lbr_disable();
}
/*
* TOS = most recently recorded branch
*/
static inline u64 intel_pmu_lbr_tos(void)
{
u64 tos;
......@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
cpuc->lbr_entries[i].from = msr_lastbranch.from;
cpuc->lbr_entries[i].to = msr_lastbranch.to;
cpuc->lbr_entries[i].flags = 0;
cpuc->lbr_entries[i].from = msr_lastbranch.from;
cpuc->lbr_entries[i].to = msr_lastbranch.to;
cpuc->lbr_entries[i].mispred = 0;
cpuc->lbr_entries[i].predicted = 0;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
/*
* Due to lack of segmentation in Linux the effective address (offset)
* is the same as the linear address, allowing us to merge the LIP and EIP
......@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, flags = 0;
u64 from, to, mis = 0, pred = 0;
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
flags = !!(from & LBR_FROM_FLAG_MISPRED);
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
from = (u64)((((s64)from) << 1) >> 1);
}
cpuc->lbr_entries[i].from = from;
cpuc->lbr_entries[i].to = to;
cpuc->lbr_entries[i].flags = flags;
cpuc->lbr_entries[i].from = from;
cpuc->lbr_entries[i].to = to;
cpuc->lbr_entries[i].mispred = mis;
cpuc->lbr_entries[i].predicted = pred;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
......@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
intel_pmu_lbr_read_32(cpuc);
else
intel_pmu_lbr_read_64(cpuc);
intel_pmu_lbr_filter(cpuc);
}
/*
* SW filter is used:
* - in case there is no HW filter
* - in case the HW filter has errata or limitations
*/
static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
u64 br_type = event->attr.branch_sample_type;
int mask = 0;
if (br_type & PERF_SAMPLE_BRANCH_USER)
mask |= X86_BR_USER;
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
mask |= X86_BR_KERNEL;
/* we ignore BRANCH_HV here */
if (br_type & PERF_SAMPLE_BRANCH_ANY)
mask |= X86_BR_ANY;
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
mask |= X86_BR_ANY_CALL;
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
mask |= X86_BR_IND_CALL;
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
*/
event->hw.branch_reg.reg = mask;
}
/*
* setup the HW LBR filter
* Used only when available, may not be enough to disambiguate
* all branches, may need the help of the SW filter
*/
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
struct hw_perf_event_extra *reg;
u64 br_type = event->attr.branch_sample_type;
u64 mask = 0, m;
u64 v;
for_each_branch_sample_type(m) {
if (!(br_type & m))
continue;
v = x86_pmu.lbr_sel_map[m];
if (v == LBR_NOT_SUPP)
return -EOPNOTSUPP;
if (v != LBR_IGN)
mask |= v;
}
reg = &event->hw.branch_reg;
reg->idx = EXTRA_REG_LBR;
/* LBR_SELECT operates in suppress mode so invert mask */
reg->config = ~mask & x86_pmu.lbr_sel_mask;
return 0;
}
int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
int ret = 0;
/*
* no LBR on this PMU
*/
if (!x86_pmu.lbr_nr)
return -EOPNOTSUPP;
/*
* setup SW LBR filter
*/
intel_pmu_setup_sw_lbr_filter(event);
/*
* setup HW LBR filter, if any
*/
if (x86_pmu.lbr_sel_map)
ret = intel_pmu_setup_hw_lbr_filter(event);
return ret;
}
/*
* return the type of control flow change at address "from"
* intruction is not necessarily a branch (in case of interrupt).
*
* The branch type returned also includes the priv level of the
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
*
* If a branch type is unknown OR the instruction cannot be
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*/
static int branch_type(unsigned long from, unsigned long to)
{
struct insn insn;
void *addr;
int bytes, size = MAX_INSN_SIZE;
int ret = X86_BR_NONE;
int ext, to_plm, from_plm;
u8 buf[MAX_INSN_SIZE];
int is64 = 0;
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
/*
* maybe zero if lbr did not fill up after a reset by the time
* we get a PMU interrupt
*/
if (from == 0 || to == 0)
return X86_BR_NONE;
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
* and we interrupt in a kernel thread, e.g., idle.
*/
if (!current->mm)
return X86_BR_NONE;
/* may fail if text not present */
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
if (bytes != size)
return X86_BR_NONE;
addr = buf;
} else
addr = (void *)from;
/*
* decoder needs to know the ABI especially
* on 64-bit systems running 32-bit apps
*/
#ifdef CONFIG_X86_64
is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
insn_init(&insn, addr, is64);
insn_get_opcode(&insn);
switch (insn.opcode.bytes[0]) {
case 0xf:
switch (insn.opcode.bytes[1]) {
case 0x05: /* syscall */
case 0x34: /* sysenter */
ret = X86_BR_SYSCALL;
break;
case 0x07: /* sysret */
case 0x35: /* sysexit */
ret = X86_BR_SYSRET;
break;
case 0x80 ... 0x8f: /* conditional */
ret = X86_BR_JCC;
break;
default:
ret = X86_BR_NONE;
}
break;
case 0x70 ... 0x7f: /* conditional */
ret = X86_BR_JCC;
break;
case 0xc2: /* near ret */
case 0xc3: /* near ret */
case 0xca: /* far ret */
case 0xcb: /* far ret */
ret = X86_BR_RET;
break;
case 0xcf: /* iret */
ret = X86_BR_IRET;
break;
case 0xcc ... 0xce: /* int */
ret = X86_BR_INT;
break;
case 0xe8: /* call near rel */
case 0x9a: /* call far absolute */
ret = X86_BR_CALL;
break;
case 0xe0 ... 0xe3: /* loop jmp */
ret = X86_BR_JCC;
break;
case 0xe9 ... 0xeb: /* jmp */
ret = X86_BR_JMP;
break;
case 0xff: /* call near absolute, call far absolute ind */
insn_get_modrm(&insn);
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
switch (ext) {
case 2: /* near ind call */
case 3: /* far ind call */
ret = X86_BR_IND_CALL;
break;
case 4:
case 5:
ret = X86_BR_JMP;
break;
}
break;
default:
ret = X86_BR_NONE;
}
/*
* interrupts, traps, faults (and thus ring transition) may
* occur on any instructions. Thus, to classify them correctly,
* we need to first look at the from and to priv levels. If they
* are different and to is in the kernel, then it indicates
* a ring transition. If the from instruction is not a ring
* transition instr (syscall, systenter, int), then it means
* it was a irq, trap or fault.
*
* we have no way of detecting kernel to kernel faults.
*/
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
ret = X86_BR_IRQ;
/*
* branch priv level determined by target as
* is done by HW when LBR_SELECT is implemented
*/
if (ret != X86_BR_NONE)
ret |= to_plm;
return ret;
}
/*
* implement actual branch filter based on user demand.
* Hardware may not exactly satisfy that request, thus
* we need to inspect opcodes. Mismatched branches are
* discarded. Therefore, the number of branches returned
* in PERF_SAMPLE_BRANCH_STACK sample may vary.
*/
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
u64 from, to;
int br_sel = cpuc->br_sel;
int i, j, type;
bool compress = false;
/* if sampling all branches, then nothing to filter */
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
return;
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
from = cpuc->lbr_entries[i].from;
to = cpuc->lbr_entries[i].to;
type = branch_type(from, to);
/* if type does not correspond, then discard */
if (type == X86_BR_NONE || (br_sel & type) != type) {
cpuc->lbr_entries[i].from = 0;
compress = true;
}
}
if (!compress)
return;
/* remove all entries with from=0 */
for (i = 0; i < cpuc->lbr_stack.nr; ) {
if (!cpuc->lbr_entries[i].from) {
j = i;
while (++j < cpuc->lbr_stack.nr)
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
cpuc->lbr_stack.nr--;
if (!cpuc->lbr_entries[i].from)
continue;
}
i++;
}
}
/*
* Map interface branch filters onto LBR filters
*/
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
| LBR_IND_JMP | LBR_FAR,
/*
* NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
*/
[PERF_SAMPLE_BRANCH_ANY_CALL] =
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
/*
* NHM/WSM erratum: must include IND_JMP to capture IND_CALL
*/
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
};
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
[PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
| LBR_FAR,
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
};
/* core */
void intel_pmu_lbr_init_core(void)
{
x86_pmu.lbr_nr = 4;
x86_pmu.lbr_tos = 0x01c9;
x86_pmu.lbr_from = 0x40;
x86_pmu.lbr_to = 0x60;
x86_pmu.lbr_tos = MSR_LBR_TOS;
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
/*
* SW branch filter usage:
* - compensate for lack of HW filter
*/
pr_cont("4-deep LBR, ");
}
/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
x86_pmu.lbr_nr = 16;
x86_pmu.lbr_tos = 0x01c9;
x86_pmu.lbr_from = 0x680;
x86_pmu.lbr_to = 0x6c0;
x86_pmu.lbr_tos = MSR_LBR_TOS;
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
/*
* SW branch filter usage:
* - workaround LBR_SEL errata (see above)
* - support syscall, sysret capture.
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
pr_cont("16-deep LBR, ");
}
/* sandy bridge */
void intel_pmu_lbr_init_snb(void)
{
x86_pmu.lbr_nr = 16;
x86_pmu.lbr_tos = MSR_LBR_TOS;
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
/*
* SW branch filter usage:
* - support syscall, sysret capture.
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
pr_cont("16-deep LBR, ");
}
/* atom */
void intel_pmu_lbr_init_atom(void)
{
/*
* only models starting at stepping 10 seems
* to have an operational LBR which can freeze
* on PMU interrupt
*/
if (boot_cpu_data.x86_mask < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
x86_pmu.lbr_nr = 8;
x86_pmu.lbr_tos = 0x01c9;
x86_pmu.lbr_from = 0x40;
x86_pmu.lbr_to = 0x60;
x86_pmu.lbr_tos = MSR_LBR_TOS;
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
/*
* SW branch filter usage:
* - compensate for lack of HW filter
*/
pr_cont("8-deep LBR, ");
}
......@@ -129,10 +129,39 @@ enum perf_event_sample_format {
PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9,
PERF_SAMPLE_RAW = 1U << 10,
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
};
/*
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
* not have to match. Branch priv level is checked for permissions.
*
* The branch types can be combined, however BRANCH_ANY covers all types
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
PERF_SAMPLE_BRANCH_HV)
/*
* The format of the data returned by read() on a perf event fd,
* as specified by attr.read_format:
......@@ -163,6 +192,8 @@ enum perf_event_read_format {
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
/*
* Hardware event_id to monitor via a performance monitoring event:
......@@ -240,6 +271,7 @@ struct perf_event_attr {
__u64 bp_len;
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum branch_sample_type */
};
/*
......@@ -458,6 +490,8 @@ enum perf_event_type {
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
* };
*/
PERF_RECORD_SAMPLE = 9,
......@@ -530,12 +564,34 @@ struct perf_raw_record {
void *data;
};
/*
* single taken branch record layout:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
* mispred: branch target was mispredicted
* predicted: branch target was predicted
*
* support for mispred, predicted is optional. In case it
* is not supported mispred = predicted = 0.
*/
struct perf_branch_entry {
__u64 from;
__u64 to;
__u64 flags;
__u64 from;
__u64 to;
__u64 mispred:1, /* target mispredicted */
predicted:1,/* target predicted */
reserved:62;
};
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
*/
struct perf_branch_stack {
__u64 nr;
struct perf_branch_entry entries[0];
......@@ -566,7 +622,9 @@ struct hw_perf_event {
unsigned long event_base;
int idx;
int last_cpu;
struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg;
};
struct { /* software */
struct hrtimer hrtimer;
......@@ -690,6 +748,11 @@ struct pmu {
* if no implementation is provided it will default to: event->hw.idx + 1.
*/
int (*event_idx) (struct perf_event *event); /*optional */
/*
* flush branch stack on context-switches (needed in cpu-wide mode)
*/
void (*flush_branch_stack) (void);
};
/**
......@@ -923,7 +986,8 @@ struct perf_event_context {
u64 parent_gen;
u64 generation;
int pin_count;
int nr_cgroups; /* cgroup events present */
int nr_cgroups; /* cgroup evts */
int nr_branch_stack; /* branch_stack evt */
struct rcu_head rcu_head;
};
......@@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
struct perf_sample_data {
u64 type;
......@@ -1007,12 +1072,14 @@ struct perf_sample_data {
u64 period;
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
};
static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{
data->addr = addr;
data->raw = NULL;
data->br_stack = NULL;
}
extern void perf_output_sample(struct perf_output_handle *handle,
......@@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
# define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
static inline bool has_branch_stack(struct perf_event *event)
{
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);
......
......@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
PERF_FLAG_FD_OUTPUT |\
PERF_FLAG_PID_CGROUP)
/*
* branch priv levels that need permission checks
*/
#define PERF_SAMPLE_BRANCH_PERM_PLM \
(PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
......@@ -130,6 +137,7 @@ enum event_type_t {
*/
struct static_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
......@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (is_cgroup_event(event))
ctx->nr_cgroups++;
if (has_branch_stack(event))
ctx->nr_branch_stack++;
list_add_rcu(&event->event_entry, &ctx->event_list);
if (!ctx->nr_events)
perf_pmu_rotate_start(ctx->pmu);
......@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
cpuctx->cgrp = NULL;
}
if (has_branch_stack(event))
ctx->nr_branch_stack--;
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
......@@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
perf_pmu_rotate_start(ctx->pmu);
}
/*
* When sampling the branck stack in system-wide, it may be necessary
* to flush the stack on context switch. This happens when the branch
* stack does not tag its entries with the pid of the current task.
* Otherwise it becomes impossible to associate a branch entry with a
* task. This ambiguity is more likely to appear when the branch stack
* supports priv level filtering and the user sets it to monitor only
* at the user level (which could be a useful measurement in system-wide
* mode). In that case, the risk is high of having a branch stack with
* branch from multiple tasks. Flushing may mean dropping the existing
* entries or stashing them somewhere in the PMU specific code layer.
*
* This function provides the context switch callback to the lower code
* layer. It is invoked ONLY when there is at least one system-wide context
* with at least one active event using taken branch sampling.
*/
static void perf_branch_stack_sched_in(struct task_struct *prev,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
unsigned long flags;
/* no need to flush branch stack if not changing task */
if (prev == task)
return;
local_irq_save(flags);
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
/*
* check if the context has at least one
* event using PERF_SAMPLE_BRANCH_STACK
*/
if (cpuctx->ctx.nr_branch_stack > 0
&& pmu->flush_branch_stack) {
pmu = cpuctx->ctx.pmu;
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
pmu->flush_branch_stack();
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
}
rcu_read_unlock();
local_irq_restore(flags);
}
/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
......@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
perf_cgroup_sched_in(prev, task);
/* check for system-wide branch_stack events */
if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
perf_branch_stack_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
......@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
static_key_slow_dec_deferred(&perf_sched_events);
}
if (has_branch_stack(event)) {
static_key_slow_dec_deferred(&perf_sched_events);
/* is system-wide event */
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_dec(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
if (event->rb) {
......@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
if (data->br_stack) {
size_t size;
size = data->br_stack->nr
* sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr);
perf_output_copy(handle, data->br_stack->entries, size);
} else {
/*
* we always store at least the value of nr
*/
u64 nr = 0;
perf_output_put(handle, nr);
}
}
}
void perf_prepare_sample(struct perf_event_header *header,
......@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
WARN_ON_ONCE(size & (sizeof(u64)-1));
header->size += size;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
if (data->br_stack) {
size += data->br_stack->nr
* sizeof(struct perf_branch_entry);
}
header->size += size;
}
}
static void perf_event_output(struct perf_event *event,
......@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_SOFTWARE)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
case PERF_COUNT_SW_TASK_CLOCK:
......@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -ENOENT;
/*
* no branch sampling for tracepoint events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
err = perf_trace_init(event);
if (err)
return err;
......@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event);
return 0;
......@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event);
return 0;
......@@ -5866,6 +6003,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
return ERR_PTR(err);
}
}
if (has_branch_stack(event)) {
static_key_slow_inc(&perf_sched_events.key);
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_inc(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
return event;
......@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->read_format & ~(PERF_FORMAT_MAX-1))
return -EINVAL;
if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
u64 mask = attr->branch_sample_type;
/* only using defined bits */
if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
return -EINVAL;
/* at least one branch bit must be set */
if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
return -EINVAL;
/* kernel level capture: check permissions */
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
/* propagate priv level, when not set for branch */
if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
/* exclude_kernel checked on syscall entry */
if (!attr->exclude_kernel)
mask |= PERF_SAMPLE_BRANCH_KERNEL;
if (!attr->exclude_user)
mask |= PERF_SAMPLE_BRANCH_USER;
if (!attr->exclude_hv)
mask |= PERF_SAMPLE_BRANCH_HV;
/*
* adjust user setting (for HW filter setup)
*/
attr->branch_sample_type = mask;
}
}
out:
return ret;
......
......@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
if (bp->attr.type != PERF_TYPE_BREAKPOINT)
return -ENOENT;
/*
* no branch sampling for breakpoint events
*/
if (has_branch_stack(bp))
return -EOPNOTSUPP;
err = register_perf_hw_breakpoint(bp);
if (err)
return err;
......
......@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
corresponding events, i.e., they always refer to events defined earlier on the command
line.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters. The
following filters are defined:
- any: any type of branches
- any_call: any function call or system call
- any_ret: any function return or system call return
- any_ind: any indirect branch
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
+
The option requires at least one branch type among any, any_call, any_ret, ind_call.
The privilege levels may be ommitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
......@@ -153,6 +153,16 @@ OPTIONS
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
-b::
--branch-stack::
Use the addresses of sampled taken branches instead of the instruction
address to build the histograms. To generate meaningful output, the
perf.data file must have been obtained using perf record -b or
perf record --branch-filter xxx where xxx is a branch filter option.
perf report is able to auto-detect whether a perf.data file contains
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
......@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (!have_tracepoints(&evsel_list->entries))
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
if (!rec->opts.branch_stack)
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
if (!rec->file_new) {
err = perf_session__read_header(session, output);
if (err < 0)
......@@ -638,6 +641,90 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
return err;
}
#define BRANCH_OPT(n, m) \
{ .name = n, .mode = (m) }
#define BRANCH_END { .name = NULL }
struct branch_mode {
const char *name;
int mode;
};
static const struct branch_mode branch_modes[] = {
BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
BRANCH_END
};
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
(PERF_SAMPLE_BRANCH_USER |\
PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
uint64_t *mode = (uint64_t *)opt->value;
const struct branch_mode *br;
char *s, *os = NULL, *p;
int ret = -1;
if (unset)
return 0;
/*
* cannot set it twice, -b + --branch-filter for instance
*/
if (*mode)
return -1;
/* str may be NULL in case no arg is passed to -b */
if (str) {
/* because str is read-only */
s = os = strdup(str);
if (!s)
return -1;
for (;;) {
p = strchr(s, ',');
if (p)
*p = '\0';
for (br = branch_modes; br->name; br++) {
if (!strcasecmp(s, br->name))
break;
}
if (!br->name) {
ui__warning("unknown branch filter %s,"
" check man page\n", s);
goto error;
}
*mode |= br->mode;
if (!p)
break;
s = p + 1;
}
}
ret = 0;
/* default to any branch */
if ((*mode & ~ONLY_PLM) == 0) {
*mode = PERF_SAMPLE_BRANCH_ANY;
}
error:
free(os);
return ret;
}
static const char * const record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
......@@ -727,6 +814,14 @@ const struct option record_options[] = {
"monitor event in cgroup name only",
parse_cgroups),
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
"branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_END()
};
......
......@@ -53,6 +53,82 @@ struct perf_report {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
struct addr_location *al,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct symbol *parent = NULL;
int err = 0;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
if ((sort__has_parent || symbol_conf.use_callchain)
&& sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample->callchain, &parent);
if (err)
return err;
}
bi = machine__resolve_bstack(machine, al->thread,
sample->branch_stack);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_branch_entry(&evsel->hists, al, parent,
&bi[i], 1);
if (he) {
struct annotation *notes;
err = -ENOMEM;
bx = he->branch_info;
if (bx->from.sym && use_browser > 0) {
notes = symbol__annotation(bx->from.sym);
if (!notes->src
&& symbol__alloc_hist(bx->from.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->from.sym,
bx->from.map,
evsel->idx,
bx->from.al_addr);
if (err)
goto out;
}
if (bx->to.sym && use_browser > 0) {
notes = symbol__annotation(bx->to.sym);
if (!notes->src
&& symbol__alloc_hist(bx->to.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->to.sym,
bx->to.map,
evsel->idx,
bx->to.al_addr);
if (err)
goto out;
}
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
err = 0;
} else
return -ENOMEM;
}
out:
return err;
}
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample,
......@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
if (al.map != NULL)
al.map->dso->hit = 1;
if (sort__branch_mode == 1) {
if (perf_report__add_branch_hist_entry(tool, &al, sample,
evsel, machine)) {
pr_debug("problem adding lbr entry, skipping event\n");
return -1;
}
} else {
if (al.map != NULL)
al.map->dso->hit = 1;
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
}
}
return 0;
}
......@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
}
}
if (sort__branch_mode == 1) {
if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
fprintf(stderr, "selected -b but no branch data."
" Did you call perf record without"
" -b?\n");
return -1;
}
}
return 0;
}
......@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
{
int ret = -EINVAL;
u64 nr_samples;
struct perf_session *session;
struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct map *kernel_map;
struct kmap *kernel_kmap;
......@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
signal(SIGINT, sig_handler);
session = perf_session__new(rep->input_name, O_RDONLY,
rep->force, false, &rep->tool);
if (session == NULL)
return -ENOMEM;
rep->session = session;
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
......@@ -427,9 +512,19 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return 0;
}
static int
parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
{
sort__branch_mode = !unset;
return 0;
}
int cmd_report(int argc, const char **argv, const char *prefix __used)
{
struct perf_session *session;
struct stat st;
bool has_br_stack = false;
int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
"perf report [<options>]",
......@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
"sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
" dso_from, symbol_to, symbol_from, mispredict"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
......@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
"use branch records for histogram filling", parse_branch_mode),
OPT_END()
};
......@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
else
report.input_name = "perf.data";
}
session = perf_session__new(report.input_name, O_RDONLY,
report.force, false, &report.tool);
if (session == NULL)
return -ENOMEM;
if (strcmp(report.input_name, "-") != 0)
report.session = session;
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (sort__branch_mode == -1 && has_br_stack)
sort__branch_mode = 1;
/* sort__branch_mode could be 0 if --no-branch-stack */
if (sort__branch_mode == 1) {
/*
* if no sort_order is provided, then specify
* branch-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "comm,dso_from,symbol_from,"
"dso_to,symbol_to";
}
if (strcmp(report.input_name, "-") != 0) {
setup_browser(true);
else
} else {
use_browser = 0;
}
/*
* Only in the newt browser we are doing integrated annotation,
......@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
}
if (symbol__init() < 0)
return -1;
goto error;
setup_sorting(report_usage, options);
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
return -1;
goto error;
/*
* Only show the parent fields if we explicitly
......@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
if (argc)
usage_with_options(report_usage, options);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
return __cmd_report(&report);
if (sort__branch_mode == 1) {
sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
} else {
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
}
ret = __cmd_report(&report);
error:
perf_session__delete(session);
return ret;
}
......@@ -179,6 +179,23 @@ struct ip_callchain {
u64 ips[0];
};
struct branch_flags {
u64 mispred:1;
u64 predicted:1;
u64 reserved:62;
};
struct branch_entry {
u64 from;
u64 to;
struct branch_flags flags;
};
struct branch_stack {
u64 nr;
struct branch_entry entries[0];
};
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
......@@ -205,6 +222,7 @@ struct perf_record_opts {
unsigned int freq;
unsigned int mmap_pages;
unsigned int user_freq;
int branch_stack;
u64 default_interval;
u64 user_interval;
const char *cpu_list;
......
......@@ -81,6 +81,7 @@ struct perf_sample {
u32 raw_size;
void *raw_data;
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
};
#define BUILD_ID_SIZE 20
......
......@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
attr->watermark = 0;
attr->wakeup_events = 1;
}
if (opts->branch_stack) {
attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
attr->branch_sample_type = opts->branch_stack;
}
attr->mmap = track;
attr->comm = track;
......@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
data->raw_data = (void *) pdata;
}
if (type & PERF_SAMPLE_BRANCH_STACK) {
u64 sz;
data->branch_stack = (struct branch_stack *)array;
array++; /* nr */
sz = data->branch_stack->nr * sizeof(struct branch_entry);
sz /= sizeof(u64);
array += sz;
}
return 0;
}
......
......@@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used,
return do_write_string(fd, buffer);
}
static int write_branch_stack(int fd __used, struct perf_header *h __used,
struct perf_evlist *evlist __used)
{
return 0;
}
static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
{
char *str = do_read_string(fd, ph);
......@@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
uint64_t id;
void *buf = NULL;
char *str;
u32 nre, sz, nr, i, j, msz;
int ret;
u32 nre, sz, nr, i, j;
ssize_t ret;
size_t msz;
/* number of events */
ret = read(fd, &nre, sizeof(nre));
......@@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
if (ph->needs_swap)
sz = bswap_32(sz);
/*
* ensure it is at least to our ABI rev
*/
if (sz < (u32)sizeof(attr))
goto error;
memset(&attr, 0, sizeof(attr));
/* read entire region to sync up to next field */
/* buffer to hold on file attr struct */
buf = malloc(sz);
if (!buf)
goto error;
msz = sizeof(attr);
if (sz < msz)
if (sz < (ssize_t)msz)
msz = sz;
for (i = 0 ; i < nre; i++) {
/*
* must read entire on-file attr struct to
* sync up with layout.
*/
ret = read(fd, buf, sz);
if (ret != (ssize_t)sz)
goto error;
......@@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
free(str);
}
static void print_branch_stack(struct perf_header *ph __used, int fd __used,
FILE *fp)
{
fprintf(fp, "# contains samples with branch stack\n");
}
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
......@@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_CMDLINE, cmdline),
FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
};
struct header_print_data {
......@@ -1804,35 +1816,101 @@ int perf_header__process_sections(struct perf_header *header, int fd,
return err;
}
static int check_magic_endian(u64 *magic, struct perf_file_header *header,
struct perf_header *ph)
static const int attr_file_abi_sizes[] = {
[0] = PERF_ATTR_SIZE_VER0,
[1] = PERF_ATTR_SIZE_VER1,
0,
};
/*
* In the legacy file format, the magic number is not used to encode endianness.
* hdr_sz was used to encode endianness. But given that hdr_sz can vary based
* on ABI revisions, we need to try all combinations for all endianness to
* detect the endianness.
*/
static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
{
int ret;
uint64_t ref_size, attr_size;
int i;
/* check for legacy format */
ret = memcmp(magic, __perf_magic1, sizeof(*magic));
if (ret == 0) {
pr_debug("legacy perf.data format\n");
if (!header)
return -1;
for (i = 0 ; attr_file_abi_sizes[i]; i++) {
ref_size = attr_file_abi_sizes[i]
+ sizeof(struct perf_file_section);
if (hdr_sz != ref_size) {
attr_size = bswap_64(hdr_sz);
if (attr_size != ref_size)
continue;
if (header->attr_size != sizeof(struct perf_file_attr)) {
u64 attr_size = bswap_64(header->attr_size);
ph->needs_swap = true;
}
pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
i,
ph->needs_swap);
return 0;
}
/* could not determine endianness */
return -1;
}
if (attr_size != sizeof(struct perf_file_attr))
return -1;
#define PERF_PIPE_HDR_VER0 16
static const size_t attr_pipe_abi_sizes[] = {
[0] = PERF_PIPE_HDR_VER0,
0,
};
/*
* In the legacy pipe format, there is an implicit assumption that endiannesss
* between host recording the samples, and host parsing the samples is the
* same. This is not always the case given that the pipe output may always be
* redirected into a file and analyzed on a different machine with possibly a
* different endianness and perf_event ABI revsions in the perf tool itself.
*/
static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
{
u64 attr_size;
int i;
for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
if (hdr_sz != attr_pipe_abi_sizes[i]) {
attr_size = bswap_64(hdr_sz);
if (attr_size != hdr_sz)
continue;
ph->needs_swap = true;
}
pr_debug("Pipe ABI%d perf.data file detected\n", i);
return 0;
}
return -1;
}
static int check_magic_endian(u64 magic, uint64_t hdr_sz,
bool is_pipe, struct perf_header *ph)
{
int ret;
/* check for legacy format */
ret = memcmp(&magic, __perf_magic1, sizeof(magic));
if (ret == 0) {
pr_debug("legacy perf.data format\n");
if (is_pipe)
return try_all_pipe_abis(hdr_sz, ph);
return try_all_file_abis(hdr_sz, ph);
}
/*
* the new magic number serves two purposes:
* - unique number to identify actual perf.data files
* - encode endianness of file
*/
/* check magic number with same endianness */
if (*magic == __perf_magic2)
/* check magic number with one endianness */
if (magic == __perf_magic2)
return 0;
/* check magic number but opposite endianness */
if (*magic != __perf_magic2_sw)
/* check magic number with opposite endianness */
if (magic != __perf_magic2_sw)
return -1;
ph->needs_swap = true;
......@@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header,
if (ret <= 0)
return -1;
if (check_magic_endian(&header->magic, header, ph) < 0)
if (check_magic_endian(header->magic,
header->attr_size, false, ph) < 0) {
pr_debug("magic/endian check failed\n");
return -1;
}
if (ph->needs_swap) {
mem_bswap_64(header, offsetof(struct perf_file_header,
......@@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
if (ret <= 0)
return -1;
if (check_magic_endian(&header->magic, NULL, ph) < 0)
if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
pr_debug("endian/magic failed\n");
return -1;
}
if (ph->needs_swap)
header->size = bswap_64(header->size);
if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
return -1;
if (header->size != sizeof(*header)) {
u64 size = bswap_64(header->size);
if (size != sizeof(*header))
return -1;
ph->needs_swap = true;
}
return 0;
}
......@@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
return 0;
}
static int read_attr(int fd, struct perf_header *ph,
struct perf_file_attr *f_attr)
{
struct perf_event_attr *attr = &f_attr->attr;
size_t sz, left;
size_t our_sz = sizeof(f_attr->attr);
int ret;
memset(f_attr, 0, sizeof(*f_attr));
/* read minimal guaranteed structure */
ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
if (ret <= 0) {
pr_debug("cannot read %d bytes of header attr\n",
PERF_ATTR_SIZE_VER0);
return -1;
}
/* on file perf_event_attr size */
sz = attr->size;
if (ph->needs_swap)
sz = bswap_32(sz);
if (sz == 0) {
/* assume ABI0 */
sz = PERF_ATTR_SIZE_VER0;
} else if (sz > our_sz) {
pr_debug("file uses a more recent and unsupported ABI"
" (%zu bytes extra)\n", sz - our_sz);
return -1;
}
/* what we have not yet read and that we know about */
left = sz - PERF_ATTR_SIZE_VER0;
if (left) {
void *ptr = attr;
ptr += PERF_ATTR_SIZE_VER0;
ret = readn(fd, ptr, left);
}
/* read perf_file_section, ids are read in caller */
ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
return ret <= 0 ? -1 : 0;
}
int perf_session__read_header(struct perf_session *session, int fd)
{
struct perf_header *header = &session->header;
......@@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
if (session->fd_pipe)
return perf_header__read_pipe(session, fd);
if (perf_file_header__read(&f_header, header, fd) < 0) {
pr_debug("incompatible file format\n");
if (perf_file_header__read(&f_header, header, fd) < 0)
return -EINVAL;
}
nr_attrs = f_header.attrs.size / sizeof(f_attr);
nr_attrs = f_header.attrs.size / f_header.attr_size;
lseek(fd, f_header.attrs.offset, SEEK_SET);
for (i = 0; i < nr_attrs; i++) {
struct perf_evsel *evsel;
off_t tmp;
if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
if (read_attr(fd, header, &f_attr) < 0)
goto out_errno;
if (header->needs_swap)
......
......@@ -27,7 +27,7 @@ enum {
HEADER_EVENT_DESC,
HEADER_CPU_TOPOLOGY,
HEADER_NUMA_TOPOLOGY,
HEADER_BRANCH_STACK,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
......
......@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
hists__set_col_len(hists, col, 0);
}
static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
{
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
if (hists__col_len(hists, dso) < unresolved_col_width &&
!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
!symbol_conf.dso_list)
hists__set_col_len(hists, dso, unresolved_col_width);
}
static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
{
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
u16 len;
if (h->ms.sym)
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen);
else {
const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width &&
!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
!symbol_conf.dso_list)
hists__set_col_len(hists, HISTC_DSO,
unresolved_col_width);
}
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
else
hists__set_unres_dso_col_len(hists, HISTC_DSO);
len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len))
......@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
len = dso__name_len(h->ms.map->dso);
hists__new_col_len(hists, HISTC_DSO, len);
}
if (h->branch_info) {
int symlen;
/*
* +4 accounts for '[x] ' priv level info
* +2 account of 0x prefix on raw addresses
*/
if (h->branch_info->from.sym) {
symlen = (int)h->branch_info->from.sym->namelen + 4;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
symlen = dso__name_len(h->branch_info->from.map->dso);
hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
}
if (h->branch_info->to.sym) {
symlen = (int)h->branch_info->to.sym->namelen + 4;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
symlen = dso__name_len(h->branch_info->to.map->dso);
hists__new_col_len(hists, HISTC_DSO_TO, symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
}
}
}
static void hist_entry__add_cpumode_period(struct hist_entry *he,
......@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
return 0;
}
struct hist_entry *__hists__add_entry(struct hists *hists,
static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry,
struct addr_location *al,
struct symbol *sym_parent, u64 period)
u64 period)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
struct hist_entry entry = {
.thread = al->thread,
.ms = {
.map = al->map,
.sym = al->sym,
},
.cpu = al->cpu,
.ip = al->addr,
.level = al->level,
.period = period,
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent),
};
int cmp;
pthread_mutex_lock(&hists->lock);
......@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
parent = *p;
he = rb_entry(parent, struct hist_entry, rb_node_in);
cmp = hist_entry__cmp(&entry, he);
cmp = hist_entry__cmp(entry, he);
if (!cmp) {
he->period += period;
......@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
p = &(*p)->rb_right;
}
he = hist_entry__new(&entry);
he = hist_entry__new(entry);
if (!he)
goto out_unlock;
......@@ -252,6 +275,51 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
return he;
}
struct hist_entry *__hists__add_branch_entry(struct hists *self,
struct addr_location *al,
struct symbol *sym_parent,
struct branch_info *bi,
u64 period)
{
struct hist_entry entry = {
.thread = al->thread,
.ms = {
.map = bi->to.map,
.sym = bi->to.sym,
},
.cpu = al->cpu,
.ip = bi->to.addr,
.level = al->level,
.period = period,
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent),
.branch_info = bi,
};
return add_hist_entry(self, &entry, al, period);
}
struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al,
struct symbol *sym_parent, u64 period)
{
struct hist_entry entry = {
.thread = al->thread,
.ms = {
.map = al->map,
.sym = al->sym,
},
.cpu = al->cpu,
.ip = al->addr,
.level = al->level,
.period = period,
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent),
};
return add_hist_entry(self, &entry, al, period);
}
int64_t
hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
{
......
......@@ -42,6 +42,11 @@ enum hist_column {
HISTC_COMM,
HISTC_PARENT,
HISTC_CPU,
HISTC_MISPREDICT,
HISTC_SYMBOL_FROM,
HISTC_SYMBOL_TO,
HISTC_DSO_FROM,
HISTC_DSO_TO,
HISTC_NR_COLS, /* Last entry */
};
......@@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
struct hists *hists);
void hist_entry__free(struct hist_entry *);
struct hist_entry *__hists__add_branch_entry(struct hists *self,
struct addr_location *al,
struct symbol *sym_parent,
struct branch_info *bi,
u64 period);
void hists__output_resort(struct hists *self);
void hists__output_resort_threaded(struct hists *hists);
void hists__collapse_resort(struct hists *self);
......
......@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
self->fd = STDIN_FILENO;
if (perf_session__read_header(self, self->fd) < 0)
pr_err("incompatible file format");
pr_err("incompatible file format (rerun with -v to learn more)");
return 0;
}
......@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
}
if (perf_session__read_header(self, self->fd) < 0) {
pr_err("incompatible file format");
pr_err("incompatible file format (rerun with -v to learn more)");
goto out_close;
}
......@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0;
}
static const u8 cpumodes[] = {
PERF_RECORD_MISC_USER,
PERF_RECORD_MISC_KERNEL,
PERF_RECORD_MISC_GUEST_USER,
PERF_RECORD_MISC_GUEST_KERNEL
};
#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
static void ip__resolve_ams(struct machine *self, struct thread *thread,
struct addr_map_symbol *ams,
u64 ip)
{
struct addr_location al;
size_t i;
u8 m;
memset(&al, 0, sizeof(al));
for (i = 0; i < NCPUMODES; i++) {
m = cpumodes[i];
/*
* We cannot use the header.misc hint to determine whether a
* branch stack address is user, kernel, guest, hypervisor.
* Branches may straddle the kernel/user/hypervisor boundaries.
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
thread__find_addr_location(thread, self, m, MAP__FUNCTION,
ip, &al, NULL);
if (al.sym)
goto found;
}
found:
ams->addr = ip;
ams->al_addr = al.addr;
ams->sym = al.sym;
ams->map = al.map;
}
struct branch_info *machine__resolve_bstack(struct machine *self,
struct thread *thr,
struct branch_stack *bs)
{
struct branch_info *bi;
unsigned int i;
bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
return NULL;
for (i = 0; i < bs->nr; i++) {
ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to);
ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from);
bi[i].flags = bs->entries[i].flags;
}
return bi;
}
int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
struct thread *thread,
struct ip_callchain *chain,
......@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample)
i, sample->callchain->ips[i]);
}
static void branch_stack__printf(struct perf_sample *sample)
{
uint64_t i;
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++)
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
i, sample->branch_stack->entries[i].from,
sample->branch_stack->entries[i].to);
}
static void perf_session__print_tstamp(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample)
......@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
callchain__printf(sample);
if (session->sample_type & PERF_SAMPLE_BRANCH_STACK)
branch_stack__printf(sample);
}
static struct machine *
......
......@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel
struct ip_callchain *chain,
struct symbol **parent);
struct branch_info *machine__resolve_bstack(struct machine *self,
struct thread *thread,
struct branch_stack *bs);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
void mem_bswap_64(void *src, int byte_size);
......
......@@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol";
const char *sort_order = default_sort_order;
int sort__need_collapse = 0;
int sort__has_parent = 0;
int sort__branch_mode = -1; /* -1 = means not set */
enum sort_type sort__first_dimension;
......@@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
}
static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
{
struct dso *dso_l = map_l ? map_l->dso : NULL;
struct dso *dso_r = map_r ? map_r->dso : NULL;
const char *dso_name_l, *dso_name_r;
if (!dso_l || !dso_r)
return cmp_null(dso_l, dso_r);
if (verbose) {
dso_name_l = dso_l->long_name;
dso_name_r = dso_r->long_name;
} else {
dso_name_l = dso_l->short_name;
dso_name_r = dso_r->short_name;
}
return strcmp(dso_name_l, dso_name_r);
}
struct sort_entry sort_comm = {
.se_header = "Command",
.se_cmp = sort__comm_cmp,
......@@ -107,36 +128,74 @@ struct sort_entry sort_comm = {
static int64_t
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
{
struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL;
const char *dso_name_l, *dso_name_r;
return _sort__dso_cmp(left->ms.map, right->ms.map);
}
if (!dso_l || !dso_r)
return cmp_null(dso_l, dso_r);
if (verbose) {
dso_name_l = dso_l->long_name;
dso_name_r = dso_r->long_name;
} else {
dso_name_l = dso_l->short_name;
dso_name_r = dso_r->short_name;
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
u64 ip_l, u64 ip_r)
{
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
if (sym_l == sym_r)
return 0;
if (sym_l)
ip_l = sym_l->start;
if (sym_r)
ip_r = sym_r->start;
return (int64_t)(ip_r - ip_l);
}
static int _hist_entry__dso_snprintf(struct map *map, char *bf,
size_t size, unsigned int width)
{
if (map && map->dso) {
const char *dso_name = !verbose ? map->dso->short_name :
map->dso->long_name;
return repsep_snprintf(bf, size, "%-*s", width, dso_name);
}
return strcmp(dso_name_l, dso_name_r);
return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
}
static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width)
{
if (self->ms.map && self->ms.map->dso) {
const char *dso_name = !verbose ? self->ms.map->dso->short_name :
self->ms.map->dso->long_name;
return repsep_snprintf(bf, size, "%-*s", width, dso_name);
return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
}
static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
u64 ip, char level, char *bf, size_t size,
unsigned int width __used)
{
size_t ret = 0;
if (verbose) {
char o = map ? dso__symtab_origin(map->dso) : '!';
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
BITS_PER_LONG / 4, ip, o);
}
return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
if (sym)
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
width - ret,
sym->name);
else {
size_t len = BITS_PER_LONG / 4;
ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
len, ip);
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
width - ret, "");
}
return ret;
}
struct sort_entry sort_dso = {
.se_header = "Shared Object",
.se_cmp = sort__dso_cmp,
......@@ -144,8 +203,14 @@ struct sort_entry sort_dso = {
.se_width_idx = HISTC_DSO,
};
/* --sort symbol */
static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width __used)
{
return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
self->level, bf, size, width);
}
/* --sort symbol */
static int64_t
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
{
......@@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
ip_l = left->ms.sym->start;
ip_r = right->ms.sym->start;
return (int64_t)(ip_r - ip_l);
}
static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width __used)
{
size_t ret = 0;
if (verbose) {
char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
BITS_PER_LONG / 4, self->ip, o);
}
if (!sort_dso.elide)
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
if (self->ms.sym)
ret += repsep_snprintf(bf + ret, size - ret, "%s",
self->ms.sym->name);
else
ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
BITS_PER_LONG / 4, self->ip);
return ret;
return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
}
struct sort_entry sort_sym = {
......@@ -246,19 +287,155 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
static int64_t
sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
{
return _sort__dso_cmp(left->branch_info->from.map,
right->branch_info->from.map);
}
static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width)
{
return _hist_entry__dso_snprintf(self->branch_info->from.map,
bf, size, width);
}
struct sort_entry sort_dso_from = {
.se_header = "Source Shared Object",
.se_cmp = sort__dso_from_cmp,
.se_snprintf = hist_entry__dso_from_snprintf,
.se_width_idx = HISTC_DSO_FROM,
};
static int64_t
sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
{
return _sort__dso_cmp(left->branch_info->to.map,
right->branch_info->to.map);
}
static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width)
{
return _hist_entry__dso_snprintf(self->branch_info->to.map,
bf, size, width);
}
static int64_t
sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
{
struct addr_map_symbol *from_l = &left->branch_info->from;
struct addr_map_symbol *from_r = &right->branch_info->from;
if (!from_l->sym && !from_r->sym)
return right->level - left->level;
return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr,
from_r->addr);
}
static int64_t
sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
{
struct addr_map_symbol *to_l = &left->branch_info->to;
struct addr_map_symbol *to_r = &right->branch_info->to;
if (!to_l->sym && !to_r->sym)
return right->level - left->level;
return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr);
}
static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width __used)
{
struct addr_map_symbol *from = &self->branch_info->from;
return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
self->level, bf, size, width);
}
static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width __used)
{
struct addr_map_symbol *to = &self->branch_info->to;
return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
self->level, bf, size, width);
}
struct sort_entry sort_dso_to = {
.se_header = "Target Shared Object",
.se_cmp = sort__dso_to_cmp,
.se_snprintf = hist_entry__dso_to_snprintf,
.se_width_idx = HISTC_DSO_TO,
};
struct sort_entry sort_sym_from = {
.se_header = "Source Symbol",
.se_cmp = sort__sym_from_cmp,
.se_snprintf = hist_entry__sym_from_snprintf,
.se_width_idx = HISTC_SYMBOL_FROM,
};
struct sort_entry sort_sym_to = {
.se_header = "Target Symbol",
.se_cmp = sort__sym_to_cmp,
.se_snprintf = hist_entry__sym_to_snprintf,
.se_width_idx = HISTC_SYMBOL_TO,
};
static int64_t
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
{
const unsigned char mp = left->branch_info->flags.mispred !=
right->branch_info->flags.mispred;
const unsigned char p = left->branch_info->flags.predicted !=
right->branch_info->flags.predicted;
return mp || p;
}
static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
size_t size, unsigned int width){
static const char *out = "N/A";
if (self->branch_info->flags.predicted)
out = "N";
else if (self->branch_info->flags.mispred)
out = "Y";
return repsep_snprintf(bf, size, "%-*s", width, out);
}
struct sort_entry sort_mispredict = {
.se_header = "Branch Mispredicted",
.se_cmp = sort__mispredict_cmp,
.se_snprintf = hist_entry__mispredict_snprintf,
.se_width_idx = HISTC_MISPREDICT,
};
struct sort_dimension {
const char *name;
struct sort_entry *entry;
int taken;
};
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
static struct sort_dimension sort_dimensions[] = {
{ .name = "pid", .entry = &sort_thread, },
{ .name = "comm", .entry = &sort_comm, },
{ .name = "dso", .entry = &sort_dso, },
{ .name = "symbol", .entry = &sort_sym, },
{ .name = "parent", .entry = &sort_parent, },
{ .name = "cpu", .entry = &sort_cpu, },
DIM(SORT_PID, "pid", sort_thread),
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
DIM(SORT_SYM, "symbol", sort_sym),
DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
};
int sort_dimension__add(const char *tok)
......@@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
if (sd->entry == &sort_parent) {
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
if (ret) {
......@@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok)
sort__first_dimension = SORT_PARENT;
else if (!strcmp(sd->name, "cpu"))
sort__first_dimension = SORT_CPU;
else if (!strcmp(sd->name, "symbol_from"))
sort__first_dimension = SORT_SYM_FROM;
else if (!strcmp(sd->name, "symbol_to"))
sort__first_dimension = SORT_SYM_TO;
else if (!strcmp(sd->name, "dso_from"))
sort__first_dimension = SORT_DSO_FROM;
else if (!strcmp(sd->name, "dso_to"))
sort__first_dimension = SORT_DSO_TO;
else if (!strcmp(sd->name, "mispredict"))
sort__first_dimension = SORT_MISPREDICT;
}
list_add_tail(&sd->entry->list, &hist_entry__sort_list);
......@@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok)
return 0;
}
return -ESRCH;
}
......
......@@ -31,11 +31,16 @@ extern const char *parent_pattern;
extern const char default_sort_order[];
extern int sort__need_collapse;
extern int sort__has_parent;
extern int sort__branch_mode;
extern char *field_sep;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
extern struct sort_entry sort_sym;
extern struct sort_entry sort_parent;
extern struct sort_entry sort_dso_from;
extern struct sort_entry sort_dso_to;
extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern enum sort_type sort__first_dimension;
/**
......@@ -72,6 +77,7 @@ struct hist_entry {
struct hist_entry *pair;
struct rb_root sorted_chain;
};
struct branch_info *branch_info;
struct callchain_root callchain[0];
};
......@@ -82,6 +88,11 @@ enum sort_type {
SORT_SYM,
SORT_PARENT,
SORT_CPU,
SORT_DSO_FROM,
SORT_DSO_TO,
SORT_SYM_FROM,
SORT_SYM_TO,
SORT_MISPREDICT,
};
/*
......
......@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <stdint.h>
#include "map.h"
#include "../perf.h"
#include <linux/list.h>
#include <linux/rbtree.h>
#include <stdio.h>
......@@ -96,7 +97,11 @@ struct symbol_conf {
*col_width_list_str;
struct strlist *dso_list,
*comm_list,
*sym_list;
*sym_list,
*dso_from_list,
*dso_to_list,
*sym_from_list,
*sym_to_list;
const char *symfs;
};
......@@ -120,6 +125,19 @@ struct map_symbol {
bool has_children;
};
struct addr_map_symbol {
struct map *map;
struct symbol *sym;
u64 addr;
u64 al_addr;
};
struct branch_info {
struct addr_map_symbol from;
struct addr_map_symbol to;
struct branch_flags flags;
};
struct addr_location {
struct thread *thread;
struct map *map;
......
......@@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
self->hists = hists;
self->b.refresh = hist_browser__refresh;
self->b.seek = ui_browser__hists_seek;
self->b.use_navkeypressed = true,
self->has_symbols = sort_sym.list.next != NULL;
self->b.use_navkeypressed = true;
if (sort__branch_mode == 1)
self->has_symbols = sort_sym_from.list.next != NULL;
else
self->has_symbols = sort_sym.list.next != NULL;
}
return self;
......@@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
return printed;
}
static inline void free_popup_options(char **options, int n)
{
int i;
for (i = 0; i < n; ++i) {
free(options[i]);
options[i] = NULL;
}
}
static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
const char *helpline, const char *ev_name,
bool left_exits,
......@@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
{
struct hists *self = &evsel->hists;
struct hist_browser *browser = hist_browser__new(self);
struct branch_info *bi;
struct pstack *fstack;
char *options[16];
int nr_options = 0;
int key = -1;
if (browser == NULL)
......@@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
ui_helpline__push(helpline);
memset(options, 0, sizeof(options));
while (1) {
const struct thread *thread = NULL;
const struct dso *dso = NULL;
char *options[16];
int nr_options = 0, choice = 0, i,
int choice = 0,
annotate = -2, zoom_dso = -2, zoom_thread = -2,
browse_map = -2;
annotate_f = -2, annotate_t = -2, browse_map = -2;
nr_options = 0;
key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
......@@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
thread = hist_browser__selected_thread(browser);
dso = browser->selection->map ? browser->selection->map->dso : NULL;
}
switch (key) {
case K_TAB:
case K_UNTAB:
......@@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (!browser->has_symbols) {
ui_browser__warning(&browser->b, delay_secs * 2,
"Annotation is only available for symbolic views, "
"include \"sym\" in --sort to use it.");
"include \"sym*\" in --sort to use it.");
continue;
}
......@@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (!browser->has_symbols)
goto add_exit_option;
if (browser->selection != NULL &&
browser->selection->sym != NULL &&
!browser->selection->map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s",
browser->selection->sym->name) > 0)
annotate = nr_options++;
if (sort__branch_mode == 1) {
bi = browser->he_selection->branch_info;
if (browser->selection != NULL &&
bi &&
bi->from.sym != NULL &&
!bi->from.map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s",
bi->from.sym->name) > 0)
annotate_f = nr_options++;
if (browser->selection != NULL &&
bi &&
bi->to.sym != NULL &&
!bi->to.map->dso->annotate_warned &&
(bi->to.sym != bi->from.sym ||
bi->to.map->dso != bi->from.map->dso) &&
asprintf(&options[nr_options], "Annotate %s",
bi->to.sym->name) > 0)
annotate_t = nr_options++;
} else {
if (browser->selection != NULL &&
browser->selection->sym != NULL &&
!browser->selection->map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s",
browser->selection->sym->name) > 0)
annotate = nr_options++;
}
if (thread != NULL &&
asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
......@@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
browse_map = nr_options++;
add_exit_option:
options[nr_options++] = (char *)"Exit";
retry_popup_menu:
choice = ui__popup_menu(nr_options, options);
for (i = 0; i < nr_options - 1; ++i)
free(options[i]);
if (choice == nr_options - 1)
break;
if (choice == -1)
if (choice == -1) {
free_popup_options(options, nr_options - 1);
continue;
}
if (choice == annotate) {
if (choice == annotate || choice == annotate_t || choice == annotate_f) {
struct hist_entry *he;
int err;
do_annotate:
he = hist_browser__selected_entry(browser);
if (he == NULL)
continue;
/*
* we stash the branch_info symbol + map into the
* the ms so we don't have to rewrite all the annotation
* code to use branch_info.
* in branch mode, the ms struct is not used
*/
if (choice == annotate_f) {
he->ms.sym = he->branch_info->from.sym;
he->ms.map = he->branch_info->from.map;
} else if (choice == annotate_t) {
he->ms.sym = he->branch_info->to.sym;
he->ms.map = he->branch_info->to.map;
}
/*
* Don't let this be freed, say, by hists__decay_entry.
*/
......@@ -1024,9 +1078,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
err = hist_entry__tui_annotate(he, evsel->idx,
timer, arg, delay_secs);
he->used = false;
/*
* offer option to annotate the other branch source or target
* (if they exists) when returning from annotate
*/
if ((err == 'q' || err == CTRL('c'))
&& annotate_t != -2 && annotate_f != -2)
goto retry_popup_menu;
ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
if (err)
ui_browser__handle_resize(&browser->b);
} else if (choice == browse_map)
map__browse(browser->selection->map);
else if (choice == zoom_dso) {
......@@ -1072,6 +1135,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
pstack__delete(fstack);
out:
hist_browser__delete(browser);
free_popup_options(options, nr_options - 1);
return key;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment