Commit 35b740e4 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (106 commits)
  perf kvm: Fix copy & paste error in description
  perf script: Kill script_spec__delete
  perf top: Fix a memory leak
  perf stat: Introduce get_ratio_color() helper
  perf session: Remove impossible condition check
  perf tools: Fix feature-bits rework fallout, remove unused variable
  perf script: Add generic perl handler to process events
  perf tools: Use for_each_set_bit() to iterate over feature flags
  perf tools: Unify handling of features when writing feature section
  perf report: Accept fifos as input file
  perf tools: Moving code in some files
  perf tools: Fix out-of-bound access to struct perf_session
  perf tools: Continue processing header on unknown features
  perf tools: Improve macros for struct feature_ops
  perf: builtin-record: Document and check that mmap_pages must be a power of two.
  perf: builtin-record: Provide advice if mmap'ing fails with EPERM.
  perf tools: Fix truncated annotation
  perf script: look up thread using tid instead of pid
  perf tools: Look up thread names for system wide profiling
  perf tools: Fix comm for processes with named threads
  ...
parents 423d091d 9e183426
......@@ -1885,6 +1885,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
arch_perfmon: [X86] Force use of architectural
perfmon on Intel CPUs instead of the
CPU specific event set.
timer: [X86] Force use of architectural NMI
timer mode (see also oprofile.timer
for generic hr timer mode)
[s390] Force legacy basic mode sampling
(report cpu_type "timer")
oops=panic Always panic on oopses. Default is to just kill the
process, but there is a small probability of
......
......@@ -191,8 +191,6 @@ And for string fields they are:
Currently, only exact string matches are supported.
Currently, the maximum number of predicates in a filter is 16.
5.2 Setting filters
-------------------
......
......@@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX
config HAVE_OPROFILE
bool
config OPROFILE_NMI_TIMER
def_bool y
depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
config KPROBES
bool "Kprobes"
depends on MODULES
......
......@@ -22,6 +22,7 @@
#include <asm/irq.h>
#include "hwsampler.h"
#include "op_counter.h"
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
......@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
if (sample_data_ptr->P == 1) {
/* userspace sample */
unsigned int pid = sample_data_ptr->prim_asn;
if (!counter_config.user)
goto skip_sample;
rcu_read_lock();
tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
if (tsk)
......@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
rcu_read_unlock();
} else {
/* kernelspace sample */
if (!counter_config.kernel)
goto skip_sample;
regs = task_pt_regs(current);
}
......@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
!sample_data_ptr->P, tsk);
mutex_unlock(&hws_sem);
skip_sample:
sample_data_ptr++;
}
}
......
/**
* arch/s390/oprofile/op_counter.h
*
* Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
*
* @remark Copyright 2011 OProfile authors
*/
#ifndef OP_COUNTER_H
#define OP_COUNTER_H
struct op_counter_config {
/* `enabled' maps to the hwsampler_file variable. */
/* `count' maps to the oprofile_hw_interval variable. */
/* `event' and `unit_mask' are unused. */
unsigned long kernel;
unsigned long user;
};
extern struct op_counter_config counter_config;
#endif /* OP_COUNTER_H */
......@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
return (insn->vex_prefix.value != 0);
}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
return insn->opcode.got && insn->modrm.got && insn->sib.got &&
insn->displacement.got && insn->immediate.got;
}
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
......
......@@ -57,6 +57,7 @@
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
#define ARCH_PERFMON_EVENTS_COUNT 7
/*
* Intel "Architectural Performance Monitoring" CPUID
......@@ -72,6 +73,19 @@ union cpuid10_eax {
unsigned int full;
};
union cpuid10_ebx {
struct {
unsigned int no_unhalted_core_cycles:1;
unsigned int no_instructions_retired:1;
unsigned int no_unhalted_reference_cycles:1;
unsigned int no_llc_reference:1;
unsigned int no_llc_misses:1;
unsigned int no_branch_instruction_retired:1;
unsigned int no_branch_misses_retired:1;
} split;
unsigned int full;
};
union cpuid10_edx {
struct {
unsigned int num_counters_fixed:5;
......@@ -81,6 +95,15 @@ union cpuid10_edx {
unsigned int full;
};
struct x86_pmu_capability {
int version;
int num_counters_gp;
int num_counters_fixed;
int bit_width_gp;
int bit_width_fixed;
unsigned int events_mask;
int events_mask_len;
};
/*
* Fixed-purpose performance events:
......@@ -89,23 +112,24 @@ union cpuid10_edx {
/*
* All 3 fixed-mode PMCs are configured via this single MSR:
*/
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
* The counts are available in three separate MSRs:
*/
/* Instr_Retired.Any: */
#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
/*
* We model BTS tracing as another fixed-mode PMC.
......@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
};
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
......@@ -209,6 +234,11 @@ static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
return NULL;
}
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
memset(cap, 0, sizeof(*cap));
}
static inline void perf_events_lapic_init(void) { }
#endif
......
......@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
/*
* Event scheduler state:
*
* Assign events iterating over all events and counters, beginning
* with events with least weights first. Keep the current iterator
* state in struct sched_state.
*/
struct sched_state {
int weight;
int event; /* event index */
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
#define SCHED_STATES_MAX 2
struct perf_sched {
int max_weight;
int max_events;
struct event_constraint **constraints;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
};
/*
* Initialize the iterator that runs through all events and counters.
*/
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
int num, int wmin, int wmax)
{
int idx;
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
sched->constraints = c;
for (idx = 0; idx < num; idx++) {
if (c[idx]->weight == wmin)
break;
}
sched->state.event = idx; /* start with min weight */
sched->state.weight = wmin;
sched->state.unassigned = num;
}
static void perf_sched_save_state(struct perf_sched *sched)
{
if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
return;
sched->saved[sched->saved_states] = sched->state;
sched->saved_states++;
}
static bool perf_sched_restore_state(struct perf_sched *sched)
{
if (!sched->saved_states)
return false;
sched->saved_states--;
sched->state = sched->saved[sched->saved_states];
/* continue with next counter: */
clear_bit(sched->state.counter++, sched->state.used);
return true;
}
/*
* Select a counter for the current event to schedule. Return true on
* success.
*/
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
int idx;
if (!sched->state.unassigned)
return false;
if (sched->state.event >= sched->max_events)
return false;
c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
if (x86_pmu.num_counters_fixed) {
idx = X86_PMC_IDX_FIXED;
for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
return false;
done:
sched->state.counter = idx;
if (c->overlap)
perf_sched_save_state(sched);
return true;
}
static bool perf_sched_find_counter(struct perf_sched *sched)
{
while (!__perf_sched_find_counter(sched)) {
if (!perf_sched_restore_state(sched))
return false;
}
return true;
}
/*
* Go through all unassigned events and find the next one to schedule.
* Take events with the least weight first. Return true on success.
*/
static bool perf_sched_next_event(struct perf_sched *sched)
{
struct event_constraint *c;
if (!sched->state.unassigned || !--sched->state.unassigned)
return false;
do {
/* next event */
sched->state.event++;
if (sched->state.event >= sched->max_events) {
/* next weight */
sched->state.event = 0;
sched->state.weight++;
if (sched->state.weight > sched->max_weight)
return false;
}
c = sched->constraints[sched->state.event];
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
return true;
}
/*
* Assign a counter for each event.
*/
static int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
perf_sched_init(&sched, constraints, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
break; /* failed */
if (assign)
assign[sched.state.event] = sched.state.counter;
} while (perf_sched_next_event(&sched));
return sched.state.unassigned;
}
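To make the weight-ordered idea concrete, here is a hedged userspace toy (not kernel code; the event names and masks are invented, and it omits the save/restore backtracking that __perf_sched_find_counter() adds for overlapping constraints): events are placed in order of increasing weight, i.e. most constrained first.
#include <stdio.h>

#define NCOUNTERS 4
#define NEVENTS   3

struct toy_event {
	const char *name;
	unsigned int idxmsk;		/* counters this event may use */
};

int main(void)
{
	/* invented constraints; evA is the most constrained */
	struct toy_event ev[NEVENTS] = {
		{ "evA", 0x1 },		/* only counter 0 */
		{ "evB", 0x3 },		/* counters 0-1   */
		{ "evC", 0xf },		/* any counter    */
	};
	unsigned int used = 0;
	int w, i, c;

	/* outer loop by increasing weight == popcount(idxmsk) */
	for (w = 1; w <= NCOUNTERS; w++) {
		for (i = 0; i < NEVENTS; i++) {
			if (__builtin_popcount(ev[i].idxmsk) != w)
				continue;
			/* grab the first free counter the mask allows */
			for (c = 0; c < NCOUNTERS; c++) {
				if ((ev[i].idxmsk >> c & 1) && !(used >> c & 1)) {
					used |= 1u << c;
					printf("%s -> counter %d\n", ev[i].name, c);
					break;
				}
			}
		}
	}
	return 0;			/* prints evA->0, evB->1, evC->2 */
}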
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int i, j, w, wmax, num = 0;
int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
for (i = 0; i < n; i++) {
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
/*
......@@ -521,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
if (i == n)
goto done;
/*
* begin slow path
*/
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
/*
* weight = number of possible counters
*
* 1 = most constrained, only works on one counter
* wmax = least constrained, works on any counter
*
* assign events to counters starting with most
* constrained events.
*/
wmax = x86_pmu.num_counters;
/* slow path */
if (i != n)
num = perf_assign_events(constraints, n, wmin, wmax, assign);
/*
* when fixed event counters are present,
* wmax is incremented by 1 to account
* for one more choice
*/
if (x86_pmu.num_counters_fixed)
wmax++;
for (w = 1, num = n; num && w <= wmax; w++) {
/* for each event */
for (i = 0; num && i < n; i++) {
c = constraints[i];
hwc = &cpuc->event_list[i]->hw;
if (c->weight != w)
continue;
for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
if (!test_bit(j, used_mask))
break;
}
if (j == X86_PMC_IDX_MAX)
break;
__set_bit(j, used_mask);
if (assign)
assign[i] = j;
num--;
}
}
done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
......@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
struct event_constraint *c;
int err;
......@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
pr_cont("%s PMU driver.\n", x86_pmu.name);
if (x86_pmu.quirks)
x86_pmu.quirks();
for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
quirk->func();
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
......@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
0, x86_pmu.num_counters);
0, x86_pmu.num_counters, 0);
if (x86_pmu.event_constraints) {
/*
* event on fixed counter2 (REF_CYCLES) only works on this
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
if (c->cmask != X86_RAW_EVENT_MASK)
if (c->cmask != X86_RAW_EVENT_MASK
|| c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
continue;
}
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
......@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
return misc;
}
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
cap->version = x86_pmu.version;
cap->num_counters_gp = x86_pmu.num_counters;
cap->num_counters_fixed = x86_pmu.num_counters_fixed;
cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
cap->events_mask_len = x86_pmu.events_mask_len;
}
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
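The new export gives the rest of the kernel a CPUID-free way to query the PMU; guest/virtualization code is the natural consumer, next to perf_guest_get_msrs() above. A minimal hypothetical caller (not part of this commit):
/* hypothetical example */
#include <linux/kernel.h>
#include <asm/perf_event.h>

static void report_pmu_caps(void)
{
	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);
	if (!cap.version)	/* the !CONFIG_PERF_EVENTS stub zeroes everything */
		return;
	pr_info("PMU v%d: %d GP counters (%d bits wide), %d fixed\n",
		cap.version, cap.num_counters_gp, cap.bit_width_gp,
		cap.num_counters_fixed);
}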
......@@ -45,6 +45,7 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
int overlap;
};
struct amd_nb {
......@@ -151,15 +152,40 @@ struct cpu_hw_events {
void *kfree_on_online;
};
#define __EVENT_CONSTRAINT(c, n, m, w) {\
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
/*
* The overlap flag marks event constraints with overlapping counter
* masks. This is the case if the counter mask of such an event is not
* a subset of any other counter mask of a constraint with an equal or
* higher weight, e.g.:
*
* c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
* c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
* c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
*
* The event scheduler may not select the correct counter in the first
* cycle because it needs to know which subsequent events will be
* scheduled. It may then fail to schedule the events. So we set the
* overlap flag for such constraints to give the scheduler a hint which
* events to select for counter rescheduling.
*
* Care must be taken as the rescheduling algorithm is O(n!), which
* will dramatically increase scheduling cycles for an over-committed
* system. The number of such EVENT_CONSTRAINT_OVERLAP() macros
* and their counter masks must be kept at a minimum.
*/
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
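The flag's one concrete user in this merge is the AMD family 15h constraint further down, whose mask 0x09 overlaps 0x07 and 0x38 without being a subset of either:
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
When __perf_sched_find_counter() places such an event it saves the scheduler state, so that a later dead end can backtrack and retry the event on a different counter.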
/*
* Constraint on the Event code.
......@@ -235,6 +261,11 @@ union perf_capabilities {
u64 capabilities;
};
struct x86_pmu_quirk {
struct x86_pmu_quirk *next;
void (*func)(void);
};
/*
* struct x86_pmu - generic x86 pmu
*/
......@@ -259,6 +290,11 @@ struct x86_pmu {
int num_counters_fixed;
int cntval_bits;
u64 cntval_mask;
union {
unsigned long events_maskl;
unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
};
int events_mask_len;
int apic;
u64 max_period;
struct event_constraint *
......@@ -268,7 +304,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
void (*quirks)(void);
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
int (*cpu_prepare)(int cpu);
......@@ -309,6 +345,15 @@ struct x86_pmu {
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};
#define x86_add_quirk(func_) \
do { \
static struct x86_pmu_quirk __quirk __initdata = { \
.func = func_, \
}; \
__quirk.next = x86_pmu.quirks; \
x86_pmu.quirks = &__quirk; \
} while (0)
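Registering a quirk is then a one-liner inside an __init function; entries are pushed onto the head of the list, so the earliest-registered quirk runs last (hence the 'Install first, so it runs last' comment in intel_pmu_init() below). A sketch with an invented quirk:
/* hypothetical quirk, for illustration only */
static __init void my_pmu_quirk(void)
{
	pr_info("applying hypothetical PMU quirk\n");
}

/* inside e.g. intel_pmu_init(): */
x86_add_quirk(my_pmu_quirk);	/* runs when init_hw_perf_events() walks the list */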
#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2
......
......@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
......
......@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
......@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/*
* Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
* 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
* ratio between these counters.
*/
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
......@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
......@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
......@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
......@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
......@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
};
static void intel_clovertown_quirks(void)
static __init void intel_clovertown_quirk(void)
{
/*
* PEBS is unreliable due to:
......@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
x86_pmu.pebs_constraints = NULL;
}
static void intel_sandybridge_quirks(void)
static __init void intel_sandybridge_quirk(void)
{
printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};
static __init void intel_arch_events_quirk(void)
{
int bit;
/* disable events that are reported as not present by CPUID */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
intel_arch_events_map[bit].name);
}
}
static __init void intel_nehalem_quirk(void)
{
union cpuid10_ebx ebx;
ebx.full = x86_pmu.events_maskl;
if (ebx.split.no_branch_misses_retired) {
/*
* Erratum AAJ80 detected, we work around it by using
* the BR_MISP_EXEC.ANY event. This will over-count
* branch-misses, but it's still much better than the
* architectural event which is often completely bogus:
*/
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
ebx.split.no_branch_misses_retired = 0;
x86_pmu.events_maskl = ebx.full;
printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
}
}
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
unsigned int unused;
unsigned int ebx;
int version;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
......@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
version = eax.split.version_id;
......@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
......@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
intel_ds_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
/*
* Install the hw-cache-events table:
*/
......@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
break;
case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
x86_pmu.quirks = intel_clovertown_quirks;
x86_add_quirk(intel_clovertown_quirk);
case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
case 29: /* six-core 45 nm xeon "Dunnington" */
......@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
if (ebx & 0x40) {
/*
* Erratum AAJ80 detected, we work around it by using
* the BR_MISP_EXEC.ANY event. This will over-count
* branch-misses, but it's still much better than the
* architectural event which is often completely bogus:
*/
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
x86_add_quirk(intel_nehalem_quirk);
pr_cont("erratum AAJ80 worked around, ");
}
pr_cont("Nehalem events, ");
break;
......@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
x86_pmu.quirks = intel_sandybridge_quirks;
x86_add_quirk(intel_sandybridge_quirk);
case 45: /* SandyBridge, "Romley-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
......@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
break;
}
}
return 0;
}
......@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
put_online_cpus();
}
void arch_jump_label_transform_static(struct jump_entry *entry,
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
__jump_label_transform(entry, type, text_poke_early);
......
......@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
const insn_attr_t *table;
if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
return 0;
table = inat_avx_tables[vex_m][vex_p];
/* Check the master table first */
table = inat_avx_tables[vex_m][0];
if (!table)
return 0;
if (!inat_is_group(table[opcode]) && vex_p) {
/* If this is not a group, get attribute directly */
table = inat_avx_tables[vex_m][vex_p];
if (!table)
return 0;
}
return table[opcode];
}
......@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
if (!inat_accept_vex(insn->attr))
if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
insn->attr = 0; /* This instruction is bad */
goto end; /* VEX has only 1 byte for opcode */
}
......@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
pfx = insn_last_prefix(insn);
insn->attr = inat_get_group_attribute(mod, pfx,
insn->attr);
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
insn->attr = 0; /* This is bad */
}
}
......
......@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o \
timer_int.o )
timer_int.o nmi_timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
op_model_ppro.o op_model_p4.o
oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
......@@ -16,37 +16,23 @@
* with the NMI mode driver.
*/
#ifdef CONFIG_X86_LOCAL_APIC
extern int op_nmi_init(struct oprofile_operations *ops);
extern int op_nmi_timer_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
#else
static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
static void op_nmi_exit(void) { }
#endif
static int nmi_timer;
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
int ret;
ret = -ENODEV;
#ifdef CONFIG_X86_LOCAL_APIC
ret = op_nmi_init(ops);
#endif
nmi_timer = (ret != 0);
#ifdef CONFIG_X86_IO_APIC
if (nmi_timer)
ret = op_nmi_timer_init(ops);
#endif
ops->backtrace = x86_backtrace;
return ret;
return op_nmi_init(ops);
}
void oprofile_arch_exit(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!nmi_timer)
op_nmi_exit();
#endif
op_nmi_exit();
}
......@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
return 0;
}
static int force_arch_perfmon;
static int force_cpu_type(const char *str, struct kernel_param *kp)
enum __force_cpu_type {
reserved = 0, /* do not force */
timer,
arch_perfmon,
};
static int force_cpu_type;
static int set_cpu_type(const char *str, struct kernel_param *kp)
{
if (!strcmp(str, "arch_perfmon")) {
force_arch_perfmon = 1;
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
} else if (!strcmp(str, "arch_perfmon")) {
force_cpu_type = arch_perfmon;
printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
} else {
force_cpu_type = 0;
}
return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
if (force_arch_perfmon && cpu_has_arch_perfmon)
if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
return 0;
/*
......@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
if (!cpu_has_apic)
return -ENODEV;
if (force_cpu_type == timer)
return -ENODEV;
switch (vendor) {
case X86_VENDOR_AMD:
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
......
/**
* @file nmi_timer_int.c
*
* @remark Copyright 2003 OProfile authors
* @remark Read the file COPYING
*
* @author Zwane Mwaikambo <zwane@linuxpower.ca>
*/
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/oprofile.h>
#include <linux/rcupdate.h>
#include <linux/kdebug.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/ptrace.h>
static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
{
oprofile_add_sample(regs, 0);
return NMI_HANDLED;
}
static int timer_start(void)
{
if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
0, "oprofile-timer"))
return 1;
return 0;
}
static void timer_stop(void)
{
unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
synchronize_sched(); /* Allow already-started NMIs to complete. */
}
int __init op_nmi_timer_init(struct oprofile_operations *ops)
{
ops->start = timer_start;
ops->stop = timer_stop;
ops->cpu_type = "timer";
printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
return 0;
}
......@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
quiet_cmd_posttest = TEST $@
cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
posttest: $(obj)/test_get_len vmlinux
quiet_cmd_sanitytest = TEST $@
cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
$(call cmd,posttest)
$(call cmd,sanitytest)
hostprogs-y := test_get_len
hostprogs-y += test_get_len insn_sanity
# -I needed for generated C source and C source which is in the kernel tree.
HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
# Dependencies are also needed.
$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
......@@ -47,7 +47,7 @@ BEGIN {
sep_expr = "^\\|$"
group_expr = "^Grp[0-9A-Za-z]+"
imm_expr = "^[IJAO][a-z]"
imm_expr = "^[IJAOL][a-z]"
imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
......@@ -59,6 +59,7 @@ BEGIN {
imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
......@@ -70,8 +71,12 @@ BEGIN {
lprefix3_expr = "\\(F2\\)"
max_lprefix = 4
vexok_expr = "\\(VEX\\)"
vexonly_expr = "\\(oVEX\\)"
# All opcodes starting with lower-case 'v' or with (v1) superscript
# accept the VEX prefix
vexok_opcode_expr = "^v.*"
vexok_expr = "\\(v1\\)"
# All opcodes with (v) superscript support *only* the VEX prefix
vexonly_expr = "\\(v\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
......@@ -85,8 +90,8 @@ BEGIN {
prefix_num["SEG=GS"] = "INAT_PFX_GS"
prefix_num["SEG=SS"] = "INAT_PFX_SS"
prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
clear_vars()
}
......@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(opcode, fpu_expr))
flags = add_flags(flags, "INAT_MODRM")
# check VEX only code
# check VEX codes
if (match(ext, vexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
# check VEX only code
if (match(ext, vexok_expr))
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
# check prefixes
......
/*
* x86 decoder sanity test - based on test_get_insn.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2009
* Copyright (C) Hitachi, Ltd., 2011
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define unlikely(cond) (cond)
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
#include <asm/insn.h>
#include <inat.c>
#include <insn.c>
/*
* Test of instruction analysis against tampering.
* Feed random binaries to the instruction decoder and ensure that it
* does not access memory outside the instruction buffer.
*/
#define DEFAULT_MAX_ITER 10000
#define INSN_NOP 0x90
static const char *prog; /* Program name */
static int verbose; /* Verbosity */
static int x86_64; /* x86-64 bit mode flag */
static unsigned int seed; /* Random seed */
static unsigned long iter_start; /* Start of iteration number */
static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
static FILE *input_file; /* Input file */
static void usage(const char *err)
{
if (err)
fprintf(stderr, "Error: %s\n\n", err);
fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
fprintf(stderr, "\t-y 64bit mode\n");
fprintf(stderr, "\t-n 32bit mode\n");
fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n");
fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
fprintf(stderr, "\t-m Give a maximum iteration number\n");
fprintf(stderr, "\t-i Give an input file with decoded binary\n");
exit(1);
}
static void dump_field(FILE *fp, const char *name, const char *indent,
struct insn_field *field)
{
fprintf(fp, "%s.%s = {\n", indent, name);
fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
indent, field->value, field->bytes[0], field->bytes[1],
field->bytes[2], field->bytes[3]);
fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
field->got, field->nbytes);
}
static void dump_insn(FILE *fp, struct insn *insn)
{
fprintf(fp, "Instruction = {\n");
dump_field(fp, "prefixes", "\t", &insn->prefixes);
dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
dump_field(fp, "opcode", "\t", &insn->opcode);
dump_field(fp, "modrm", "\t", &insn->modrm);
dump_field(fp, "sib", "\t", &insn->sib);
dump_field(fp, "displacement", "\t", &insn->displacement);
dump_field(fp, "immediate1", "\t", &insn->immediate1);
dump_field(fp, "immediate2", "\t", &insn->immediate2);
fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
insn->attr, insn->opnd_bytes, insn->addr_bytes);
fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
insn->length, insn->x86_64, insn->kaddr);
}
static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
unsigned char *insn_buf, struct insn *insn)
{
int i;
fprintf(fp, "%s:\n", msg);
dump_insn(fp, insn);
fprintf(fp, "You can reproduce this with below command(s);\n");
/* Input a decoded instruction sequence directly */
fprintf(fp, " $ echo ");
for (i = 0; i < MAX_INSN_SIZE; i++)
fprintf(fp, " %02x", insn_buf[i]);
fprintf(fp, " | %s -i -\n", prog);
if (!input_file) {
fprintf(fp, "Or \n");
/* Give a seed and iteration number */
fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
}
}
static void init_random_seed(void)
{
int fd;
fd = open("/dev/urandom", O_RDONLY);
if (fd < 0)
goto fail;
if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
goto fail;
close(fd);
return;
fail:
usage("Failed to open /dev/urandom");
}
/* Read given instruction sequence from the input file */
static int read_next_insn(unsigned char *insn_buf)
{
char buf[256] = "", *tmp;
int i;
tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
if (tmp == NULL || feof(input_file))
return 0;
for (i = 0; i < MAX_INSN_SIZE; i++) {
insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
if (*tmp != ' ')
break;
}
return i;
}
static int generate_insn(unsigned char *insn_buf)
{
int i;
if (input_file)
return read_next_insn(insn_buf);
/* Fills buffer with random binary up to MAX_INSN_SIZE */
for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
*(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
while (i < MAX_INSN_SIZE)
insn_buf[i++] = random() & 0xff;
return i;
}
static void parse_args(int argc, char **argv)
{
int c;
char *tmp = NULL;
int set_seed = 0;
prog = argv[0];
while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
switch (c) {
case 'y':
x86_64 = 1;
break;
case 'n':
x86_64 = 0;
break;
case 'v':
verbose++;
break;
case 'i':
if (strcmp("-", optarg) == 0)
input_file = stdin;
else
input_file = fopen(optarg, "r");
if (!input_file)
usage("Failed to open input file");
break;
case 's':
seed = (unsigned int)strtoul(optarg, &tmp, 0);
if (*tmp == ',') {
optarg = tmp + 1;
iter_start = strtoul(optarg, &tmp, 0);
}
if (*tmp != '\0' || tmp == optarg)
usage("Failed to parse seed");
set_seed = 1;
break;
case 'm':
iter_end = strtoul(optarg, &tmp, 0);
if (*tmp != '\0' || tmp == optarg)
usage("Failed to parse max_iter");
break;
default:
usage(NULL);
}
}
/* Check errors */
if (iter_end < iter_start)
usage("Max iteration number must be bigger than iter-num");
if (set_seed && input_file)
usage("Don't use input file (-i) with random seed (-s)");
/* Initialize random seed */
if (!input_file) {
if (!set_seed) /* No seed is given */
init_random_seed();
srand(seed);
}
}
int main(int argc, char **argv)
{
struct insn insn;
int insns = 0;
int errors = 0;
unsigned long i;
unsigned char insn_buf[MAX_INSN_SIZE * 2];
parse_args(argc, argv);
/* Prepare stop bytes with NOPs */
memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
for (i = 0; i < iter_end; i++) {
if (generate_insn(insn_buf) <= 0)
break;
if (i < iter_start) /* Skip to given iteration number */
continue;
/* Decode an instruction */
insn_init(&insn, insn_buf, x86_64);
insn_get_length(&insn);
if (insn.next_byte <= insn.kaddr ||
insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
/* Access out-of-range memory */
dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
errors++;
} else if (verbose && !insn_complete(&insn))
dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
else if (verbose >= 2)
dump_insn(stdout, &insn);
insns++;
}
fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
return errors ? 1 : 0;
}
/**
* @file nmi_timer_int.c
*
* @remark Copyright 2011 Advanced Micro Devices, Inc.
*
* @author Robert Richter <robert.richter@amd.com>
*/
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/oprofile.h>
#include <linux/perf_event.h>
#ifdef CONFIG_OPROFILE_NMI_TIMER
static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events);
static int ctr_running;
static struct perf_event_attr nmi_timer_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
};
static void nmi_timer_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
event->hw.interrupts = 0; /* don't throttle interrupts */
oprofile_add_sample(regs, 0);
}
static int nmi_timer_start_cpu(int cpu)
{
struct perf_event *event = per_cpu(nmi_timer_events, cpu);
if (!event) {
event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL,
nmi_timer_callback, NULL);
if (IS_ERR(event))
return PTR_ERR(event);
per_cpu(nmi_timer_events, cpu) = event;
}
if (event && ctr_running)
perf_event_enable(event);
return 0;
}
static void nmi_timer_stop_cpu(int cpu)
{
struct perf_event *event = per_cpu(nmi_timer_events, cpu);
if (event && ctr_running)
perf_event_disable(event);
}
static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action,
void *data)
{
int cpu = (unsigned long)data;
switch (action) {
case CPU_DOWN_FAILED:
case CPU_ONLINE:
nmi_timer_start_cpu(cpu);
break;
case CPU_DOWN_PREPARE:
nmi_timer_stop_cpu(cpu);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block nmi_timer_cpu_nb = {
.notifier_call = nmi_timer_cpu_notifier
};
static int nmi_timer_start(void)
{
int cpu;
get_online_cpus();
ctr_running = 1;
for_each_online_cpu(cpu)
nmi_timer_start_cpu(cpu);
put_online_cpus();
return 0;
}
static void nmi_timer_stop(void)
{
int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
nmi_timer_stop_cpu(cpu);
ctr_running = 0;
put_online_cpus();
}
static void nmi_timer_shutdown(void)
{
struct perf_event *event;
int cpu;
get_online_cpus();
unregister_cpu_notifier(&nmi_timer_cpu_nb);
for_each_possible_cpu(cpu) {
event = per_cpu(nmi_timer_events, cpu);
if (!event)
continue;
perf_event_disable(event);
per_cpu(nmi_timer_events, cpu) = NULL;
perf_event_release_kernel(event);
}
put_online_cpus();
}
static int nmi_timer_setup(void)
{
int cpu, err;
u64 period;
/* clock cycles per tick: */
period = (u64)cpu_khz * 1000;
do_div(period, HZ);
nmi_timer_attr.sample_period = period;
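/* e.g. cpu_khz == 2000000 (2 GHz) with HZ == 1000 gives a 2000000-cycle period, i.e. one NMI sample per tick */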
get_online_cpus();
err = register_cpu_notifier(&nmi_timer_cpu_nb);
if (err)
goto out;
/* can't attach events to offline cpus: */
for_each_online_cpu(cpu) {
err = nmi_timer_start_cpu(cpu);
if (err)
break;
}
if (err)
nmi_timer_shutdown();
out:
put_online_cpus();
return err;
}
int __init op_nmi_timer_init(struct oprofile_operations *ops)
{
int err = 0;
err = nmi_timer_setup();
if (err)
return err;
nmi_timer_shutdown(); /* only check, don't alloc */
ops->create_files = NULL;
ops->setup = nmi_timer_setup;
ops->shutdown = nmi_timer_shutdown;
ops->start = nmi_timer_start;
ops->stop = nmi_timer_stop;
ops->cpu_type = "timer";
printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
return 0;
}
#endif
......@@ -246,37 +246,31 @@ static int __init oprofile_init(void)
int err;
/* always init architecture to setup backtrace support */
timer_mode = 0;
err = oprofile_arch_init(&oprofile_ops);
if (!err) {
if (!timer && !oprofilefs_register())
return 0;
oprofile_arch_exit();
}
timer_mode = err || timer; /* fall back to timer mode on errors */
if (timer_mode) {
if (!err)
oprofile_arch_exit();
/* setup timer mode: */
timer_mode = 1;
/* no nmi timer mode if oprofile.timer is set */
if (timer || op_nmi_timer_init(&oprofile_ops)) {
err = oprofile_timer_init(&oprofile_ops);
if (err)
return err;
}
err = oprofilefs_register();
if (!err)
return 0;
/* failed */
if (timer_mode)
oprofile_timer_exit();
else
oprofile_arch_exit();
return err;
return oprofilefs_register();
}
static void __exit oprofile_exit(void)
{
oprofilefs_unregister();
if (timer_mode)
oprofile_timer_exit();
else
if (!timer_mode)
oprofile_arch_exit();
}
......
......@@ -35,7 +35,15 @@ struct dentry;
void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
#ifdef CONFIG_OPROFILE_NMI_TIMER
int op_nmi_timer_init(struct oprofile_operations *ops);
#else
static inline int op_nmi_timer_init(struct oprofile_operations *ops)
{
return -ENODEV;
}
#endif
int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
......
......@@ -97,24 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = {
.notifier_call = oprofile_cpu_notify,
};
int oprofile_timer_init(struct oprofile_operations *ops)
static int oprofile_hrtimer_setup(void)
{
int rc;
rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
if (rc)
return rc;
ops->create_files = NULL;
ops->setup = NULL;
ops->shutdown = NULL;
ops->start = oprofile_hrtimer_start;
ops->stop = oprofile_hrtimer_stop;
ops->cpu_type = "timer";
printk(KERN_INFO "oprofile: using timer interrupt.\n");
return 0;
return register_hotcpu_notifier(&oprofile_cpu_notifier);
}
void oprofile_timer_exit(void)
static void oprofile_hrtimer_shutdown(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
int oprofile_timer_init(struct oprofile_operations *ops)
{
ops->create_files = NULL;
ops->setup = oprofile_hrtimer_setup;
ops->shutdown = oprofile_hrtimer_shutdown;
ops->start = oprofile_hrtimer_start;
ops->stop = oprofile_hrtimer_stop;
ops->cpu_type = "timer";
printk(KERN_INFO "oprofile: using timer interrupt.\n");
return 0;
}
......@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
#include <asm/bitops.h>
#define for_each_set_bit(bit, addr, size) \
for ((bit) = find_first_bit((addr), (size)); \
(bit) < (size); \
for ((bit) = find_first_bit((addr), (size)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
/* same as for_each_set_bit() but uses 'bit' as the value to start with */
#define for_each_set_bit_cont(bit, addr, size) \
for ((bit) = find_next_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
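Unlike for_each_set_bit(), the _cont form keeps whatever value 'bit' already holds as its starting point; __perf_sched_find_counter() above relies on this to resume a counter scan past the fixed-counter range. A minimal sketch (the bitmap and handle() are hypothetical):
DECLARE_BITMAP(map, 64);
int bit = 32;			/* resume from bit 32, not from bit 0 */

for_each_set_bit_cont(bit, map, 64)
	handle(bit);		/* visits only set bits >= 32 */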
static __inline__ int get_bitmask_order(unsigned int count)
......
......@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
......@@ -14,6 +15,12 @@ struct jump_label_key {
#endif
};
struct jump_label_key_deferred {
struct jump_label_key key;
unsigned long timeout;
struct delayed_work work;
};
# include <asm/jump_label.h>
# define HAVE_JUMP_LABEL
#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
......@@ -51,8 +58,11 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
extern int jump_label_text_reserved(void *start, void *end);
extern void jump_label_inc(struct jump_label_key *key);
extern void jump_label_dec(struct jump_label_key *key);
extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
extern bool jump_label_enabled(struct jump_label_key *key);
extern void jump_label_apply_nops(struct module *mod);
extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
unsigned long rl);
#else /* !HAVE_JUMP_LABEL */
......@@ -68,6 +78,10 @@ static __always_inline void jump_label_init(void)
{
}
struct jump_label_key_deferred {
struct jump_label_key key;
};
static __always_inline bool static_branch(struct jump_label_key *key)
{
if (unlikely(atomic_read(&key->enabled)))
......@@ -85,6 +99,11 @@ static inline void jump_label_dec(struct jump_label_key *key)
atomic_dec(&key->enabled);
}
static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
{
jump_label_dec(&key->key);
}
static inline int jump_label_text_reserved(void *start, void *end)
{
return 0;
......@@ -102,6 +121,14 @@ static inline int jump_label_apply_nops(struct module *mod)
{
return 0;
}
static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
unsigned long rl)
{
}
#endif /* HAVE_JUMP_LABEL */
#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
#endif /* _LINUX_JUMP_LABEL_H */
......@@ -54,6 +54,7 @@ enum perf_hw_id {
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
PERF_COUNT_HW_REF_CPU_CYCLES = 9,
PERF_COUNT_HW_MAX, /* non-ABI */
};
......@@ -890,6 +891,7 @@ struct perf_event_context {
int nr_active;
int is_active;
int nr_stat;
int nr_freq;
int rotate_disable;
atomic_t refcount;
struct task_struct *task;
......@@ -1063,12 +1065,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
}
}
extern struct jump_label_key perf_sched_events;
extern struct jump_label_key_deferred perf_sched_events;
static inline void perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task)
{
if (static_branch(&perf_sched_events))
if (static_branch(&perf_sched_events.key))
__perf_event_task_sched_in(prev, task);
}
......@@ -1077,7 +1079,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
{
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
if (static_branch(&perf_sched_events))
if (static_branch(&perf_sched_events.key))
__perf_event_task_sched_out(prev, next);
}
......
......@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_core.o = -pg
endif
obj-y := core.o ring_buffer.o
obj-y := core.o ring_buffer.o callchain.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
/*
* Performance events callchain code, extracted from core.c:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>
#include <linux/slab.h>
#include "internal.h"
struct callchain_cpus_entries {
struct rcu_head rcu_head;
struct perf_callchain_entry *cpu_entries[0];
};
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;
__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
__weak void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
static void release_callchain_buffers_rcu(struct rcu_head *head)
{
struct callchain_cpus_entries *entries;
int cpu;
entries = container_of(head, struct callchain_cpus_entries, rcu_head);
for_each_possible_cpu(cpu)
kfree(entries->cpu_entries[cpu]);
kfree(entries);
}
static void release_callchain_buffers(void)
{
struct callchain_cpus_entries *entries;
entries = callchain_cpus_entries;
rcu_assign_pointer(callchain_cpus_entries, NULL);
call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}
static int alloc_callchain_buffers(void)
{
int cpu;
int size;
struct callchain_cpus_entries *entries;
/*
* We can't use the percpu allocation API for data that can be
* accessed from NMI. Use a temporary manual per cpu allocation
* until that gets sorted out.
*/
size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
entries = kzalloc(size, GFP_KERNEL);
if (!entries)
return -ENOMEM;
size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
for_each_possible_cpu(cpu) {
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
cpu_to_node(cpu));
if (!entries->cpu_entries[cpu])
goto fail;
}
rcu_assign_pointer(callchain_cpus_entries, entries);
return 0;
fail:
for_each_possible_cpu(cpu)
kfree(entries->cpu_entries[cpu]);
kfree(entries);
return -ENOMEM;
}
int get_callchain_buffers(void)
{
int err = 0;
int count;
mutex_lock(&callchain_mutex);
count = atomic_inc_return(&nr_callchain_events);
if (WARN_ON_ONCE(count < 1)) {
err = -EINVAL;
goto exit;
}
if (count > 1) {
/* If the allocation failed, give up */
if (!callchain_cpus_entries)
err = -ENOMEM;
goto exit;
}
err = alloc_callchain_buffers();
if (err)
release_callchain_buffers();
exit:
mutex_unlock(&callchain_mutex);
return err;
}
void put_callchain_buffers(void)
{
if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
release_callchain_buffers();
mutex_unlock(&callchain_mutex);
}
}
static struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
int cpu;
struct callchain_cpus_entries *entries;
*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
if (*rctx == -1)
return NULL;
entries = rcu_dereference(callchain_cpus_entries);
if (!entries)
return NULL;
cpu = smp_processor_id();
return &entries->cpu_entries[cpu][*rctx];
}
static void
put_callchain_entry(int rctx)
{
put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
}
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
int rctx;
struct perf_callchain_entry *entry;
entry = get_callchain_entry(&rctx);
if (rctx == -1)
return NULL;
if (!entry)
goto exit_put;
entry->nr = 0;
if (!user_mode(regs)) {
perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
perf_callchain_kernel(entry, regs);
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}
if (regs) {
perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_user(entry, regs);
}
exit_put:
put_callchain_entry(rctx);
return entry;
}
#ifndef _KERNEL_EVENTS_INTERNAL_H
#define _KERNEL_EVENTS_INTERNAL_H
#include <linux/hardirq.h>
/* Buffer handling */
#define RING_BUFFER_WRITABLE 0x01
struct ring_buffer {
......@@ -67,7 +71,7 @@ static inline int page_order(struct ring_buffer *rb)
}
#endif
static unsigned long perf_data_size(struct ring_buffer *rb)
static inline unsigned long perf_data_size(struct ring_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
}
......@@ -96,4 +100,37 @@ __output_copy(struct perf_output_handle *handle,
} while (len);
}
/* Callchain handling */
extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
static inline int get_recursion_context(int *recursion)
{
int rctx;
if (in_nmi())
rctx = 3;
else if (in_irq())
rctx = 2;
else if (in_softirq())
rctx = 1;
else
rctx = 0;
if (recursion[rctx])
return -1;
recursion[rctx]++;
barrier();
return rctx;
}
static inline void put_recursion_context(int *recursion, int rctx)
{
barrier();
recursion[rctx]--;
}
#endif /* _KERNEL_EVENTS_INTERNAL_H */
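For reference, the get/put helpers pair up the same way get_callchain_entry()/put_callchain_entry() use them above; a sketch with an invented per-cpu array:
/* hypothetical user of the recursion helpers */
static DEFINE_PER_CPU(int, my_recursion[PERF_NR_CONTEXTS]);

static void my_nmi_safe_op(void)
{
	int rctx = get_recursion_context(__get_cpu_var(my_recursion));

	if (rctx == -1)
		return;		/* already running in this context, bail */
	/* ... per-context work, safe against NMI/irq/softirq reentry ... */
	put_recursion_context(__get_cpu_var(my_recursion), rctx);
}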
......@@ -72,15 +72,46 @@ void jump_label_inc(struct jump_label_key *key)
jump_label_unlock();
}
void jump_label_dec(struct jump_label_key *key)
static void __jump_label_dec(struct jump_label_key *key,
unsigned long rate_limit, struct delayed_work *work)
{
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
return;
jump_label_update(key, JUMP_LABEL_DISABLE);
if (rate_limit) {
atomic_inc(&key->enabled);
schedule_delayed_work(work, rate_limit);
} else
jump_label_update(key, JUMP_LABEL_DISABLE);
jump_label_unlock();
}
static void jump_label_update_timeout(struct work_struct *work)
{
struct jump_label_key_deferred *key =
container_of(work, struct jump_label_key_deferred, work.work);
__jump_label_dec(&key->key, 0, NULL);
}
void jump_label_dec(struct jump_label_key *key)
{
__jump_label_dec(key, 0, NULL);
}
void jump_label_dec_deferred(struct jump_label_key_deferred *key)
{
__jump_label_dec(&key->key, key->timeout, &key->work);
}
void jump_label_rate_limit(struct jump_label_key_deferred *key,
unsigned long rl)
{
key->timeout = rl;
INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
}
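Rate limiting exists for keys that are bounced frequently: the deferred dec re-increments the key and schedules the real (expensive, text-patching) disable 'rl' jiffies later. A hedged sketch; my_key and its callers are invented, though perf converts perf_sched_events to the deferred type in this same merge:
static struct jump_label_key_deferred my_key;

static __init int my_init(void)
{
	jump_label_rate_limit(&my_key, HZ);	/* batch disables over one second */
	return 0;
}

static void my_get(void)
{
	jump_label_inc(&my_key.key);
}

static void my_put(void)
{
	jump_label_dec_deferred(&my_key);	/* text patching may happen later */
}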
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
{
if (entry->code <= (unsigned long)end &&
......@@ -111,7 +142,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
* running code can override this to make the non-live update case
* cheaper.
*/
void __weak arch_jump_label_transform_static(struct jump_entry *entry,
void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
arch_jump_label_transform(entry, type);
......@@ -217,8 +248,13 @@ void jump_label_apply_nops(struct module *mod)
if (iter_start == iter_stop)
return;
for (iter = iter_start; iter < iter_stop; iter++)
arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
for (iter = iter_start; iter < iter_stop; iter++) {
struct jump_label_key *iterk;
iterk = (struct jump_label_key *)(unsigned long)iter->key;
arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
}
}
static int jump_label_add_module(struct module *mod)
......@@ -258,8 +294,7 @@ static int jump_label_add_module(struct module *mod)
key->next = jlm;
if (jump_label_enabled(key))
__jump_label_update(key, iter, iter_stop,
JUMP_LABEL_ENABLE);
__jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
}
return 0;
......
......@@ -500,36 +500,32 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
usage[i] = '\0';
}
static int __print_lock_name(struct lock_class *class)
static void __print_lock_name(struct lock_class *class)
{
char str[KSYM_NAME_LEN];
const char *name;
name = class->name;
if (!name)
name = __get_key_name(class->key, str);
return printk("%s", name);
}
static void print_lock_name(struct lock_class *class)
{
char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
const char *name;
get_usage_chars(class, usage);
name = class->name;
if (!name) {
name = __get_key_name(class->key, str);
printk(" (%s", name);
printk("%s", name);
} else {
printk(" (%s", name);
printk("%s", name);
if (class->name_version > 1)
printk("#%d", class->name_version);
if (class->subclass)
printk("/%d", class->subclass);
}
}
static void print_lock_name(struct lock_class *class)
{
char usage[LOCK_USAGE_CHARS];
get_usage_chars(class, usage);
printk(" (");
__print_lock_name(class);
printk("){%s}", usage);
}
......
......@@ -338,7 +338,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
TRACE_ITER_IRQ_INFO;
static int trace_stop_count;
static DEFINE_RAW_SPINLOCK(tracing_start_lock);
......@@ -426,6 +427,7 @@ static const char *trace_options[] = {
"record-cmd",
"overwrite",
"disable_on_free",
"irq-info",
NULL
};
......@@ -1843,6 +1845,33 @@ static void s_stop(struct seq_file *m, void *p)
trace_event_read_unlock();
}
static void
get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
{
unsigned long count;
int cpu;
*total = 0;
*entries = 0;
for_each_tracing_cpu(cpu) {
count = ring_buffer_entries_cpu(tr->buffer, cpu);
/*
* If this buffer has skipped entries, then we hold all
* entries for the trace and we need to ignore the
* ones before the time stamp.
*/
if (tr->data[cpu]->skipped_entries) {
count -= tr->data[cpu]->skipped_entries;
/* total is the same as the entries */
*total += count;
} else
*total += count +
ring_buffer_overrun_cpu(tr->buffer, cpu);
*entries += count;
}
}
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
......@@ -1855,12 +1884,35 @@ static void print_lat_help_header(struct seq_file *m)
seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_func_help_header(struct seq_file *m)
static void print_event_info(struct trace_array *tr, struct seq_file *m)
{
unsigned long total;
unsigned long entries;
get_total_entries(tr, &total, &entries);
seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
entries, total, num_online_cpus());
seq_puts(m, "#\n");
}
static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
{
seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
print_event_info(tr, m);
seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
seq_puts(m, "# | | | | |\n");
}
static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
{
print_event_info(tr, m);
seq_puts(m, "# _-----=> irqs-off\n");
seq_puts(m, "# / _----=> need-resched\n");
seq_puts(m, "# | / _---=> hardirq/softirq\n");
seq_puts(m, "# || / _--=> preempt-depth\n");
seq_puts(m, "# ||| / delay\n");
seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
seq_puts(m, "# | | | |||| | |\n");
}
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
......@@ -1869,32 +1921,14 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
struct trace_array *tr = iter->tr;
struct trace_array_cpu *data = tr->data[tr->cpu];
struct tracer *type = current_trace;
unsigned long entries = 0;
unsigned long total = 0;
unsigned long count;
unsigned long entries;
unsigned long total;
const char *name = "preemption";
int cpu;
if (type)
name = type->name;
for_each_tracing_cpu(cpu) {
count = ring_buffer_entries_cpu(tr->buffer, cpu);
/*
* If this buffer has skipped entries, then we hold all
* entries for the trace and we need to ignore the
* ones before the time stamp.
*/
if (tr->data[cpu]->skipped_entries) {
count -= tr->data[cpu]->skipped_entries;
/* total is the same as the entries */
total += count;
} else
total += count +
ring_buffer_overrun_cpu(tr->buffer, cpu);
entries += count;
}
get_total_entries(tr, &total, &entries);
seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
name, UTS_RELEASE);
......@@ -2140,6 +2174,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
return print_trace_fmt(iter);
}
void trace_latency_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return;
if (iter->iter_flags & TRACE_FILE_LAT_FMT)
print_trace_header(m, iter);
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_lat_help_header(m);
}
void trace_default_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
......@@ -2155,8 +2204,12 @@ void trace_default_header(struct seq_file *m)
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_lat_help_header(m);
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_func_help_header(m);
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
print_func_help_header_irq(iter->tr, m);
else
print_func_help_header(iter->tr, m);
}
}
}
......
......@@ -370,6 +370,7 @@ void trace_graph_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
......@@ -654,6 +655,7 @@ enum trace_iterator_flags {
TRACE_ITER_RECORD_CMD = 0x100000,
TRACE_ITER_OVERWRITE = 0x200000,
TRACE_ITER_STOP_ON_FREE = 0x400000,
TRACE_ITER_IRQ_INFO = 0x800000,
};
/*
......
......@@ -27,6 +27,12 @@
#include "trace.h"
#include "trace_output.h"
#define DEFAULT_SYS_FILTER_MESSAGE \
"### global filter ###\n" \
"# Use this to set filters for multiple events.\n" \
"# Only events with the given fields will be affected.\n" \
"# If no events are modified, an error message will be displayed here"
enum filter_op_ids
{
OP_OR,
......@@ -646,7 +652,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
mutex_unlock(&event_mutex);
}
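DEFAULT_SYS_FILTER_MESSAGE is what a read of a subsystem-level filter file now returns when no filter is set, replacing the old bare "none"; a system filter is applied to every event in the subsystem that has the referenced fields, and events lacking them are skipped. A hedged user-space sketch of setting one (the debugfs mount point and the "sched" subsystem are assumptions, not part of the patch):
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/events/sched/filter";
	const char *expr = "common_pid != 0";	/* common fields exist on all events */
	int fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, expr, strlen(expr)) < 0)
		perror("write");
	close(fd);	/* reading the file back shows the filter or the default message */
	return 0;
}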
......@@ -1838,7 +1844,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
if (!filter)
goto out;
replace_filter_string(filter, filter_string);
/* System filters just show a default message */
kfree(filter->filter_string);
filter->filter_string = NULL;
/*
* No event actually uses the system filter
* we can free it without synchronize_sched().
......@@ -1848,14 +1857,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err) {
append_filter_err(ps, system->filter);
goto out;
}
if (err)
goto err_filter;
err = replace_system_preds(system, ps, filter_string);
if (err)
append_filter_err(ps, system->filter);
goto err_filter;
out:
filter_opstack_clear(ps);
......@@ -1865,6 +1872,11 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
mutex_unlock(&event_mutex);
return err;
err_filter:
replace_filter_string(filter, filter_string);
append_filter_err(ps, system->filter);
goto out;
}
#ifdef CONFIG_PERF_EVENTS
......
......@@ -280,9 +280,20 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
static void irqsoff_print_header(struct seq_file *s) { }
static void irqsoff_trace_open(struct trace_iterator *iter) { }
static void irqsoff_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
static void irqsoff_print_header(struct seq_file *s)
{
trace_default_header(s);
}
#else
static void irqsoff_print_header(struct seq_file *s)
{
trace_latency_header(s);
}
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
......
......@@ -627,11 +627,23 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned long secs = (unsigned long)t;
char comm[TASK_COMM_LEN];
int ret;
trace_find_cmdline(entry->pid, comm);
return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
comm, entry->pid, iter->cpu, secs, usec_rem);
ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
comm, entry->pid, iter->cpu);
if (!ret)
return 0;
if (trace_flags & TRACE_ITER_IRQ_INFO) {
ret = trace_print_lat_fmt(s, entry);
if (!ret)
return 0;
}
return trace_seq_printf(s, " %5lu.%06lu: ",
secs, usec_rem);
}
int trace_print_lat_context(struct trace_iterator *iter)
......
......@@ -280,9 +280,20 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_print_header(struct seq_file *s) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
static void wakeup_print_header(struct seq_file *s)
{
trace_default_header(s);
}
#else
static void wakeup_print_header(struct seq_file *s)
{
trace_latency_header(s);
}
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
......
......@@ -22,7 +22,7 @@ OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
Input file name. (default: perf.data unless stdin is a fifo)
-d::
--dsos=<dso[,dso...]>::
......@@ -66,7 +66,7 @@ OPTIONS
used. This interface starts by centering on the line with the most
samples; TAB/UNTAB cycle through the lines with more samples.
-c::
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
......
......@@ -26,7 +26,7 @@ OPTIONS
Show only DSOs with hits.
-i::
--input=::
Input file name. (default: perf.data)
Input file name. (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't do ownership validation.
......
......@@ -18,7 +18,7 @@ OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
Input file name. (default: perf.data unless stdin is a fifo)
SEE ALSO
--------
......
......@@ -23,7 +23,7 @@ OPTIONS
-------
-i <file>::
--input=<file>::
Select the input file (default: perf.data)
Select the input file (default: perf.data unless stdin is a fifo)
--caller::
Show per-callsite statistics
......
......@@ -29,7 +29,7 @@ COMMON OPTIONS
-i::
--input=<file>::
Input file name.
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
......
......@@ -89,7 +89,7 @@ OPTIONS
-m::
--mmap-pages=::
Number of mmap data pages.
Number of mmap data pages. Must be a power of two.
-g::
--call-graph::
......
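The power-of-two constraint on --mmap-pages above stems from the perf ring buffer, which sizes its data area to 2^n pages so offsets can wrap with a mask instead of a division; the kernel rejects other values at mmap time. The check is the classic single-bit test (the kernel ships it as is_power_of_2() in <linux/log2.h>; shown standalone here for illustration):
#include <stdbool.h>
/* A power of two has exactly one bit set, so clearing the
 * lowest set bit via n & (n - 1) must leave zero. */
static bool is_power_of_two(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}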
......@@ -19,7 +19,7 @@ OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
......@@ -39,7 +39,7 @@ OPTIONS
-T::
--threads::
Show per-thread event counters
-C::
-c::
--comms=::
Only consider symbols in these comms. Accepts a CSV list; entries of
the form file://filename are also understood.
......@@ -80,9 +80,10 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
-g [type,min,order]::
-g [type,min[,limit],order]::
--call-graph::
Display call chains using type, min percent threshold and order.
Display call chains using type, min percent threshold, optional print
limit and order.
type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates.
......@@ -128,7 +129,7 @@ OPTIONS
--symfs=<directory>::
Look for files with symbols relative to this directory.
-c::
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
......
......@@ -40,7 +40,7 @@ OPTIONS
-------
-i::
--input=<file>::
Input file name. (default: perf.data)
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
......
......@@ -106,7 +106,7 @@ OPTIONS
-i::
--input=::
Input file name.
Input file name. (default: perf.data unless stdin is a fifo)
-d::
--debug-mode::
......@@ -182,12 +182,17 @@ OPTIONS
--hide-call-graph::
When printing symbols do not display call chain.
-c::
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
-c::
--comms=::
Only display events for these comms. Accepts a CSV list; entries of
the form file://filename are also understood.
-I::
--show-info::
Display extended information about the perf.data file. This adds
......
......@@ -8,13 +8,19 @@ perf-test - Runs sanity tests.
SYNOPSIS
--------
[verse]
'perf test <options>'
'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
DESCRIPTION
-----------
This command runs assorted sanity tests, initially through linked routines, and
will also look for a directory with more tests in the form of scripts.
To get a list of available tests use 'perf test list'; specifying a test name
fragment will show all tests that contain it.
To run only specific tests, pass test name fragments or the numbers obtained
from 'perf test list'.
OPTIONS
-------
-v::
......
......@@ -27,7 +27,7 @@ OPTIONS
Select the output file (default: output.svg)
-i::
--input=::
Select the input file (default: perf.data)
Select the input file (default: perf.data unless stdin is a fifo)
-w::
--width=::
Select the width of the SVG file (default: 1000)
......
......@@ -278,6 +278,7 @@ LIB_H += util/strbuf.h
LIB_H += util/strlist.h
LIB_H += util/strfilter.h
LIB_H += util/svghelper.h
LIB_H += util/tool.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
LIB_H += util/symbol.h
......
......@@ -27,32 +27,32 @@
#include "util/sort.h"
#include "util/hist.h"
#include "util/session.h"
#include "util/tool.h"
#include <linux/bitmap.h>
static char const *input_name = "perf.data";
static bool force, use_tui, use_stdio;
static bool full_paths;
static bool print_line;
static const char *sym_hist_filter;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct perf_annotate {
struct perf_tool tool;
char const *input_name;
bool force, use_tui, use_stdio;
bool full_paths;
bool print_line;
const char *sym_hist_filter;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static int perf_evlist__add_sample(struct perf_evlist *evlist,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al)
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
struct perf_annotate *ann)
{
struct hist_entry *he;
int ret;
if (sym_hist_filter != NULL &&
(al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
if (ann->sym_hist_filter != NULL &&
(al->sym == NULL ||
strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
/* We're only interested in a symbol named sym_hist_filter */
if (al->sym != NULL) {
rb_erase(&al->sym->rb_node,
......@@ -69,8 +69,7 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
ret = 0;
if (he->ms.sym != NULL) {
struct annotation *notes = symbol__annotation(he->ms.sym);
if (notes->src == NULL &&
symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
......@@ -81,25 +80,26 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
return ret;
}
static int process_sample_event(union perf_event *event,
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct perf_session *session)
struct machine *machine)
{
struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
struct addr_location al;
if (perf_event__preprocess_sample(event, session, &al, sample,
if (perf_event__preprocess_sample(event, machine, &al, sample,
symbol__annotate_init) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
return 0;
if (!al.filtered &&
perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
return -1;
......@@ -108,14 +108,15 @@ static int process_sample_event(union perf_event *event,
return 0;
}
static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
static int hist_entry__tty_annotate(struct hist_entry *he, int evidx,
struct perf_annotate *ann)
{
return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
print_line, full_paths, 0, 0);
ann->print_line, ann->full_paths, 0, 0);
}
static void hists__find_annotations(struct hists *self, int evidx,
int nr_events)
struct perf_annotate *ann)
{
struct rb_node *nd = rb_first(&self->entries), *next;
int key = K_RIGHT;
......@@ -138,8 +139,7 @@ static void hists__find_annotations(struct hists *self, int evidx,
}
if (use_browser > 0) {
key = hist_entry__tui_annotate(he, evidx, nr_events,
NULL, NULL, 0);
key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0);
switch (key) {
case K_RIGHT:
next = rb_next(nd);
......@@ -154,7 +154,7 @@ static void hists__find_annotations(struct hists *self, int evidx,
if (next != NULL)
nd = next;
} else {
hist_entry__tty_annotate(he, evidx);
hist_entry__tty_annotate(he, evidx, ann);
nd = rb_next(nd);
/*
* Since we have a hist_entry per IP for the same
......@@ -167,33 +167,26 @@ static void hists__find_annotations(struct hists *self, int evidx,
}
}
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
.fork = perf_event__process_task,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
static int __cmd_annotate(void)
static int __cmd_annotate(struct perf_annotate *ann)
{
int ret;
struct perf_session *session;
struct perf_evsel *pos;
u64 total_nr_samples;
session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
session = perf_session__new(ann->input_name, O_RDONLY,
ann->force, false, &ann->tool);
if (session == NULL)
return -ENOMEM;
if (cpu_list) {
ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
if (ann->cpu_list) {
ret = perf_session__cpu_bitmap(session, ann->cpu_list,
ann->cpu_bitmap);
if (ret)
goto out_delete;
}
ret = perf_session__process_events(session, &event_ops);
ret = perf_session__process_events(session, &ann->tool);
if (ret)
goto out_delete;
......@@ -217,13 +210,12 @@ static int __cmd_annotate(void)
total_nr_samples += nr_samples;
hists__collapse_resort(hists);
hists__output_resort(hists);
hists__find_annotations(hists, pos->idx,
session->evlist->nr_entries);
hists__find_annotations(hists, pos->idx, ann);
}
}
if (total_nr_samples == 0) {
ui__warning("The %s file has no samples!\n", input_name);
ui__warning("The %s file has no samples!\n", session->filename);
goto out_delete;
}
out_delete:
......@@ -247,29 +239,41 @@ static const char * const annotate_usage[] = {
NULL
};
static const struct option options[] = {
OPT_STRING('i', "input", &input_name, "file",
int cmd_annotate(int argc, const char **argv, const char *prefix __used)
{
struct perf_annotate annotate = {
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
.fork = perf_event__process_task,
.ordered_samples = true,
.ordering_requires_timestamps = true,
},
};
const struct option options[] = {
OPT_STRING('i', "input", &annotate.input_name, "file",
"input file name"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
"symbol to annotate"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('f', "force", &annotate.force, "don't complain, do it"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('l', "print-line", &print_line,
OPT_BOOLEAN('l', "print-line", &annotate.print_line,
"print matching source lines (may be slow)"),
OPT_BOOLEAN('P', "full-paths", &full_paths,
OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
"Don't shorten the displayed pathnames"),
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
......@@ -279,15 +283,13 @@ static const struct option options[] = {
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_END()
};
};
int cmd_annotate(int argc, const char **argv, const char *prefix __used)
{
argc = parse_options(argc, argv, options, annotate_usage, 0);
if (use_stdio)
if (annotate.use_stdio)
use_browser = 0;
else if (use_tui)
else if (annotate.use_tui)
use_browser = 1;
setup_browser(true);
......@@ -308,7 +310,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
if (argc > 1)
usage_with_options(annotate_usage, options);
sym_hist_filter = argv[0];
annotate.sym_hist_filter = argv[0];
}
if (field_sep && *field_sep == '.') {
......@@ -316,5 +318,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
return -1;
}
return __cmd_annotate();
return __cmd_annotate(&annotate);
}
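The rewrite above is the pattern this series applies across the perf builtins: per-command globals move into a struct that embeds struct perf_tool, the tool is what the session layer hands back to every callback, and container_of() recovers the command state, as process_sample_event() does with struct perf_annotate. Reduced to a hedged sketch (struct my_cmd and my_sample are illustrative):
struct my_cmd {
	struct perf_tool tool;		/* passed to perf_session__new() */
	const char *input_name;		/* former global, now per-command */
};
static int my_sample(struct perf_tool *tool, union perf_event *event __used,
		     struct perf_sample *sample __used,
		     struct perf_evsel *evsel __used,
		     struct machine *machine __used)
{
	struct my_cmd *cmd = container_of(tool, struct my_cmd, tool);
	/* cmd->input_name and friends are reachable without globals */
	return cmd ? 0 : -1;
}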
......@@ -18,7 +18,7 @@
#include <libelf.h>
static char const *input_name = "perf.data";
static const char *input_name;
static bool force;
static bool show_kernel;
static bool with_hits;
......@@ -39,24 +39,6 @@ static const struct option options[] = {
OPT_END()
};
static int perf_session__list_build_ids(void)
{
struct perf_session *session;
session = perf_session__new(input_name, O_RDONLY, force, false,
&build_id__mark_dso_hit_ops);
if (session == NULL)
return -1;
if (with_hits)
perf_session__process_events(session, &build_id__mark_dso_hit_ops);
perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
perf_session__delete(session);
return 0;
}
static int sysfs__fprintf_build_id(FILE *fp)
{
u8 kallsyms_build_id[BUILD_ID_SIZE];
......@@ -85,17 +67,36 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
return fprintf(fp, "%s\n", sbuild_id);
}
static int __cmd_buildid_list(void)
static int perf_session__list_build_ids(void)
{
if (show_kernel)
return sysfs__fprintf_build_id(stdout);
struct perf_session *session;
elf_version(EV_CURRENT);
session = perf_session__new(input_name, O_RDONLY, force, false,
&build_id__mark_dso_hit_ops);
if (session == NULL)
return -1;
/*
* See if this is an ELF file first:
*/
if (filename__fprintf_build_id(input_name, stdout))
return 0;
* See if this is an ELF file first:
*/
if (filename__fprintf_build_id(session->filename, stdout))
goto out;
if (with_hits)
perf_session__process_events(session, &build_id__mark_dso_hit_ops);
perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
out:
perf_session__delete(session);
return 0;
}
static int __cmd_buildid_list(void)
{
if (show_kernel)
return sysfs__fprintf_build_id(stdout);
return perf_session__list_build_ids();
}
......
......@@ -9,7 +9,9 @@
#include "util/debug.h"
#include "util/event.h"
#include "util/hist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/sort.h"
#include "util/symbol.h"
#include "util/util.h"
......@@ -30,14 +32,15 @@ static int hists__add_entry(struct hists *self,
return -ENOMEM;
}
static int diff__process_sample_event(union perf_event *event,
static int diff__process_sample_event(struct perf_tool *tool __used,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
struct perf_session *session)
struct machine *machine)
{
struct addr_location al;
if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
......@@ -46,16 +49,16 @@ static int diff__process_sample_event(union perf_event *event,
if (al.filtered || al.sym == NULL)
return 0;
if (hists__add_entry(&session->hists, &al, sample->period)) {
if (hists__add_entry(&evsel->hists, &al, sample->period)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
session->hists.stats.total_period += sample->period;
evsel->hists.stats.total_period += sample->period;
return 0;
}
static struct perf_event_ops event_ops = {
static struct perf_tool perf_diff = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
......@@ -145,13 +148,13 @@ static int __cmd_diff(void)
int ret, i;
struct perf_session *session[2];
session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
session[0] = perf_session__new(input_old, O_RDONLY, force, false, &perf_diff);
session[1] = perf_session__new(input_new, O_RDONLY, force, false, &perf_diff);
if (session[0] == NULL || session[1] == NULL)
return -ENOMEM;
for (i = 0; i < 2; ++i) {
ret = perf_session__process_events(session[i], &event_ops);
ret = perf_session__process_events(session[i], &perf_diff);
if (ret)
goto out_delete;
}
......
......@@ -15,7 +15,7 @@
#include "util/parse-options.h"
#include "util/session.h"
static char const *input_name = "perf.data";
static const char *input_name;
static int __cmd_evlist(void)
{
......
......@@ -9,6 +9,7 @@
#include "perf.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/parse-options.h"
......@@ -16,8 +17,9 @@
static char const *input_name = "-";
static bool inject_build_ids;
static int perf_event__repipe_synth(union perf_event *event,
struct perf_session *session __used)
static int perf_event__repipe_synth(struct perf_tool *tool __used,
union perf_event *event,
struct machine *machine __used)
{
uint32_t size;
void *buf = event;
......@@ -36,41 +38,70 @@ static int perf_event__repipe_synth(union perf_event *event,
return 0;
}
static int perf_event__repipe(union perf_event *event,
static int perf_event__repipe_op2_synth(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session __used)
{
return perf_event__repipe_synth(tool, event, NULL);
}
static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
union perf_event *event)
{
return perf_event__repipe_synth(tool, event, NULL);
}
static int perf_event__repipe_tracing_data_synth(union perf_event *event,
struct perf_session *session __used)
{
return perf_event__repipe_synth(NULL, event, NULL);
}
static int perf_event__repipe_attr(union perf_event *event,
struct perf_evlist **pevlist __used)
{
return perf_event__repipe_synth(NULL, event, NULL);
}
static int perf_event__repipe(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __used,
struct perf_session *session)
struct machine *machine)
{
return perf_event__repipe_synth(event, session);
return perf_event__repipe_synth(tool, event, machine);
}
static int perf_event__repipe_sample(union perf_event *event,
static int perf_event__repipe_sample(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __used,
struct perf_evsel *evsel __used,
struct perf_session *session)
struct machine *machine)
{
return perf_event__repipe_synth(event, session);
return perf_event__repipe_synth(tool, event, machine);
}
static int perf_event__repipe_mmap(union perf_event *event,
static int perf_event__repipe_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_session *session)
struct machine *machine)
{
int err;
err = perf_event__process_mmap(event, sample, session);
perf_event__repipe(event, sample, session);
err = perf_event__process_mmap(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
static int perf_event__repipe_task(union perf_event *event,
static int perf_event__repipe_task(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_session *session)
struct machine *machine)
{
int err;
err = perf_event__process_task(event, sample, session);
perf_event__repipe(event, sample, session);
err = perf_event__process_task(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
......@@ -80,7 +111,7 @@ static int perf_event__repipe_tracing_data(union perf_event *event,
{
int err;
perf_event__repipe_synth(event, session);
perf_event__repipe_synth(NULL, event, NULL);
err = perf_event__process_tracing_data(event, session);
return err;
......@@ -100,10 +131,10 @@ static int dso__read_build_id(struct dso *self)
return -1;
}
static int dso__inject_build_id(struct dso *self, struct perf_session *session)
static int dso__inject_build_id(struct dso *self, struct perf_tool *tool,
struct machine *machine)
{
u16 misc = PERF_RECORD_MISC_USER;
struct machine *machine;
int err;
if (dso__read_build_id(self) < 0) {
......@@ -111,17 +142,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return -1;
}
machine = perf_session__find_host_machine(session);
if (machine == NULL) {
pr_err("Can't find machine for session\n");
return -1;
}
if (self->kernel)
misc = PERF_RECORD_MISC_KERNEL;
err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
machine, session);
err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe,
machine);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", self->long_name);
return -1;
......@@ -130,10 +155,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return 0;
}
static int perf_event__inject_buildid(union perf_event *event,
static int perf_event__inject_buildid(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
struct perf_session *session)
struct machine *machine)
{
struct addr_location al;
struct thread *thread;
......@@ -141,21 +167,21 @@ static int perf_event__inject_buildid(union perf_event *event,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
thread = perf_session__findnew(session, event->ip.pid);
thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
goto repipe;
}
thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
event->ip.pid, event->ip.ip, &al);
thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
event->ip.ip, &al);
if (al.map != NULL) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
if (map__load(al.map, NULL) >= 0) {
dso__inject_build_id(al.map->dso, session);
dso__inject_build_id(al.map->dso, tool, machine);
/*
* If this fails, too bad, let the other side
* account this as unresolved.
......@@ -168,24 +194,24 @@ static int perf_event__inject_buildid(union perf_event *event,
}
repipe:
perf_event__repipe(event, sample, session);
perf_event__repipe(tool, event, sample, machine);
return 0;
}
struct perf_event_ops inject_ops = {
struct perf_tool perf_inject = {
.sample = perf_event__repipe_sample,
.mmap = perf_event__repipe,
.comm = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
.read = perf_event__repipe,
.read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
.attr = perf_event__repipe_synth,
.event_type = perf_event__repipe_synth,
.tracing_data = perf_event__repipe_synth,
.build_id = perf_event__repipe_synth,
.attr = perf_event__repipe_attr,
.event_type = perf_event__repipe_event_type_synth,
.tracing_data = perf_event__repipe_tracing_data_synth,
.build_id = perf_event__repipe_op2_synth,
};
extern volatile int session_done;
......@@ -203,17 +229,17 @@ static int __cmd_inject(void)
signal(SIGINT, sig_handler);
if (inject_build_ids) {
inject_ops.sample = perf_event__inject_buildid;
inject_ops.mmap = perf_event__repipe_mmap;
inject_ops.fork = perf_event__repipe_task;
inject_ops.tracing_data = perf_event__repipe_tracing_data;
perf_inject.sample = perf_event__inject_buildid;
perf_inject.mmap = perf_event__repipe_mmap;
perf_inject.fork = perf_event__repipe_task;
perf_inject.tracing_data = perf_event__repipe_tracing_data;
}
session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject);
if (session == NULL)
return -ENOMEM;
ret = perf_session__process_events(session, &inject_ops);
ret = perf_session__process_events(session, &perf_inject);
perf_session__delete(session);
......
......@@ -7,6 +7,7 @@
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
......@@ -18,7 +19,7 @@
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
static char const *input_name = "perf.data";
static const char *input_name;
static int alloc_flag;
static int caller_flag;
......@@ -303,12 +304,13 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,
}
}
static int process_sample_event(union perf_event *event,
static int process_sample_event(struct perf_tool *tool __used,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
struct perf_session *session)
struct machine *machine)
{
struct thread *thread = perf_session__findnew(session, event->ip.pid);
struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
......@@ -324,7 +326,7 @@ static int process_sample_event(union perf_event *event,
return 0;
}
static struct perf_event_ops event_ops = {
static struct perf_tool perf_kmem = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
......@@ -483,7 +485,7 @@ static int __cmd_kmem(void)
{
int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
0, false, &event_ops);
0, false, &perf_kmem);
if (session == NULL)
return -ENOMEM;
......@@ -494,7 +496,7 @@ static int __cmd_kmem(void)
goto out_delete;
setup_pager();
err = perf_session__process_events(session, &event_ops);
err = perf_session__process_events(session, &perf_kmem);
if (err != 0)
goto out_delete;
sort_result();
......
......@@ -38,7 +38,7 @@ static const struct option kvm_options[] = {
OPT_BOOLEAN(0, "guest", &perf_guest,
"Collect guest os data"),
OPT_BOOLEAN(0, "host", &perf_host,
"Collect guest os data"),
"Collect host os data"),
OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
"guest mount directory under which every guest os"
" instance has a subdir"),
......
......@@ -12,6 +12,7 @@
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include <sys/types.h>
#include <sys/prctl.h>
......@@ -325,7 +326,7 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
die("memory allocation failed\n");
}
static char const *input_name = "perf.data";
static const char *input_name;
struct raw_event_sample {
u32 size;
......@@ -845,12 +846,13 @@ static void dump_info(void)
die("Unknown type of information\n");
}
static int process_sample_event(union perf_event *event,
static int process_sample_event(struct perf_tool *tool __used,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
struct perf_session *s)
struct machine *machine)
{
struct thread *thread = perf_session__findnew(s, sample->tid);
struct thread *thread = machine__findnew_thread(machine, sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
......@@ -863,7 +865,7 @@ static int process_sample_event(union perf_event *event,
return 0;
}
static struct perf_event_ops eops = {
static struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
......
......@@ -46,7 +46,6 @@
#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
#define DEFAULT_FUNC_FILTER "!_*"
#define MAX_PATH_LEN 256
/* Session management structure */
static struct {
......
This diff is collapsed.
......@@ -72,7 +72,7 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym, int nevents);
int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
......@@ -99,8 +99,7 @@ static inline int symbol__tui_annotate(struct symbol *sym __used,
}
#else
int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
int nr_events, void(*timer)(void *arg), void *arg,
int delay_secs);
void(*timer)(void *arg), void *arg, int delay_secs);
#endif
extern const char *disassembler_style;
......
This diff is collapsed.