Commit f82c37e7 authored by Linus Torvalds

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (35 commits)
  perf: Fix unexported generic perf_arch_fetch_caller_regs
  perf record: Don't try to find buildids in a zero sized file
  perf: export perf_trace_regs and perf_arch_fetch_caller_regs
  perf, x86: Fix hw_perf_enable() event assignment
  perf, ppc: Fix compile error due to new cpu notifiers
  perf: Make the install relative to DESTDIR if specified
  kprobes: Calculate the index correctly when freeing the out-of-line execution slot
  perf tools: Fix sparse CPU numbering related bugs
  perf_event: Fix oops triggered by cpu offline/online
  perf: Drop the obsolete profile naming for trace events
  perf: Take a hot regs snapshot for trace events
  perf: Introduce new perf_fetch_caller_regs() for hot regs snapshot
  perf/x86-64: Use frame pointer to walk on irq and process stacks
  lockdep: Move lock events under lockdep recursion protection
  perf report: Print the map table just after samples for which no map was found
  perf report: Add multiple event support
  perf session: Change perf_session post processing functions to take histogram tree
  perf session: Add storage for seperating event types in report
  perf session: Change add_hist_entry to take the tree root instead of session
  perf record: Add ID and to recorded event data when recording multiple events
  ...
parents c6b9e73f dcd5c166
...@@ -1287,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs) ...@@ -1287,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit(); irq_exit();
} }
void hw_perf_event_setup(int cpu) static void power_pmu_setup(int cpu)
{ {
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
...@@ -1297,6 +1297,23 @@ void hw_perf_event_setup(int cpu) ...@@ -1297,6 +1297,23 @@ void hw_perf_event_setup(int cpu)
cpuhw->mmcr[0] = MMCR0_FC; cpuhw->mmcr[0] = MMCR0_FC;
} }
/*
 * CPU notifier callback for the power_pmu code.
 *
 * CPU_TASKS_FROZEN is masked out of @action; if what remains is
 * CPU_UP_PREPARE, power_pmu_setup() is called for the CPU number carried
 * in @hcpu.  Every other action is ignored.  @self is not used.
 *
 * Always returns NOTIFY_OK.
 */
static int __cpuinit
power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned long event = action & ~CPU_TASKS_FROZEN;

	if (event == CPU_UP_PREPARE) {
		/* @hcpu carries the CPU number encoded as a pointer value. */
		unsigned int target = (long)hcpu;

		power_pmu_setup(target);
	}

	return NOTIFY_OK;
}
int register_power_pmu(struct power_pmu *pmu) int register_power_pmu(struct power_pmu *pmu)
{ {
if (ppmu) if (ppmu)
...@@ -1314,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu) ...@@ -1314,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu)
freeze_events_kernel = MMCR0_FCHV; freeze_events_kernel = MMCR0_FCHV;
#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC64 */
perf_cpu_notifier(power_pmu_notifier);
return 0; return 0;
} }
...@@ -275,13 +275,30 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -275,13 +275,30 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
return &pmu; return &pmu;
} }
void hw_perf_event_setup(int cpu) static void sh_pmu_setup(int cpu)
{ {
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
memset(cpuhw, 0, sizeof(struct cpu_hw_events)); memset(cpuhw, 0, sizeof(struct cpu_hw_events));
} }
/*
 * CPU notifier callback for the sh_pmu code.
 *
 * Only CPU_UP_PREPARE (with or without CPU_TASKS_FROZEN set in @action)
 * triggers any work: sh_pmu_setup() is called for the CPU number passed
 * in @hcpu.  All other actions fall straight through.  @self is not used.
 *
 * Always returns NOTIFY_OK.
 */
static int __cpuinit
sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	/* Nothing to do unless a CPU is being prepared for bring-up. */
	if ((action & ~CPU_TASKS_FROZEN) != CPU_UP_PREPARE)
		return NOTIFY_OK;

	/* @hcpu carries the CPU number encoded as a pointer value. */
	sh_pmu_setup((unsigned int)(long)hcpu);

	return NOTIFY_OK;
}
void hw_perf_enable(void) void hw_perf_enable(void)
{ {
if (!sh_pmu_initialized()) if (!sh_pmu_initialized())
...@@ -308,5 +325,6 @@ int register_sh_pmu(struct sh_pmu *pmu) ...@@ -308,5 +325,6 @@ int register_sh_pmu(struct sh_pmu *pmu)
WARN_ON(pmu->num_events > MAX_HWEVENTS); WARN_ON(pmu->num_events > MAX_HWEVENTS);
perf_cpu_notifier(sh_pmu_notifier);
return 0; return 0;
} }
This diff is collapsed.
...@@ -271,28 +271,6 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) ...@@ -271,28 +271,6 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
return &emptyconstraint; return &emptyconstraint;
} }
/*
 * x86_pmu description named "AMD".
 *
 * Wires the generic x86_pmu_* irq/enable/disable callbacks together with
 * the AMD-specific event map, raw-event hook and event-constraint hooks.
 * The event-select and counter MSR bases are MSR_K7_EVNTSEL0 and
 * MSR_K7_PERFCTR0; num_events is 4 and event_bits/event_mask describe a
 * 48-bit value.
 */
static __initconst struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = x86_pmu_enable_all,
.enable = x86_pmu_enable_event,
.disable = x86_pmu_disable_event,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_events = 4,
.event_bits = 48,
.event_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints
};
static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{ {
struct amd_nb *nb; struct amd_nb *nb;
...@@ -309,7 +287,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) ...@@ -309,7 +287,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
* initialize all possible NB constraints * initialize all possible NB constraints
*/ */
for (i = 0; i < x86_pmu.num_events; i++) { for (i = 0; i < x86_pmu.num_events; i++) {
set_bit(i, nb->event_constraints[i].idxmsk); __set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1; nb->event_constraints[i].weight = 1;
} }
return nb; return nb;
...@@ -378,6 +356,31 @@ static void amd_pmu_cpu_offline(int cpu) ...@@ -378,6 +356,31 @@ static void amd_pmu_cpu_offline(int cpu)
raw_spin_unlock(&amd_nb_lock); raw_spin_unlock(&amd_nb_lock);
} }
/*
 * x86_pmu description named "AMD".
 *
 * Wires the generic x86_pmu_* irq/enable/disable callbacks together with
 * the AMD-specific event map, raw-event hook and event-constraint hooks.
 * The event-select and counter MSR bases are MSR_K7_EVNTSEL0 and
 * MSR_K7_PERFCTR0; num_events is 4 and event_bits/event_mask describe a
 * 48-bit value.
 *
 * The cpu_prepare and cpu_dead hooks point at amd_pmu_cpu_online() and
 * amd_pmu_cpu_offline() respectively.
 */
static __initconst struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = x86_pmu_enable_all,
.enable = x86_pmu_enable_event,
.disable = x86_pmu_disable_event,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_events = 4,
.event_bits = 48,
.event_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,
.cpu_prepare = amd_pmu_cpu_online,
.cpu_dead = amd_pmu_cpu_offline,
};
static __init int amd_pmu_init(void) static __init int amd_pmu_init(void)
{ {
/* Performance-monitoring supported from K7 and later: */ /* Performance-monitoring supported from K7 and later: */
...@@ -390,11 +393,6 @@ static __init int amd_pmu_init(void) ...@@ -390,11 +393,6 @@ static __init int amd_pmu_init(void)
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
sizeof(hw_cache_event_ids)); sizeof(hw_cache_event_ids));
/*
* explicitly initialize the boot cpu, other cpus will get
* the cpu hotplug callbacks from smp_init()
*/
amd_pmu_cpu_online(smp_processor_id());
return 0; return 0;
} }
...@@ -405,12 +403,4 @@ static int amd_pmu_init(void) ...@@ -405,12 +403,4 @@ static int amd_pmu_init(void)
return 0; return 0;
} }
/*
 * Empty stub: does nothing for @cpu.
 * NOTE(review): an #endif follows shortly after, so this looks like the
 * fallback used when the real AMD PMU code is compiled out - confirm.
 */
static void amd_pmu_cpu_online(int cpu)
{
}
/*
 * Empty stub: does nothing for @cpu.
 * NOTE(review): an #endif follows directly, so this looks like the
 * fallback used when the real AMD PMU code is compiled out - confirm.
 */
static void amd_pmu_cpu_offline(int cpu)
{
}
#endif #endif
...@@ -548,9 +548,9 @@ static inline void intel_pmu_ack_status(u64 ack) ...@@ -548,9 +548,9 @@ static inline void intel_pmu_ack_status(u64 ack)
} }
static inline void static inline void
intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{ {
int idx = __idx - X86_PMC_IDX_FIXED; int idx = hwc->idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask; u64 ctrl_val, mask;
mask = 0xfULL << (idx * 4); mask = 0xfULL << (idx * 4);
...@@ -621,26 +621,28 @@ static void intel_pmu_drain_bts_buffer(void) ...@@ -621,26 +621,28 @@ static void intel_pmu_drain_bts_buffer(void)
} }
static inline void static inline void
intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) intel_pmu_disable_event(struct perf_event *event)
{ {
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { struct hw_perf_event *hwc = &event->hw;
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts(); intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer(); intel_pmu_drain_bts_buffer();
return; return;
} }
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc, idx); intel_pmu_disable_fixed(hwc);
return; return;
} }
x86_pmu_disable_event(hwc, idx); x86_pmu_disable_event(event);
} }
static inline void static inline void
intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{ {
int idx = __idx - X86_PMC_IDX_FIXED; int idx = hwc->idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask; u64 ctrl_val, bits, mask;
int err; int err;
...@@ -670,9 +672,11 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) ...@@ -670,9 +672,11 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
err = checking_wrmsrl(hwc->config_base, ctrl_val); err = checking_wrmsrl(hwc->config_base, ctrl_val);
} }
static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) static void intel_pmu_enable_event(struct perf_event *event)
{ {
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { struct hw_perf_event *hwc = &event->hw;
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
if (!__get_cpu_var(cpu_hw_events).enabled) if (!__get_cpu_var(cpu_hw_events).enabled)
return; return;
...@@ -681,11 +685,11 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) ...@@ -681,11 +685,11 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
} }
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc, idx); intel_pmu_enable_fixed(hwc);
return; return;
} }
__x86_pmu_enable_event(hwc, idx); __x86_pmu_enable_event(hwc);
} }
/* /*
...@@ -694,14 +698,8 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) ...@@ -694,14 +698,8 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
*/ */
static int intel_pmu_save_and_restart(struct perf_event *event) static int intel_pmu_save_and_restart(struct perf_event *event)
{ {
struct hw_perf_event *hwc = &event->hw; x86_perf_event_update(event);
int idx = hwc->idx; return x86_perf_event_set_period(event);
int ret;
x86_perf_event_update(event, hwc, idx);
ret = x86_perf_event_set_period(event, hwc, idx);
return ret;
} }
static void intel_pmu_reset(void) static void intel_pmu_reset(void)
...@@ -745,11 +743,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -745,11 +743,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events); cpuc = &__get_cpu_var(cpu_hw_events);
perf_disable(); intel_pmu_disable_all();
intel_pmu_drain_bts_buffer(); intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status(); status = intel_pmu_get_status();
if (!status) { if (!status) {
perf_enable(); intel_pmu_enable_all();
return 0; return 0;
} }
...@@ -759,8 +757,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -759,8 +757,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
WARN_ONCE(1, "perfevents: irq loop stuck!\n"); WARN_ONCE(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug(); perf_event_print_debug();
intel_pmu_reset(); intel_pmu_reset();
perf_enable(); goto done;
return 1;
} }
inc_irq_stat(apic_perf_irqs); inc_irq_stat(apic_perf_irqs);
...@@ -768,7 +765,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -768,7 +765,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit]; struct perf_event *event = cpuc->events[bit];
clear_bit(bit, (unsigned long *) &status);
if (!test_bit(bit, cpuc->active_mask)) if (!test_bit(bit, cpuc->active_mask))
continue; continue;
...@@ -778,7 +774,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -778,7 +774,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
data.period = event->hw.last_period; data.period = event->hw.last_period;
if (perf_event_overflow(event, 1, &data, regs)) if (perf_event_overflow(event, 1, &data, regs))
intel_pmu_disable_event(&event->hw, bit); x86_pmu_stop(event);
} }
intel_pmu_ack_status(ack); intel_pmu_ack_status(ack);
...@@ -790,8 +786,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -790,8 +786,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
if (status) if (status)
goto again; goto again;
perf_enable(); done:
intel_pmu_enable_all();
return 1; return 1;
} }
...@@ -870,7 +866,10 @@ static __initconst struct x86_pmu intel_pmu = { ...@@ -870,7 +866,10 @@ static __initconst struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1, .max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts, .enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts, .disable_bts = intel_pmu_disable_bts,
.get_event_constraints = intel_get_event_constraints .get_event_constraints = intel_get_event_constraints,
.cpu_starting = init_debug_store_on_cpu,
.cpu_dying = fini_debug_store_on_cpu,
}; };
static __init int intel_pmu_init(void) static __init int intel_pmu_init(void)
......
...@@ -77,27 +77,29 @@ static void p6_pmu_enable_all(void) ...@@ -77,27 +77,29 @@ static void p6_pmu_enable_all(void)
} }
static inline void static inline void
p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) p6_pmu_disable_event(struct perf_event *event)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val = P6_NOP_EVENT; u64 val = P6_NOP_EVENT;
if (cpuc->enabled) if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE; val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)checking_wrmsrl(hwc->config_base + idx, val); (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
} }
static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) static void p6_pmu_enable_event(struct perf_event *event)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val; u64 val;
val = hwc->config; val = hwc->config;
if (cpuc->enabled) if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE; val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)checking_wrmsrl(hwc->config_base + idx, val); (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
} }
static __initconst struct x86_pmu p6_pmu = { static __initconst struct x86_pmu p6_pmu = {
......
...@@ -29,4 +29,19 @@ struct stack_frame { ...@@ -29,4 +29,19 @@ struct stack_frame {
struct stack_frame *next_frame; struct stack_frame *next_frame;
unsigned long return_address; unsigned long return_address;
}; };
/*
 * Return a frame pointer value as an unsigned long.
 *
 * The starting frame is obtained with get_bp().  When CONFIG_FRAME_POINTER
 * is set, the next_frame link is then followed @n times; otherwise no
 * links are followed and @n is unused.
 */
static inline unsigned long rewind_frame_pointer(int n)
{
	struct stack_frame *fp;

	get_bp(fp);

#ifdef CONFIG_FRAME_POINTER
	/* Follow one next_frame link per remaining count. */
	for (; n; n--)
		fp = fp->next_frame;
#endif

	return (unsigned long)fp;
}
#endif /* DUMPSTACK_H */
...@@ -208,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ...@@ -208,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
if (ops->stack(data, "IRQ") < 0) if (ops->stack(data, "IRQ") < 0)
break; break;
bp = print_context_stack(tinfo, stack, bp, bp = ops->walk_stack(tinfo, stack, bp,
ops, data, irq_stack_end, &graph); ops, data, irq_stack_end, &graph);
/* /*
* We link to the next stack (which would be * We link to the next stack (which would be
...@@ -229,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ...@@ -229,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
/* /*
* This handles the process stack: * This handles the process stack:
*/ */
bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
put_cpu(); put_cpu();
} }
EXPORT_SYMBOL(dump_trace); EXPORT_SYMBOL(dump_trace);
......
...@@ -131,12 +131,12 @@ struct ftrace_event_call { ...@@ -131,12 +131,12 @@ struct ftrace_event_call {
void *mod; void *mod;
void *data; void *data;
int profile_count; int perf_refcount;
int (*profile_enable)(struct ftrace_event_call *); int (*perf_event_enable)(struct ftrace_event_call *);
void (*profile_disable)(struct ftrace_event_call *); void (*perf_event_disable)(struct ftrace_event_call *);
}; };
#define FTRACE_MAX_PROFILE_SIZE 2048 #define PERF_MAX_TRACE_SIZE 2048
#define MAX_FILTER_PRED 32 #define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
...@@ -187,22 +187,25 @@ do { \ ...@@ -187,22 +187,25 @@ do { \
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
struct perf_event; struct perf_event;
extern int ftrace_profile_enable(int event_id);
extern void ftrace_profile_disable(int event_id); DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
extern int perf_trace_enable(int event_id);
extern void perf_trace_disable(int event_id);
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str); char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event); extern void ftrace_profile_free_filter(struct perf_event *event);
extern void * extern void *
ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, perf_trace_buf_prepare(int size, unsigned short type, int *rctxp,
unsigned long *irq_flags); unsigned long *irq_flags);
static inline void static inline void
ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
u64 count, unsigned long irq_flags) u64 count, unsigned long irq_flags, struct pt_regs *regs)
{ {
struct trace_entry *entry = raw_data; struct trace_entry *entry = raw_data;
perf_tp_event(entry->type, addr, count, raw_data, size); perf_tp_event(entry->type, addr, count, raw_data, size, regs);
perf_swevent_put_recursion_context(rctx); perf_swevent_put_recursion_context(rctx);
local_irq_restore(irq_flags); local_irq_restore(irq_flags);
} }
......
...@@ -452,6 +452,8 @@ enum perf_callchain_context { ...@@ -452,6 +452,8 @@ enum perf_callchain_context {
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <asm/atomic.h> #include <asm/atomic.h>
#define PERF_MAX_STACK_DEPTH 255 #define PERF_MAX_STACK_DEPTH 255
...@@ -847,6 +849,44 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) ...@@ -847,6 +849,44 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
__perf_sw_event(event_id, nr, nmi, regs, addr); __perf_sw_event(event_id, nr, nmi, regs, addr);
} }
extern void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 *
 * @regs: buffer to fill; it is zeroed here before being handed to
 *        perf_arch_fetch_caller_regs().
 * @skip: 1..4 select CALLER_ADDR0..CALLER_ADDR3 as the ip that is passed
 *        on; any other value passes ip == 0.  @skip itself is forwarded
 *        unchanged.
 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
{
	unsigned long ip;

	memset(regs, 0, sizeof(*regs));

	switch (skip) {
	case 1:
		ip = CALLER_ADDR0;
		break;
	case 2:
		ip = CALLER_ADDR1;
		break;
	case 3:
		ip = CALLER_ADDR2;
		break;
	case 4:
		ip = CALLER_ADDR3;
		break;
	/* No need to support further for now */
	default:
		ip = 0;
		break;
	}

	/*
	 * Plain call rather than "return f(...)": a function returning void
	 * must not return an expression.
	 */
	perf_arch_fetch_caller_regs(regs, ip, skip);
}
extern void __perf_event_mmap(struct vm_area_struct *vma); extern void __perf_event_mmap(struct vm_area_struct *vma);
static inline void perf_event_mmap(struct vm_area_struct *vma) static inline void perf_event_mmap(struct vm_area_struct *vma)
...@@ -880,7 +920,8 @@ static inline bool perf_paranoid_kernel(void) ...@@ -880,7 +920,8 @@ static inline bool perf_paranoid_kernel(void)
} }
extern void perf_event_init(void); extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size, struct pt_regs *regs);
extern void perf_bp_event(struct perf_event *event, void *data); extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags #ifndef perf_misc_flags
...@@ -936,5 +977,21 @@ static inline void perf_event_disable(struct perf_event *event) { } ...@@ -936,5 +977,21 @@ static inline void perf_event_disable(struct perf_event *event) { }
#define perf_output_put(handle, x) \ #define perf_output_put(handle, x) \
perf_output_copy((handle), &(x), sizeof(x)) perf_output_copy((handle), &(x), sizeof(x))
/*
 * perf_cpu_notifier(fn) - install fn as a CPU notifier callback.
 *
 * Defines a static notifier_block named fn##_nb that points at fn with
 * priority 20, then calls fn directly three times for the current CPU
 * (smp_processor_id()) with CPU_UP_PREPARE, CPU_STARTING and CPU_ONLINE,
 * and finally registers the block with register_cpu_notifier().
 * NOTE(review): the direct calls presumably replay the bring-up events for
 * the CPU that is already running when the notifier is installed - confirm.
 *
 * This has to have a higher priority than migration_notifier in sched.c.
 */
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = 20 }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
(void *)(unsigned long)smp_processor_id()); \
register_cpu_notifier(&fn##_nb); \
} while (0)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_PERF_EVENT_H */ #endif /* _LINUX_PERF_EVENT_H */
...@@ -105,18 +105,18 @@ struct perf_event_attr; ...@@ -105,18 +105,18 @@ struct perf_event_attr;
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ #define TRACE_SYS_ENTER_PERF_INIT(sname) \
.profile_enable = prof_sysenter_enable, \ .perf_event_enable = perf_sysenter_enable, \
.profile_disable = prof_sysenter_disable, .perf_event_disable = perf_sysenter_disable,
#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ #define TRACE_SYS_EXIT_PERF_INIT(sname) \
.profile_enable = prof_sysexit_enable, \ .perf_event_enable = perf_sysexit_enable, \
.profile_disable = prof_sysexit_disable, .perf_event_disable = perf_sysexit_disable,
#else #else
#define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PERF(sname)
#define TRACE_SYS_ENTER_PROFILE_INIT(sname) #define TRACE_SYS_ENTER_PERF_INIT(sname)
#define TRACE_SYS_EXIT_PROFILE(sname) #define TRACE_SYS_EXIT_PERF(sname)
#define TRACE_SYS_EXIT_PROFILE_INIT(sname) #define TRACE_SYS_EXIT_PERF_INIT(sname)
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
#ifdef CONFIG_FTRACE_SYSCALLS #ifdef CONFIG_FTRACE_SYSCALLS
...@@ -153,7 +153,7 @@ struct perf_event_attr; ...@@ -153,7 +153,7 @@ struct perf_event_attr;
.regfunc = reg_event_syscall_enter, \ .regfunc = reg_event_syscall_enter, \
.unregfunc = unreg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \
.data = (void *)&__syscall_meta_##sname,\ .data = (void *)&__syscall_meta_##sname,\
TRACE_SYS_ENTER_PROFILE_INIT(sname) \ TRACE_SYS_ENTER_PERF_INIT(sname) \
} }
#define SYSCALL_TRACE_EXIT_EVENT(sname) \ #define SYSCALL_TRACE_EXIT_EVENT(sname) \
...@@ -175,7 +175,7 @@ struct perf_event_attr; ...@@ -175,7 +175,7 @@ struct perf_event_attr;
.regfunc = reg_event_syscall_exit, \ .regfunc = reg_event_syscall_exit, \
.unregfunc = unreg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \
.data = (void *)&__syscall_meta_##sname,\ .data = (void *)&__syscall_meta_##sname,\
TRACE_SYS_EXIT_PROFILE_INIT(sname) \ TRACE_SYS_EXIT_PERF_INIT(sname) \
} }
#define SYSCALL_METADATA(sname, nb) \ #define SYSCALL_METADATA(sname, nb) \
......
...@@ -401,18 +401,18 @@ static inline notrace int ftrace_get_offsets_##call( \ ...@@ -401,18 +401,18 @@ static inline notrace int ftrace_get_offsets_##call( \
#undef DEFINE_EVENT #undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args) \ #define DEFINE_EVENT(template, name, proto, args) \
\ \
static void ftrace_profile_##name(proto); \ static void perf_trace_##name(proto); \
\ \
static notrace int \ static notrace int \
ftrace_profile_enable_##name(struct ftrace_event_call *unused) \ perf_trace_enable_##name(struct ftrace_event_call *unused) \
{ \ { \
return register_trace_##name(ftrace_profile_##name); \ return register_trace_##name(perf_trace_##name); \
} \ } \
\ \
static notrace void \ static notrace void \
ftrace_profile_disable_##name(struct ftrace_event_call *unused) \ perf_trace_disable_##name(struct ftrace_event_call *unused) \
{ \ { \
unregister_trace_##name(ftrace_profile_##name); \ unregister_trace_##name(perf_trace_##name); \
} }
#undef DEFINE_EVENT_PRINT #undef DEFINE_EVENT_PRINT
...@@ -507,12 +507,12 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \ ...@@ -507,12 +507,12 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
#define _TRACE_PROFILE_INIT(call) \ #define _TRACE_PERF_INIT(call) \
.profile_enable = ftrace_profile_enable_##call, \ .perf_event_enable = perf_trace_enable_##call, \
.profile_disable = ftrace_profile_disable_##call, .perf_event_disable = perf_trace_disable_##call,
#else #else
#define _TRACE_PROFILE_INIT(call) #define _TRACE_PERF_INIT(call)
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
#undef __entry #undef __entry
...@@ -638,7 +638,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ ...@@ -638,7 +638,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.unregfunc = ftrace_raw_unreg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \
.print_fmt = print_fmt_##template, \ .print_fmt = print_fmt_##template, \
.define_fields = ftrace_define_fields_##template, \ .define_fields = ftrace_define_fields_##template, \
_TRACE_PROFILE_INIT(call) \ _TRACE_PERF_INIT(call) \
} }
#undef DEFINE_EVENT_PRINT #undef DEFINE_EVENT_PRINT
...@@ -657,18 +657,18 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ ...@@ -657,18 +657,18 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.unregfunc = ftrace_raw_unreg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \
.print_fmt = print_fmt_##call, \ .print_fmt = print_fmt_##call, \
.define_fields = ftrace_define_fields_##template, \ .define_fields = ftrace_define_fields_##template, \
_TRACE_PROFILE_INIT(call) \ _TRACE_PERF_INIT(call) \
} }
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/* /*
* Define the insertion callback to profile events * Define the insertion callback to perf events
* *
* The job is very similar to ftrace_raw_event_<call> except that we don't * The job is very similar to ftrace_raw_event_<call> except that we don't
* insert in the ring buffer but in a perf counter. * insert in the ring buffer but in a perf counter.
* *
* static void ftrace_profile_<call>(proto) * static void perf_trace_<call>(proto)
* { * {
* struct ftrace_data_offsets_<call> __maybe_unused __data_offsets; * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
* struct ftrace_event_call *event_call = &event_<call>; * struct ftrace_event_call *event_call = &event_<call>;
...@@ -757,13 +757,14 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ ...@@ -757,13 +757,14 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
#undef DECLARE_EVENT_CLASS #undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace void \ static notrace void \
ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
proto) \ proto) \
{ \ { \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_raw_##call *entry; \ struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \ u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \ unsigned long irq_flags; \
struct pt_regs *__regs; \
int __entry_size; \ int __entry_size; \
int __data_size; \ int __data_size; \
int rctx; \ int rctx; \
...@@ -773,10 +774,10 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ ...@@ -773,10 +774,10 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
sizeof(u64)); \ sizeof(u64)); \
__entry_size -= sizeof(u32); \ __entry_size -= sizeof(u32); \
\ \
if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \
"profile buffer not large enough")) \ "profile buffer not large enough")) \
return; \ return; \
entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \ entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \
__entry_size, event_call->id, &rctx, &irq_flags); \ __entry_size, event_call->id, &rctx, &irq_flags); \
if (!entry) \ if (!entry) \
return; \ return; \
...@@ -784,17 +785,20 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ ...@@ -784,17 +785,20 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
\ \
{ assign; } \ { assign; } \
\ \
ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ __regs = &__get_cpu_var(perf_trace_regs); \
__count, irq_flags); \ perf_fetch_caller_regs(__regs, 2); \
\
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, irq_flags, __regs); \
} }
#undef DEFINE_EVENT #undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \ #define DEFINE_EVENT(template, call, proto, args) \
static notrace void ftrace_profile_##call(proto) \ static notrace void perf_trace_##call(proto) \
{ \ { \
struct ftrace_event_call *event_call = &event_##call; \ struct ftrace_event_call *event_call = &event_##call; \
\ \
ftrace_profile_templ_##template(event_call, args); \ perf_trace_templ_##template(event_call, args); \
} }
#undef DEFINE_EVENT_PRINT #undef DEFINE_EVENT_PRINT
......
...@@ -47,10 +47,10 @@ enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); ...@@ -47,10 +47,10 @@ enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
#endif #endif
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
int prof_sysenter_enable(struct ftrace_event_call *call); int perf_sysenter_enable(struct ftrace_event_call *call);
void prof_sysenter_disable(struct ftrace_event_call *call); void perf_sysenter_disable(struct ftrace_event_call *call);
int prof_sysexit_enable(struct ftrace_event_call *call); int perf_sysexit_enable(struct ftrace_event_call *call);
void prof_sysexit_disable(struct ftrace_event_call *call); void perf_sysexit_disable(struct ftrace_event_call *call);
#endif #endif
#endif /* _TRACE_SYSCALL_H */ #endif /* _TRACE_SYSCALL_H */
...@@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, ...@@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
struct kprobe_insn_page *kip; struct kprobe_insn_page *kip;
list_for_each_entry(kip, &c->pages, list) { list_for_each_entry(kip, &c->pages, list) {
long idx = ((long)slot - (long)kip->insns) / c->insn_size; long idx = ((long)slot - (long)kip->insns) /
(c->insn_size * sizeof(kprobe_opcode_t));
if (idx >= 0 && idx < slots_per_page(c)) { if (idx >= 0 && idx < slots_per_page(c)) {
WARN_ON(kip->slot_used[idx] != SLOT_USED); WARN_ON(kip->slot_used[idx] != SLOT_USED);
if (dirty) { if (dirty) {
......
...@@ -3211,8 +3211,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, ...@@ -3211,8 +3211,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
{ {
unsigned long flags; unsigned long flags;
trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
if (unlikely(current->lockdep_recursion)) if (unlikely(current->lockdep_recursion))
return; return;
...@@ -3220,6 +3218,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, ...@@ -3220,6 +3218,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
__lock_acquire(lock, subclass, trylock, read, check, __lock_acquire(lock, subclass, trylock, read, check,
irqs_disabled_flags(flags), nest_lock, ip, 0); irqs_disabled_flags(flags), nest_lock, ip, 0);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
...@@ -3232,14 +3231,13 @@ void lock_release(struct lockdep_map *lock, int nested, ...@@ -3232,14 +3231,13 @@ void lock_release(struct lockdep_map *lock, int nested,
{ {
unsigned long flags; unsigned long flags;
trace_lock_release(lock, nested, ip);
if (unlikely(current->lockdep_recursion)) if (unlikely(current->lockdep_recursion))
return; return;
raw_local_irq_save(flags); raw_local_irq_save(flags);
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
trace_lock_release(lock, nested, ip);
__lock_release(lock, nested, ip); __lock_release(lock, nested, ip);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
...@@ -3413,8 +3411,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) ...@@ -3413,8 +3411,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{ {
unsigned long flags; unsigned long flags;
trace_lock_contended(lock, ip);
if (unlikely(!lock_stat)) if (unlikely(!lock_stat))
return; return;
...@@ -3424,6 +3420,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) ...@@ -3424,6 +3420,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
raw_local_irq_save(flags); raw_local_irq_save(flags);
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
trace_lock_contended(lock, ip);
__lock_contended(lock, ip); __lock_contended(lock, ip);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
......
...@@ -81,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -81,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_event_setup(int cpu) { barrier(); }
void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
int __weak int __weak
hw_perf_group_sched_in(struct perf_event *group_leader, hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx, struct perf_cpu_context *cpuctx,
...@@ -97,25 +93,15 @@ void __weak perf_event_print_debug(void) { } ...@@ -97,25 +93,15 @@ void __weak perf_event_print_debug(void) { }
static DEFINE_PER_CPU(int, perf_disable_count); static DEFINE_PER_CPU(int, perf_disable_count);
void __perf_disable(void)
{
__get_cpu_var(perf_disable_count)++;
}
bool __perf_enable(void)
{
return !--__get_cpu_var(perf_disable_count);
}
void perf_disable(void) void perf_disable(void)
{ {
__perf_disable(); if (!__get_cpu_var(perf_disable_count)++)
hw_perf_disable(); hw_perf_disable();
} }
void perf_enable(void) void perf_enable(void)
{ {
if (__perf_enable()) if (!--__get_cpu_var(perf_disable_count))
hw_perf_enable(); hw_perf_enable();
} }
...@@ -1538,12 +1524,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) ...@@ -1538,12 +1524,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/ */
if (interrupts == MAX_INTERRUPTS) { if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1); perf_log_throttle(event, 1);
perf_disable();
event->pmu->unthrottle(event); event->pmu->unthrottle(event);
perf_enable();
} }
if (!event->attr.freq || !event->attr.sample_freq) if (!event->attr.freq || !event->attr.sample_freq)
continue; continue;
perf_disable();
event->pmu->read(event); event->pmu->read(event);
now = atomic64_read(&event->count); now = atomic64_read(&event->count);
delta = now - hwc->freq_count_stamp; delta = now - hwc->freq_count_stamp;
...@@ -1551,6 +1540,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) ...@@ -1551,6 +1540,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
if (delta > 0) if (delta > 0)
perf_adjust_period(event, TICK_NSEC, delta); perf_adjust_period(event, TICK_NSEC, delta);
perf_enable();
} }
raw_spin_unlock(&ctx->lock); raw_spin_unlock(&ctx->lock);
} }
...@@ -1560,9 +1550,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) ...@@ -1560,9 +1550,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/ */
static void rotate_ctx(struct perf_event_context *ctx) static void rotate_ctx(struct perf_event_context *ctx)
{ {
if (!ctx->nr_events)
return;
raw_spin_lock(&ctx->lock); raw_spin_lock(&ctx->lock);
/* Rotate the first entry last of non-pinned groups */ /* Rotate the first entry last of non-pinned groups */
...@@ -1575,19 +1562,28 @@ void perf_event_task_tick(struct task_struct *curr) ...@@ -1575,19 +1562,28 @@ void perf_event_task_tick(struct task_struct *curr)
{ {
struct perf_cpu_context *cpuctx; struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx; struct perf_event_context *ctx;
int rotate = 0;
if (!atomic_read(&nr_events)) if (!atomic_read(&nr_events))
return; return;
cpuctx = &__get_cpu_var(perf_cpu_context); cpuctx = &__get_cpu_var(perf_cpu_context);
ctx = curr->perf_event_ctxp; if (cpuctx->ctx.nr_events &&
cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
perf_disable(); ctx = curr->perf_event_ctxp;
if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
rotate = 1;
perf_ctx_adjust_freq(&cpuctx->ctx); perf_ctx_adjust_freq(&cpuctx->ctx);
if (ctx) if (ctx)
perf_ctx_adjust_freq(ctx); perf_ctx_adjust_freq(ctx);
if (!rotate)
return;
perf_disable();
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx) if (ctx)
task_ctx_sched_out(ctx, EVENT_FLEXIBLE); task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
...@@ -1599,7 +1595,6 @@ void perf_event_task_tick(struct task_struct *curr) ...@@ -1599,7 +1595,6 @@ void perf_event_task_tick(struct task_struct *curr)
cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
if (ctx) if (ctx)
task_ctx_sched_in(curr, EVENT_FLEXIBLE); task_ctx_sched_in(curr, EVENT_FLEXIBLE);
perf_enable(); perf_enable();
} }
...@@ -2791,6 +2786,13 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) ...@@ -2791,6 +2786,13 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return NULL; return NULL;
} }
#ifdef CONFIG_EVENT_TRACING
__weak
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
}
#endif
/* /*
* Output * Output
*/ */
...@@ -4318,9 +4320,8 @@ static const struct pmu perf_ops_task_clock = { ...@@ -4318,9 +4320,8 @@ static const struct pmu perf_ops_task_clock = {
#ifdef CONFIG_EVENT_TRACING #ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record, void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size) int entry_size, struct pt_regs *regs)
{ {
struct pt_regs *regs = get_irq_regs();
struct perf_sample_data data; struct perf_sample_data data;
struct perf_raw_record raw = { struct perf_raw_record raw = {
.size = entry_size, .size = entry_size,
...@@ -4330,12 +4331,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, ...@@ -4330,12 +4331,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
perf_sample_data_init(&data, addr); perf_sample_data_init(&data, addr);
data.raw = &raw; data.raw = &raw;
if (!regs)
regs = task_pt_regs(current);
/* Trace events already protected against recursion */ /* Trace events already protected against recursion */
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
&data, regs); &data, regs);
} }
EXPORT_SYMBOL_GPL(perf_tp_event); EXPORT_SYMBOL_GPL(perf_tp_event);
...@@ -4351,7 +4349,7 @@ static int perf_tp_event_match(struct perf_event *event, ...@@ -4351,7 +4349,7 @@ static int perf_tp_event_match(struct perf_event *event,
static void tp_perf_event_destroy(struct perf_event *event) static void tp_perf_event_destroy(struct perf_event *event)
{ {
ftrace_profile_disable(event->attr.config); perf_trace_disable(event->attr.config);
} }
static const struct pmu *tp_perf_event_init(struct perf_event *event) static const struct pmu *tp_perf_event_init(struct perf_event *event)
...@@ -4365,7 +4363,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) ...@@ -4365,7 +4363,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
!capable(CAP_SYS_ADMIN)) !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
if (ftrace_profile_enable(event->attr.config)) if (perf_trace_enable(event->attr.config))
return NULL; return NULL;
event->destroy = tp_perf_event_destroy; event->destroy = tp_perf_event_destroy;
...@@ -5372,18 +5370,26 @@ int perf_event_init_task(struct task_struct *child) ...@@ -5372,18 +5370,26 @@ int perf_event_init_task(struct task_struct *child)
return ret; return ret;
} }
static void __init perf_event_init_all_cpus(void)
{
int cpu;
struct perf_cpu_context *cpuctx;
for_each_possible_cpu(cpu) {
cpuctx = &per_cpu(perf_cpu_context, cpu);
__perf_event_init_context(&cpuctx->ctx, NULL);
}
}
static void __cpuinit perf_event_init_cpu(int cpu) static void __cpuinit perf_event_init_cpu(int cpu)
{ {
struct perf_cpu_context *cpuctx; struct perf_cpu_context *cpuctx;
cpuctx = &per_cpu(perf_cpu_context, cpu); cpuctx = &per_cpu(perf_cpu_context, cpu);
__perf_event_init_context(&cpuctx->ctx, NULL);
spin_lock(&perf_resource_lock); spin_lock(&perf_resource_lock);
cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
spin_unlock(&perf_resource_lock); spin_unlock(&perf_resource_lock);
hw_perf_event_setup(cpu);
} }
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
...@@ -5423,20 +5429,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) ...@@ -5423,20 +5429,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_event_init_cpu(cpu); perf_event_init_cpu(cpu);
break; break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
hw_perf_event_setup_online(cpu);
break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN: case CPU_DOWN_PREPARE_FROZEN:
perf_event_exit_cpu(cpu); perf_event_exit_cpu(cpu);
break; break;
case CPU_DEAD:
hw_perf_event_setup_offline(cpu);
break;
default: default:
break; break;
} }
...@@ -5454,6 +5451,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = { ...@@ -5454,6 +5451,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
void __init perf_event_init(void) void __init perf_event_init(void)
{ {
perf_event_init_all_cpus();
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id()); (void *)(long)smp_processor_id());
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
......
...@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o ...@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
ifeq ($(CONFIG_PERF_EVENTS),y) ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
......
/* /*
* trace event based perf counter profiling * trace event based perf event profiling/tracing
* *
* Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
* * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include "trace.h" #include "trace.h"
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf; static char *perf_trace_buf;
static char *perf_trace_buf_nmi; static char *perf_trace_buf_nmi;
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
/* Count the events in use (per event id, not per instance) */ /* Count the events in use (per event id, not per instance) */
static int total_profile_count; static int total_ref_count;
static int ftrace_profile_enable_event(struct ftrace_event_call *event) static int perf_trace_event_enable(struct ftrace_event_call *event)
{ {
char *buf; char *buf;
int ret = -ENOMEM; int ret = -ENOMEM;
if (event->profile_count++ > 0) if (event->perf_refcount++ > 0)
return 0; return 0;
if (!total_profile_count) { if (!total_ref_count) {
buf = (char *)alloc_percpu(perf_trace_t); buf = (char *)alloc_percpu(perf_trace_t);
if (!buf) if (!buf)
goto fail_buf; goto fail_buf;
...@@ -40,35 +44,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) ...@@ -40,35 +44,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
rcu_assign_pointer(perf_trace_buf_nmi, buf); rcu_assign_pointer(perf_trace_buf_nmi, buf);
} }
ret = event->profile_enable(event); ret = event->perf_event_enable(event);
if (!ret) { if (!ret) {
total_profile_count++; total_ref_count++;
return 0; return 0;
} }
fail_buf_nmi: fail_buf_nmi:
if (!total_profile_count) { if (!total_ref_count) {
free_percpu(perf_trace_buf_nmi); free_percpu(perf_trace_buf_nmi);
free_percpu(perf_trace_buf); free_percpu(perf_trace_buf);
perf_trace_buf_nmi = NULL; perf_trace_buf_nmi = NULL;
perf_trace_buf = NULL; perf_trace_buf = NULL;
} }
fail_buf: fail_buf:
event->profile_count--; event->perf_refcount--;
return ret; return ret;
} }
int ftrace_profile_enable(int event_id) int perf_trace_enable(int event_id)
{ {
struct ftrace_event_call *event; struct ftrace_event_call *event;
int ret = -EINVAL; int ret = -EINVAL;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) { list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id && event->profile_enable && if (event->id == event_id && event->perf_event_enable &&
try_module_get(event->mod)) { try_module_get(event->mod)) {
ret = ftrace_profile_enable_event(event); ret = perf_trace_event_enable(event);
break; break;
} }
} }
...@@ -77,16 +81,16 @@ int ftrace_profile_enable(int event_id) ...@@ -77,16 +81,16 @@ int ftrace_profile_enable(int event_id)
return ret; return ret;
} }
static void ftrace_profile_disable_event(struct ftrace_event_call *event) static void perf_trace_event_disable(struct ftrace_event_call *event)
{ {
char *buf, *nmi_buf; char *buf, *nmi_buf;
if (--event->profile_count > 0) if (--event->perf_refcount > 0)
return; return;
event->profile_disable(event); event->perf_event_disable(event);
if (!--total_profile_count) { if (!--total_ref_count) {
buf = perf_trace_buf; buf = perf_trace_buf;
rcu_assign_pointer(perf_trace_buf, NULL); rcu_assign_pointer(perf_trace_buf, NULL);
...@@ -104,14 +108,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) ...@@ -104,14 +108,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
} }
} }
void ftrace_profile_disable(int event_id) void perf_trace_disable(int event_id)
{ {
struct ftrace_event_call *event; struct ftrace_event_call *event;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) { list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) { if (event->id == event_id) {
ftrace_profile_disable_event(event); perf_trace_event_disable(event);
module_put(event->mod); module_put(event->mod);
break; break;
} }
...@@ -119,8 +123,8 @@ void ftrace_profile_disable(int event_id) ...@@ -119,8 +123,8 @@ void ftrace_profile_disable(int event_id)
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
} }
__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
int *rctxp, unsigned long *irq_flags) int *rctxp, unsigned long *irq_flags)
{ {
struct trace_entry *entry; struct trace_entry *entry;
char *trace_buf, *raw_data; char *trace_buf, *raw_data;
...@@ -161,4 +165,4 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, ...@@ -161,4 +165,4 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
local_irq_restore(*irq_flags); local_irq_restore(*irq_flags);
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
...@@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, ...@@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
trace_create_file("enable", 0644, call->dir, call, trace_create_file("enable", 0644, call->dir, call,
enable); enable);
if (call->id && call->profile_enable) if (call->id && call->perf_event_enable)
trace_create_file("id", 0444, call->dir, call, trace_create_file("id", 0444, call->dir, call,
id); id);
......
...@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp) ...@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp)
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */ /* Kprobe profile handler */
static __kprobes void kprobe_profile_func(struct kprobe *kp, static __kprobes void kprobe_perf_func(struct kprobe *kp,
struct pt_regs *regs) struct pt_regs *regs)
{ {
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
...@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, ...@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64)); size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32); size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough")) "profile buffer not large enough"))
return; return;
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry) if (!entry)
return; return;
...@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, ...@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++) for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs); entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
} }
/* Kretprobe profile handler */ /* Kretprobe profile handler */
static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
struct pt_regs *regs) struct pt_regs *regs)
{ {
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
...@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, ...@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64)); size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32); size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough")) "profile buffer not large enough"))
return; return;
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry) if (!entry)
return; return;
...@@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, ...@@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++) for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs); entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
irq_flags, regs);
} }
static int probe_profile_enable(struct ftrace_event_call *call) static int probe_perf_enable(struct ftrace_event_call *call)
{ {
struct trace_probe *tp = (struct trace_probe *)call->data; struct trace_probe *tp = (struct trace_probe *)call->data;
...@@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call) ...@@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
return enable_kprobe(&tp->rp.kp); return enable_kprobe(&tp->rp.kp);
} }
static void probe_profile_disable(struct ftrace_event_call *call) static void probe_perf_disable(struct ftrace_event_call *call)
{ {
struct trace_probe *tp = (struct trace_probe *)call->data; struct trace_probe *tp = (struct trace_probe *)call->data;
...@@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) ...@@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
kprobe_trace_func(kp, regs); kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE) if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs); kprobe_perf_func(kp, regs);
#endif #endif
return 0; /* We don't tweek kernel, so just return 0 */ return 0; /* We don't tweek kernel, so just return 0 */
} }
...@@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) ...@@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
kretprobe_trace_func(ri, regs); kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE) if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs); kretprobe_perf_func(ri, regs);
#endif #endif
return 0; /* We don't tweek kernel, so just return 0 */ return 0; /* We don't tweek kernel, so just return 0 */
} }
...@@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp) ...@@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp)
call->unregfunc = probe_event_disable; call->unregfunc = probe_event_disable;
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable; call->perf_event_enable = probe_perf_enable;
call->profile_disable = probe_profile_disable; call->perf_event_disable = probe_perf_disable;
#endif #endif
call->data = tp; call->data = tp;
ret = trace_add_event_call(call); ret = trace_add_event_call(call);
......
...@@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls); ...@@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter; static int sys_perf_refcount_enter;
static int sys_prof_refcount_exit; static int sys_perf_refcount_exit;
static void prof_syscall_enter(struct pt_regs *regs, long id) static void perf_syscall_enter(struct pt_regs *regs, long id)
{ {
struct syscall_metadata *sys_data; struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec; struct syscall_trace_enter *rec;
...@@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) ...@@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
int size; int size;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
return; return;
sys_data = syscall_nr_to_meta(syscall_nr); sys_data = syscall_nr_to_meta(syscall_nr);
...@@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) ...@@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
size = ALIGN(size + sizeof(u32), sizeof(u64)); size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32); size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough")) "perf buffer not large enough"))
return; return;
rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
sys_data->enter_event->id, &rctx, &flags); sys_data->enter_event->id, &rctx, &flags);
if (!rec) if (!rec)
return; return;
...@@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) ...@@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
rec->nr = syscall_nr; rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args); (unsigned long *)&rec->args);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
} }
int prof_sysenter_enable(struct ftrace_event_call *call) int perf_sysenter_enable(struct ftrace_event_call *call)
{ {
int ret = 0; int ret = 0;
int num; int num;
...@@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call) ...@@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
num = ((struct syscall_metadata *)call->data)->syscall_nr; num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock); mutex_lock(&syscall_trace_lock);
if (!sys_prof_refcount_enter) if (!sys_perf_refcount_enter)
ret = register_trace_sys_enter(prof_syscall_enter); ret = register_trace_sys_enter(perf_syscall_enter);
if (ret) { if (ret) {
pr_info("event trace: Could not activate" pr_info("event trace: Could not activate"
"syscall entry trace point"); "syscall entry trace point");
} else { } else {
set_bit(num, enabled_prof_enter_syscalls); set_bit(num, enabled_perf_enter_syscalls);
sys_prof_refcount_enter++; sys_perf_refcount_enter++;
} }
mutex_unlock(&syscall_trace_lock); mutex_unlock(&syscall_trace_lock);
return ret; return ret;
} }
void prof_sysenter_disable(struct ftrace_event_call *call) void perf_sysenter_disable(struct ftrace_event_call *call)
{ {
int num; int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr; num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock); mutex_lock(&syscall_trace_lock);
sys_prof_refcount_enter--; sys_perf_refcount_enter--;
clear_bit(num, enabled_prof_enter_syscalls); clear_bit(num, enabled_perf_enter_syscalls);
if (!sys_prof_refcount_enter) if (!sys_perf_refcount_enter)
unregister_trace_sys_enter(prof_syscall_enter); unregister_trace_sys_enter(perf_syscall_enter);
mutex_unlock(&syscall_trace_lock); mutex_unlock(&syscall_trace_lock);
} }
static void prof_syscall_exit(struct pt_regs *regs, long ret) static void perf_syscall_exit(struct pt_regs *regs, long ret)
{ {
struct syscall_metadata *sys_data; struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec; struct syscall_trace_exit *rec;
...@@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) ...@@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
int size; int size;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
return; return;
sys_data = syscall_nr_to_meta(syscall_nr); sys_data = syscall_nr_to_meta(syscall_nr);
...@@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) ...@@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
* Impossible, but be paranoid with the future * Impossible, but be paranoid with the future
* How to put this check outside runtime? * How to put this check outside runtime?
*/ */
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"exit event has grown above profile buffer size")) "exit event has grown above perf buffer size"))
return; return;
rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
sys_data->exit_event->id, &rctx, &flags); sys_data->exit_event->id, &rctx, &flags);
if (!rec) if (!rec)
return; return;
...@@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) ...@@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
rec->nr = syscall_nr; rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs); rec->ret = syscall_get_return_value(current, regs);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
} }
int prof_sysexit_enable(struct ftrace_event_call *call) int perf_sysexit_enable(struct ftrace_event_call *call)
{ {
int ret = 0; int ret = 0;
int num; int num;
...@@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call) ...@@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
num = ((struct syscall_metadata *)call->data)->syscall_nr; num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock); mutex_lock(&syscall_trace_lock);
if (!sys_prof_refcount_exit) if (!sys_perf_refcount_exit)
ret = register_trace_sys_exit(prof_syscall_exit); ret = register_trace_sys_exit(perf_syscall_exit);
if (ret) { if (ret) {
pr_info("event trace: Could not activate" pr_info("event trace: Could not activate"
"syscall exit trace point"); "syscall exit trace point");
} else { } else {
set_bit(num, enabled_prof_exit_syscalls); set_bit(num, enabled_perf_exit_syscalls);
sys_prof_refcount_exit++; sys_perf_refcount_exit++;
} }
mutex_unlock(&syscall_trace_lock); mutex_unlock(&syscall_trace_lock);
return ret; return ret;
} }
void prof_sysexit_disable(struct ftrace_event_call *call) void perf_sysexit_disable(struct ftrace_event_call *call)
{ {
int num; int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr; num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock); mutex_lock(&syscall_trace_lock);
sys_prof_refcount_exit--; sys_perf_refcount_exit--;
clear_bit(num, enabled_prof_exit_syscalls); clear_bit(num, enabled_perf_exit_syscalls);
if (!sys_prof_refcount_exit) if (!sys_perf_refcount_exit)
unregister_trace_sys_exit(prof_syscall_exit); unregister_trace_sys_exit(perf_syscall_exit);
mutex_unlock(&syscall_trace_lock); mutex_unlock(&syscall_trace_lock);
} }
......
...@@ -24,7 +24,10 @@ DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) ...@@ -24,7 +24,10 @@ DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
# Make the path relative to DESTDIR, not prefix
ifndef DESTDIR
prefix?=$(HOME) prefix?=$(HOME)
endif
bindir?=$(prefix)/bin bindir?=$(prefix)/bin
htmldir?=$(prefix)/share/doc/perf-doc htmldir?=$(prefix)/share/doc/perf-doc
pdfdir?=$(prefix)/share/doc/perf-doc pdfdir?=$(prefix)/share/doc/perf-doc
...@@ -32,7 +35,6 @@ mandir?=$(prefix)/share/man ...@@ -32,7 +35,6 @@ mandir?=$(prefix)/share/man
man1dir=$(mandir)/man1 man1dir=$(mandir)/man1
man5dir=$(mandir)/man5 man5dir=$(mandir)/man5
man7dir=$(mandir)/man7 man7dir=$(mandir)/man7
# DESTDIR=
ASCIIDOC=asciidoc ASCIIDOC=asciidoc
ASCIIDOC_EXTRA = --unsafe ASCIIDOC_EXTRA = --unsafe
......
...@@ -216,7 +216,10 @@ STRIP ?= strip ...@@ -216,7 +216,10 @@ STRIP ?= strip
# runtime figures out where they are based on the path to the executable. # runtime figures out where they are based on the path to the executable.
# This can help installing the suite in a relocatable way. # This can help installing the suite in a relocatable way.
# Make the path relative to DESTDIR, not to prefix
ifndef DESTDIR
prefix = $(HOME) prefix = $(HOME)
endif
bindir_relative = bin bindir_relative = bin
bindir = $(prefix)/$(bindir_relative) bindir = $(prefix)/$(bindir_relative)
mandir = share/man mandir = share/man
...@@ -233,7 +236,6 @@ sysconfdir = $(prefix)/etc ...@@ -233,7 +236,6 @@ sysconfdir = $(prefix)/etc
ETC_PERFCONFIG = etc/perfconfig ETC_PERFCONFIG = etc/perfconfig
endif endif
lib = lib lib = lib
# DESTDIR=
export prefix bindir sharedir sysconfdir export prefix bindir sharedir sysconfdir
...@@ -387,6 +389,7 @@ LIB_H += util/thread.h ...@@ -387,6 +389,7 @@ LIB_H += util/thread.h
LIB_H += util/trace-event.h LIB_H += util/trace-event.h
LIB_H += util/probe-finder.h LIB_H += util/probe-finder.h
LIB_H += util/probe-event.h LIB_H += util/probe-event.h
LIB_H += util/cpumap.h
LIB_OBJS += util/abspath.o LIB_OBJS += util/abspath.o
LIB_OBJS += util/alias.o LIB_OBJS += util/alias.o
...@@ -433,6 +436,7 @@ LIB_OBJS += util/sort.o ...@@ -433,6 +436,7 @@ LIB_OBJS += util/sort.o
LIB_OBJS += util/hist.o LIB_OBJS += util/hist.o
LIB_OBJS += util/probe-event.o LIB_OBJS += util/probe-event.o
LIB_OBJS += util/util.o LIB_OBJS += util/util.o
LIB_OBJS += util/cpumap.o
BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-annotate.o
......
...@@ -116,7 +116,7 @@ static int perf_session__add_hist_entry(struct perf_session *self, ...@@ -116,7 +116,7 @@ static int perf_session__add_hist_entry(struct perf_session *self,
return 0; return 0;
} }
he = __perf_session__add_hist_entry(self, al, NULL, count, &hit); he = __perf_session__add_hist_entry(&self->hists, al, NULL, count, &hit);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -564,8 +564,8 @@ static int __cmd_annotate(void) ...@@ -564,8 +564,8 @@ static int __cmd_annotate(void)
if (verbose > 2) if (verbose > 2)
dsos__fprintf(stdout); dsos__fprintf(stdout);
perf_session__collapse_resort(session); perf_session__collapse_resort(&session->hists);
perf_session__output_resort(session, session->event_total[0]); perf_session__output_resort(&session->hists, session->event_total[0]);
perf_session__find_annotations(session); perf_session__find_annotations(session);
out_delete: out_delete:
perf_session__delete(session); perf_session__delete(session);
......
...@@ -26,7 +26,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, ...@@ -26,7 +26,8 @@ static int perf_session__add_hist_entry(struct perf_session *self,
struct addr_location *al, u64 count) struct addr_location *al, u64 count)
{ {
bool hit; bool hit;
struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL, struct hist_entry *he = __perf_session__add_hist_entry(&self->hists,
al, NULL,
count, &hit); count, &hit);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -114,7 +115,7 @@ static void perf_session__resort_hist_entries(struct perf_session *self) ...@@ -114,7 +115,7 @@ static void perf_session__resort_hist_entries(struct perf_session *self)
static void perf_session__set_hist_entries_positions(struct perf_session *self) static void perf_session__set_hist_entries_positions(struct perf_session *self)
{ {
perf_session__output_resort(self, self->events_stats.total); perf_session__output_resort(&self->hists, self->events_stats.total);
perf_session__resort_hist_entries(self); perf_session__resort_hist_entries(self);
} }
...@@ -166,13 +167,15 @@ static int __cmd_diff(void) ...@@ -166,13 +167,15 @@ static int __cmd_diff(void)
goto out_delete; goto out_delete;
} }
perf_session__output_resort(session[1], session[1]->events_stats.total); perf_session__output_resort(&session[1]->hists,
session[1]->events_stats.total);
if (show_displacement) if (show_displacement)
perf_session__set_hist_entries_positions(session[0]); perf_session__set_hist_entries_positions(session[0]);
perf_session__match_hists(session[0], session[1]); perf_session__match_hists(session[0], session[1]);
perf_session__fprintf_hists(session[1], session[0], perf_session__fprintf_hists(&session[1]->hists, session[0],
show_displacement, stdout); show_displacement, stdout,
session[1]->events_stats.total);
out_delete: out_delete:
for (i = 0; i < 2; ++i) for (i = 0; i < 2; ++i)
perf_session__delete(session[i]); perf_session__delete(session[i]);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "util/debug.h" #include "util/debug.h"
#include "util/session.h" #include "util/session.h"
#include "util/symbol.h" #include "util/symbol.h"
#include "util/cpumap.h"
#include <unistd.h> #include <unistd.h>
#include <sched.h> #include <sched.h>
...@@ -244,6 +245,9 @@ static void create_counter(int counter, int cpu, pid_t pid) ...@@ -244,6 +245,9 @@ static void create_counter(int counter, int cpu, pid_t pid)
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (nr_counters > 1)
attr->sample_type |= PERF_SAMPLE_ID;
if (freq) { if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD; attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1; attr->freq = 1;
...@@ -391,6 +395,9 @@ static int process_buildids(void) ...@@ -391,6 +395,9 @@ static int process_buildids(void)
{ {
u64 size = lseek(output, 0, SEEK_CUR); u64 size = lseek(output, 0, SEEK_CUR);
if (size == 0)
return 0;
session->fd = output; session->fd = output;
return __perf_session__process_events(session, post_processing_offset, return __perf_session__process_events(session, post_processing_offset,
size - post_processing_offset, size - post_processing_offset,
...@@ -418,9 +425,6 @@ static int __cmd_record(int argc, const char **argv) ...@@ -418,9 +425,6 @@ static int __cmd_record(int argc, const char **argv)
char buf; char buf;
page_size = sysconf(_SC_PAGE_SIZE); page_size = sysconf(_SC_PAGE_SIZE);
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
atexit(sig_atexit); atexit(sig_atexit);
signal(SIGCHLD, sig_handler); signal(SIGCHLD, sig_handler);
...@@ -544,8 +548,9 @@ static int __cmd_record(int argc, const char **argv) ...@@ -544,8 +548,9 @@ static int __cmd_record(int argc, const char **argv)
if ((!system_wide && !inherit) || profile_cpu != -1) { if ((!system_wide && !inherit) || profile_cpu != -1) {
open_counters(profile_cpu, target_pid); open_counters(profile_cpu, target_pid);
} else { } else {
nr_cpus = read_cpu_map();
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid); open_counters(cpumap[i], target_pid);
} }
if (file_new) { if (file_new) {
......
...@@ -45,28 +45,71 @@ static char *pretty_printing_style = default_pretty_printing_style; ...@@ -45,28 +45,71 @@ static char *pretty_printing_style = default_pretty_printing_style;
static char callchain_default_opt[] = "fractal,0.5"; static char callchain_default_opt[] = "fractal,0.5";
/*
 * Look up the per-event statistics node for (type, config) in
 * self->stats_by_id, creating and inserting a zero-initialised node when
 * none exists yet.
 *
 * The tree is ordered by config first and by type second.  Both fields
 * take part in the match so that two events sharing a config value but
 * differing in type get separate nodes instead of being merged.
 *
 * @self:         session owning the stats_by_id tree
 * @event_stream: stored in a newly created node; not used for lookup
 * @type:         event type, part of the lookup key
 * @config:       event config, part of the lookup key
 *
 * Returns the existing or newly inserted node, or NULL if the allocation
 * of a new node fails.
 */
static struct event_stat_id *get_stats(struct perf_session *self,
				       u64 event_stream, u32 type, u64 config)
{
	struct rb_node **p = &self->stats_by_id.rb_node;
	struct rb_node *parent = NULL;
	struct event_stat_id *iter, *new;

	while (*p != NULL) {
		parent = *p;
		iter = rb_entry(parent, struct event_stat_id, rb_node);

		if (iter->config == config && iter->type == type)
			return iter;

		/* config is the primary key, type breaks ties */
		if (config > iter->config ||
		    (config == iter->config && type > iter->type))
			p = &(*p)->rb_right;
		else
			p = &(*p)->rb_left;
	}

	/* calloc() hands back zeroed memory, so no separate memset() */
	new = calloc(1, sizeof(*new));
	if (new == NULL)
		return NULL;

	new->event_stream = event_stream;
	new->config = config;
	new->type = type;

	/* parent/p still describe the empty slot where the search ended */
	rb_link_node(&new->rb_node, parent, p);
	rb_insert_color(&new->rb_node, &self->stats_by_id);
	return new;
}
static int perf_session__add_hist_entry(struct perf_session *self, static int perf_session__add_hist_entry(struct perf_session *self,
struct addr_location *al, struct addr_location *al,
struct ip_callchain *chain, u64 count) struct sample_data *data)
{ {
struct symbol **syms = NULL, *parent = NULL; struct symbol **syms = NULL, *parent = NULL;
bool hit; bool hit;
struct hist_entry *he; struct hist_entry *he;
struct event_stat_id *stats;
struct perf_event_attr *attr;
if ((sort__has_parent || symbol_conf.use_callchain) && chain) if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain)
syms = perf_session__resolve_callchain(self, al->thread, syms = perf_session__resolve_callchain(self, al->thread,
chain, &parent); data->callchain, &parent);
he = __perf_session__add_hist_entry(self, al, parent, count, &hit);
attr = perf_header__find_attr(data->id, &self->header);
if (attr)
stats = get_stats(self, data->id, attr->type, attr->config);
else
stats = get_stats(self, data->id, 0, 0);
if (stats == NULL)
return -ENOMEM;
he = __perf_session__add_hist_entry(&stats->hists, al, parent,
data->period, &hit);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
if (hit) if (hit)
he->count += count; he->count += data->period;
if (symbol_conf.use_callchain) { if (symbol_conf.use_callchain) {
if (!hit) if (!hit)
callchain_init(&he->callchain); callchain_init(&he->callchain);
append_chain(&he->callchain, chain, syms); append_chain(&he->callchain, data->callchain, syms);
free(syms); free(syms);
} }
...@@ -86,10 +129,30 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) ...@@ -86,10 +129,30 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
return 0; return 0;
} }
static int add_event_total(struct perf_session *session,
struct sample_data *data,
struct perf_event_attr *attr)
{
struct event_stat_id *stats;
if (attr)
stats = get_stats(session, data->id, attr->type, attr->config);
else
stats = get_stats(session, data->id, 0, 0);
if (!stats)
return -ENOMEM;
stats->stats.total += data->period;
session->events_stats.total += data->period;
return 0;
}
static int process_sample_event(event_t *event, struct perf_session *session) static int process_sample_event(event_t *event, struct perf_session *session)
{ {
struct sample_data data = { .period = 1, }; struct sample_data data = { .period = 1, };
struct addr_location al; struct addr_location al;
struct perf_event_attr *attr;
event__parse_sample(event, session->sample_type, &data); event__parse_sample(event, session->sample_type, &data);
...@@ -123,12 +186,18 @@ static int process_sample_event(event_t *event, struct perf_session *session) ...@@ -123,12 +186,18 @@ static int process_sample_event(event_t *event, struct perf_session *session)
if (al.filtered || (hide_unresolved && al.sym == NULL)) if (al.filtered || (hide_unresolved && al.sym == NULL))
return 0; return 0;
if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) { if (perf_session__add_hist_entry(session, &al, &data)) {
pr_debug("problem incrementing symbol count, skipping event\n"); pr_debug("problem incrementing symbol count, skipping event\n");
return -1; return -1;
} }
session->events_stats.total += data.period; attr = perf_header__find_attr(data.id, &session->header);
if (add_event_total(session, &data, attr)) {
pr_debug("problem adding event count\n");
return -1;
}
return 0; return 0;
} }
...@@ -197,6 +266,7 @@ static int __cmd_report(void) ...@@ -197,6 +266,7 @@ static int __cmd_report(void)
{ {
int ret = -EINVAL; int ret = -EINVAL;
struct perf_session *session; struct perf_session *session;
struct rb_node *next;
session = perf_session__new(input_name, O_RDONLY, force); session = perf_session__new(input_name, O_RDONLY, force);
if (session == NULL) if (session == NULL)
...@@ -224,10 +294,28 @@ static int __cmd_report(void) ...@@ -224,10 +294,28 @@ static int __cmd_report(void)
if (verbose > 2) if (verbose > 2)
dsos__fprintf(stdout); dsos__fprintf(stdout);
perf_session__collapse_resort(session); next = rb_first(&session->stats_by_id);
perf_session__output_resort(session, session->events_stats.total); while (next) {
fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); struct event_stat_id *stats;
perf_session__fprintf_hists(session, NULL, false, stdout);
stats = rb_entry(next, struct event_stat_id, rb_node);
perf_session__collapse_resort(&stats->hists);
perf_session__output_resort(&stats->hists, stats->stats.total);
if (rb_first(&session->stats_by_id) ==
rb_last(&session->stats_by_id))
fprintf(stdout, "# Samples: %Ld\n#\n",
stats->stats.total);
else
fprintf(stdout, "# Samples: %Ld %s\n#\n",
stats->stats.total,
__event_name(stats->type, stats->config));
perf_session__fprintf_hists(&stats->hists, NULL, false, stdout,
stats->stats.total);
fprintf(stdout, "\n\n");
next = rb_next(&stats->rb_node);
}
if (sort_order == default_sort_order && if (sort_order == default_sort_order &&
parent_pattern == default_parent_pattern) parent_pattern == default_parent_pattern)
fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n");
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "util/event.h" #include "util/event.h"
#include "util/debug.h" #include "util/debug.h"
#include "util/header.h" #include "util/header.h"
#include "util/cpumap.h"
#include <sys/prctl.h> #include <sys/prctl.h>
#include <math.h> #include <math.h>
...@@ -151,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid) ...@@ -151,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid)
unsigned int cpu; unsigned int cpu;
for (cpu = 0; cpu < nr_cpus; cpu++) { for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0);
if (fd[cpu][counter] < 0 && verbose) if (fd[cpu][counter] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter, fprintf(stderr, ERR_PERF_OPEN, counter,
fd[cpu][counter], strerror(errno)); fd[cpu][counter], strerror(errno));
...@@ -519,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) ...@@ -519,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
nr_counters = ARRAY_SIZE(default_attrs); nr_counters = ARRAY_SIZE(default_attrs);
} }
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (system_wide)
assert(nr_cpus <= MAX_NR_CPUS); nr_cpus = read_cpu_map();
assert((int)nr_cpus >= 0); else
nr_cpus = 1;
/* /*
* We dont want to block the signals - that would cause * We dont want to block the signals - that would cause
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include "util/parse-options.h" #include "util/parse-options.h"
#include "util/parse-events.h" #include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/debug.h" #include "util/debug.h"
...@@ -1123,7 +1124,7 @@ static void start_counter(int i, int counter) ...@@ -1123,7 +1124,7 @@ static void start_counter(int i, int counter)
cpu = profile_cpu; cpu = profile_cpu;
if (target_pid == -1 && profile_cpu == -1) if (target_pid == -1 && profile_cpu == -1)
cpu = i; cpu = cpumap[i];
attr = attrs + counter; attr = attrs + counter;
...@@ -1347,12 +1348,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) ...@@ -1347,12 +1348,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
attrs[counter].sample_period = default_interval; attrs[counter].sample_period = default_interval;
} }
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
if (target_pid != -1 || profile_cpu != -1) if (target_pid != -1 || profile_cpu != -1)
nr_cpus = 1; nr_cpus = 1;
else
nr_cpus = read_cpu_map();
get_term_dimensions(&winsize); get_term_dimensions(&winsize);
if (print_entries == 0) { if (print_entries == 0) {
......
#include "util.h"
#include "../perf.h"
#include "cpumap.h"
#include <assert.h>
#include <stdio.h>
/* CPU numbers to operate on; filled in by read_cpu_map(). */
int cpumap[MAX_NR_CPUS];
/*
 * Fallback CPU map: ask sysconf() how many processors are online and
 * fill cpumap[] with the identity mapping 0..nr_cpus-1.
 *
 * Returns the number of entries written to cpumap[].
 */
static int default_cpu_map(void)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu = 0;

	assert(nr_cpus <= MAX_NR_CPUS);
	assert((int)nr_cpus >= 0);

	while (cpu < nr_cpus) {
		cpumap[cpu] = cpu;
		cpu++;
	}

	return nr_cpus;
}
/*
 * Fill cpumap[] from /sys/devices/system/cpu/online.
 *
 * The file is read as a sequence of "<number><separator>" items:
 *   - a '-' separator marks the number as the start of an inclusive range
 *     that ends at the next number read; the CPUs in between are filled in;
 *   - a '\n' separator, or a number with no separator after it, ends parsing;
 *   - any other separator simply moves on to the next number.
 *
 * Bounds against MAX_NR_CPUS are enforced with assert().
 *
 * Returns the number of entries stored in cpumap[].  If the file cannot be
 * opened, or no CPU number could be read from it, the result of
 * default_cpu_map() is returned instead.
 */
int read_cpu_map(void)
{
	FILE *onlnf;
	int nr_cpus = 0;
	int n, cpu, prev;
	char sep;

	onlnf = fopen("/sys/devices/system/cpu/online", "r");
	if (!onlnf)
		return default_cpu_map();

	sep = 0;
	prev = -1;
	for (;;) {
		/* cpu is an int, so the conversion must be %d, not %u */
		n = fscanf(onlnf, "%d%c", &cpu, &sep);
		if (n <= 0)
			break;

		if (prev >= 0) {
			/* previous item opened a range: add prev+1 .. cpu-1 */
			assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS);
			while (++prev < cpu)
				cpumap[nr_cpus++] = prev;
		}

		assert (nr_cpus < MAX_NR_CPUS);
		cpumap[nr_cpus++] = cpu;

		/* remember a range start so the next number can close it */
		if (n == 2 && sep == '-')
			prev = cpu;
		else
			prev = -1;

		/* no separator, or end of line: nothing more to parse */
		if (n == 1 || sep == '\n')
			break;
	}
	fclose(onlnf);

	if (nr_cpus > 0)
		return nr_cpus;

	return default_cpu_map();
}
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
/*
 * Fill cpumap[] with the CPU numbers to use and return how many entries
 * were written.
 */
extern int read_cpu_map(void);
/* CPU numbers written by read_cpu_map(); valid for indices below its return value. */
extern int cpumap[];
#endif /* __PERF_CPUMAP_H */
...@@ -99,6 +99,15 @@ struct events_stats { ...@@ -99,6 +99,15 @@ struct events_stats {
u64 lost; u64 lost;
}; };
/*
 * Per-event statistics and histogram, kept as a node of the
 * perf_session stats_by_id red-black tree.
 */
struct event_stat_id {
/* linkage into perf_session::stats_by_id */
struct rb_node rb_node;
/* root of the hist_entry tree collected for this event */
struct rb_root hists;
/* counters for this event; stats.total accumulates sample periods */
struct events_stats stats;
/* event config value; used as the lookup key in the tree */
u64 config;
/* id taken from the sample (sample_data::id) when the node was created */
u64 event_stream;
/* event type, taken from perf_event_attr::type (0 when no attr was found) */
u32 type;
};
void event__print_totals(void); void event__print_totals(void);
struct perf_session; struct perf_session;
......
...@@ -12,12 +12,12 @@ struct callchain_param callchain_param = { ...@@ -12,12 +12,12 @@ struct callchain_param callchain_param = {
* histogram, sorted on item, collects counts * histogram, sorted on item, collects counts
*/ */
struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists,
struct addr_location *al, struct addr_location *al,
struct symbol *sym_parent, struct symbol *sym_parent,
u64 count, bool *hit) u64 count, bool *hit)
{ {
struct rb_node **p = &self->hists.rb_node; struct rb_node **p = &hists->rb_node;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct hist_entry *he; struct hist_entry *he;
struct hist_entry entry = { struct hist_entry entry = {
...@@ -53,7 +53,7 @@ struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, ...@@ -53,7 +53,7 @@ struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self,
return NULL; return NULL;
*he = entry; *he = entry;
rb_link_node(&he->rb_node, parent, p); rb_link_node(&he->rb_node, parent, p);
rb_insert_color(&he->rb_node, &self->hists); rb_insert_color(&he->rb_node, hists);
*hit = false; *hit = false;
return he; return he;
} }
...@@ -130,7 +130,7 @@ static void collapse__insert_entry(struct rb_root *root, struct hist_entry *he) ...@@ -130,7 +130,7 @@ static void collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
rb_insert_color(&he->rb_node, root); rb_insert_color(&he->rb_node, root);
} }
void perf_session__collapse_resort(struct perf_session *self) void perf_session__collapse_resort(struct rb_root *hists)
{ {
struct rb_root tmp; struct rb_root tmp;
struct rb_node *next; struct rb_node *next;
...@@ -140,17 +140,17 @@ void perf_session__collapse_resort(struct perf_session *self) ...@@ -140,17 +140,17 @@ void perf_session__collapse_resort(struct perf_session *self)
return; return;
tmp = RB_ROOT; tmp = RB_ROOT;
next = rb_first(&self->hists); next = rb_first(hists);
while (next) { while (next) {
n = rb_entry(next, struct hist_entry, rb_node); n = rb_entry(next, struct hist_entry, rb_node);
next = rb_next(&n->rb_node); next = rb_next(&n->rb_node);
rb_erase(&n->rb_node, &self->hists); rb_erase(&n->rb_node, hists);
collapse__insert_entry(&tmp, n); collapse__insert_entry(&tmp, n);
} }
self->hists = tmp; *hists = tmp;
} }
/* /*
...@@ -183,7 +183,7 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root, ...@@ -183,7 +183,7 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root,
rb_insert_color(&he->rb_node, root); rb_insert_color(&he->rb_node, root);
} }
void perf_session__output_resort(struct perf_session *self, u64 total_samples) void perf_session__output_resort(struct rb_root *hists, u64 total_samples)
{ {
struct rb_root tmp; struct rb_root tmp;
struct rb_node *next; struct rb_node *next;
...@@ -194,18 +194,18 @@ void perf_session__output_resort(struct perf_session *self, u64 total_samples) ...@@ -194,18 +194,18 @@ void perf_session__output_resort(struct perf_session *self, u64 total_samples)
total_samples * (callchain_param.min_percent / 100); total_samples * (callchain_param.min_percent / 100);
tmp = RB_ROOT; tmp = RB_ROOT;
next = rb_first(&self->hists); next = rb_first(hists);
while (next) { while (next) {
n = rb_entry(next, struct hist_entry, rb_node); n = rb_entry(next, struct hist_entry, rb_node);
next = rb_next(&n->rb_node); next = rb_next(&n->rb_node);
rb_erase(&n->rb_node, &self->hists); rb_erase(&n->rb_node, hists);
perf_session__insert_output_hist_entry(&tmp, n, perf_session__insert_output_hist_entry(&tmp, n,
min_callchain_hits); min_callchain_hits);
} }
self->hists = tmp; *hists = tmp;
} }
static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
...@@ -456,10 +456,10 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, ...@@ -456,10 +456,10 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
} }
static size_t hist_entry__fprintf(struct hist_entry *self, static size_t hist_entry__fprintf(struct hist_entry *self,
struct perf_session *session,
struct perf_session *pair_session, struct perf_session *pair_session,
bool show_displacement, bool show_displacement,
long displacement, FILE *fp) long displacement, FILE *fp,
u64 session_total)
{ {
struct sort_entry *se; struct sort_entry *se;
u64 count, total; u64 count, total;
...@@ -474,7 +474,7 @@ static size_t hist_entry__fprintf(struct hist_entry *self, ...@@ -474,7 +474,7 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
total = pair_session->events_stats.total; total = pair_session->events_stats.total;
} else { } else {
count = self->count; count = self->count;
total = session->events_stats.total; total = session_total;
} }
if (total) if (total)
...@@ -496,8 +496,8 @@ static size_t hist_entry__fprintf(struct hist_entry *self, ...@@ -496,8 +496,8 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
if (total > 0) if (total > 0)
old_percent = (count * 100.0) / total; old_percent = (count * 100.0) / total;
if (session->events_stats.total > 0) if (session_total > 0)
new_percent = (self->count * 100.0) / session->events_stats.total; new_percent = (self->count * 100.0) / session_total;
diff = new_percent - old_percent; diff = new_percent - old_percent;
...@@ -544,16 +544,17 @@ static size_t hist_entry__fprintf(struct hist_entry *self, ...@@ -544,16 +544,17 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
left_margin -= thread__comm_len(self->thread); left_margin -= thread__comm_len(self->thread);
} }
hist_entry_callchain__fprintf(fp, self, session->events_stats.total, hist_entry_callchain__fprintf(fp, self, session_total,
left_margin); left_margin);
} }
return ret; return ret;
} }
size_t perf_session__fprintf_hists(struct perf_session *self, size_t perf_session__fprintf_hists(struct rb_root *hists,
struct perf_session *pair, struct perf_session *pair,
bool show_displacement, FILE *fp) bool show_displacement, FILE *fp,
u64 session_total)
{ {
struct sort_entry *se; struct sort_entry *se;
struct rb_node *nd; struct rb_node *nd;
...@@ -641,7 +642,7 @@ size_t perf_session__fprintf_hists(struct perf_session *self, ...@@ -641,7 +642,7 @@ size_t perf_session__fprintf_hists(struct perf_session *self,
fprintf(fp, "\n#\n"); fprintf(fp, "\n#\n");
print_entries: print_entries:
for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) { for (nd = rb_first(hists); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
if (show_displacement) { if (show_displacement) {
...@@ -652,8 +653,13 @@ size_t perf_session__fprintf_hists(struct perf_session *self, ...@@ -652,8 +653,13 @@ size_t perf_session__fprintf_hists(struct perf_session *self,
displacement = 0; displacement = 0;
++position; ++position;
} }
ret += hist_entry__fprintf(h, self, pair, show_displacement, ret += hist_entry__fprintf(h, pair, show_displacement,
displacement, fp); displacement, fp, session_total);
if (h->map == NULL && verbose > 1) {
__map_groups__fprintf_maps(&h->thread->mg,
MAP__FUNCTION, fp);
fprintf(fp, "%.10s end\n", graph_dotted_line);
}
} }
free(rem_sq_bracket); free(rem_sq_bracket);
......
...@@ -10,8 +10,9 @@ struct perf_session; ...@@ -10,8 +10,9 @@ struct perf_session;
struct hist_entry; struct hist_entry;
struct addr_location; struct addr_location;
struct symbol; struct symbol;
struct rb_root;
struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists,
struct addr_location *al, struct addr_location *al,
struct symbol *parent, struct symbol *parent,
u64 count, bool *hit); u64 count, bool *hit);
...@@ -19,9 +20,10 @@ extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); ...@@ -19,9 +20,10 @@ extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
void hist_entry__free(struct hist_entry *); void hist_entry__free(struct hist_entry *);
void perf_session__output_resort(struct perf_session *self, u64 total_samples); void perf_session__output_resort(struct rb_root *hists, u64 total_samples);
void perf_session__collapse_resort(struct perf_session *self); void perf_session__collapse_resort(struct rb_root *hists);
size_t perf_session__fprintf_hists(struct perf_session *self, size_t perf_session__fprintf_hists(struct rb_root *hists,
struct perf_session *pair, struct perf_session *pair,
bool show_displacement, FILE *fp); bool show_displacement, FILE *fp,
u64 session_total);
#endif /* __PERF_HIST_H */ #endif /* __PERF_HIST_H */
...@@ -169,7 +169,7 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) ...@@ -169,7 +169,7 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
{ {
Dwarf_Files *files; Dwarf_Files *files;
size_t nfiles, i; size_t nfiles, i;
const char *src; const char *src = NULL;
int ret; int ret;
if (!fname) if (!fname)
......
...@@ -70,6 +70,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc ...@@ -70,6 +70,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
memcpy(self->filename, filename, len); memcpy(self->filename, filename, len);
self->threads = RB_ROOT; self->threads = RB_ROOT;
self->stats_by_id = RB_ROOT;
self->last_match = NULL; self->last_match = NULL;
self->mmap_window = 32; self->mmap_window = 32;
self->cwd = NULL; self->cwd = NULL;
......
...@@ -20,6 +20,7 @@ struct perf_session { ...@@ -20,6 +20,7 @@ struct perf_session {
struct thread *last_match; struct thread *last_match;
struct map *vmlinux_maps[MAP__NR_TYPES]; struct map *vmlinux_maps[MAP__NR_TYPES];
struct events_stats events_stats; struct events_stats events_stats;
struct rb_root stats_by_id;
unsigned long event_total[PERF_RECORD_MAX]; unsigned long event_total[PERF_RECORD_MAX];
unsigned long unknown_events; unsigned long unknown_events;
struct rb_root hists; struct rb_root hists;
......
...@@ -79,8 +79,8 @@ int thread__comm_len(struct thread *self) ...@@ -79,8 +79,8 @@ int thread__comm_len(struct thread *self)
return self->comm_len; return self->comm_len;
} }
static size_t __map_groups__fprintf_maps(struct map_groups *self, size_t __map_groups__fprintf_maps(struct map_groups *self,
enum map_type type, FILE *fp) enum map_type type, FILE *fp)
{ {
size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
struct rb_node *nd; struct rb_node *nd;
...@@ -89,7 +89,7 @@ static size_t __map_groups__fprintf_maps(struct map_groups *self, ...@@ -89,7 +89,7 @@ static size_t __map_groups__fprintf_maps(struct map_groups *self,
struct map *pos = rb_entry(nd, struct map, rb_node); struct map *pos = rb_entry(nd, struct map, rb_node);
printed += fprintf(fp, "Map:"); printed += fprintf(fp, "Map:");
printed += map__fprintf(pos, fp); printed += map__fprintf(pos, fp);
if (verbose > 1) { if (verbose > 2) {
printed += dso__fprintf(pos->dso, type, fp); printed += dso__fprintf(pos->dso, type, fp);
printed += fprintf(fp, "--\n"); printed += fprintf(fp, "--\n");
} }
...@@ -183,8 +183,8 @@ struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) ...@@ -183,8 +183,8 @@ struct thread *perf_session__findnew(struct perf_session *self, pid_t pid)
return th; return th;
} }
static void map_groups__remove_overlappings(struct map_groups *self, static int map_groups__fixup_overlappings(struct map_groups *self,
struct map *map) struct map *map)
{ {
struct rb_root *root = &self->maps[map->type]; struct rb_root *root = &self->maps[map->type];
struct rb_node *next = rb_first(root); struct rb_node *next = rb_first(root);
...@@ -209,7 +209,36 @@ static void map_groups__remove_overlappings(struct map_groups *self, ...@@ -209,7 +209,36 @@ static void map_groups__remove_overlappings(struct map_groups *self,
* list. * list.
*/ */
list_add_tail(&pos->node, &self->removed_maps[map->type]); list_add_tail(&pos->node, &self->removed_maps[map->type]);
/*
* Now check if we need to create new maps for areas not
* overlapped by the new map:
*/
if (map->start > pos->start) {
struct map *before = map__clone(pos);
if (before == NULL)
return -ENOMEM;
before->end = map->start - 1;
map_groups__insert(self, before);
if (verbose >= 2)
map__fprintf(before, stderr);
}
if (map->end < pos->end) {
struct map *after = map__clone(pos);
if (after == NULL)
return -ENOMEM;
after->start = map->end + 1;
map_groups__insert(self, after);
if (verbose >= 2)
map__fprintf(after, stderr);
}
} }
return 0;
} }
void maps__insert(struct rb_root *maps, struct map *map) void maps__insert(struct rb_root *maps, struct map *map)
...@@ -254,7 +283,7 @@ struct map *maps__find(struct rb_root *maps, u64 ip) ...@@ -254,7 +283,7 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
void thread__insert_map(struct thread *self, struct map *map) void thread__insert_map(struct thread *self, struct map *map)
{ {
map_groups__remove_overlappings(&self->mg, map); map_groups__fixup_overlappings(&self->mg, map);
map_groups__insert(&self->mg, map); map_groups__insert(&self->mg, map);
} }
......
...@@ -10,6 +10,9 @@ struct map_groups { ...@@ -10,6 +10,9 @@ struct map_groups {
struct list_head removed_maps[MAP__NR_TYPES]; struct list_head removed_maps[MAP__NR_TYPES];
}; };
size_t __map_groups__fprintf_maps(struct map_groups *self,
enum map_type type, FILE *fp);
struct thread { struct thread {
struct rb_node rb_node; struct rb_node rb_node;
struct map_groups mg; struct map_groups mg;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment