Commit 0ffa798d authored by Ingo Molnar

Merge branches 'perf/powerpc' and 'perf/bench' into perf/core

Merge reason: Both 'perf bench' and the pending PowerPC changes
              are now ready for the next merge window.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
...@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE ...@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
config HCALL_STATS config HCALL_STATS
bool "Hypervisor call instrumentation" bool "Hypervisor call instrumentation"
depends on PPC_PSERIES && DEBUG_FS depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
help help
Adds code to keep track of the number of hypervisor calls made and Adds code to keep track of the number of hypervisor calls made and
the amount of time spent in hypervisor calls. Wall time spent in the amount of time spent in hypervisor calls. Wall time spent in
......
...@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y ...@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_HCALL_STATS=y # CONFIG_HCALL_STATS is not set
# CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_CODE_PATCHING_SELFTEST is not set
# CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set
# CONFIG_MSI_BITMAP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define _ASM_POWERPC_EMULATED_OPS_H #define _ASM_POWERPC_EMULATED_OPS_H
#include <asm/atomic.h> #include <asm/atomic.h>
#include <linux/perf_event.h>
#ifdef CONFIG_PPC_EMULATED_STATS #ifdef CONFIG_PPC_EMULATED_STATS
...@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated; ...@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
extern void ppc_warn_emulated_print(const char *type); extern void ppc_warn_emulated_print(const char *type);
#define PPC_WARN_EMULATED(type) \ #define __PPC_WARN_EMULATED(type) \
do { \ do { \
atomic_inc(&ppc_emulated.type.val); \ atomic_inc(&ppc_emulated.type.val); \
if (ppc_warn_emulated) \ if (ppc_warn_emulated) \
...@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type); ...@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
#else /* !CONFIG_PPC_EMULATED_STATS */ #else /* !CONFIG_PPC_EMULATED_STATS */
#define PPC_WARN_EMULATED(type) do { } while (0) #define __PPC_WARN_EMULATED(type) do { } while (0)
#endif /* !CONFIG_PPC_EMULATED_STATS */ #endif /* !CONFIG_PPC_EMULATED_STATS */
/*
 * Report one emulated instruction of kind @type: raise the
 * PERF_COUNT_SW_EMULATION_FAULTS software event for @regs, then hand off to
 * __PPC_WARN_EMULATED(type), which does the per-type accounting (and is a
 * no-op when CONFIG_PPC_EMULATED_STATS is not set).
 *
 * NOTE(review): the literal arguments 1, 0 and the trailing 0 are presumably
 * the event count, the NMI flag and the event address -- confirm against the
 * perf_sw_event() prototype.
 */
#define PPC_WARN_EMULATED(type, regs) \
do { \
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
1, 0, regs, 0); \
__PPC_WARN_EMULATED(type); \
} while (0)
/*
 * Report one emulated unaligned access of kind @type: raise the
 * PERF_COUNT_SW_ALIGNMENT_FAULTS software event for @regs, passing the dar
 * field of @regs as the event address, then hand off to
 * __PPC_WARN_EMULATED(type) for the per-type accounting.
 *
 * @regs is parenthesized before the member access so that any pointer
 * expression (a cast, a conditional, ...) binds correctly.  Note that @regs
 * is evaluated twice, so it must not have side effects.
 */
#define PPC_WARN_ALIGNMENT(type, regs)					\
	do {								\
		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
			1, 0, regs, (regs)->dar);			\
		__PPC_WARN_EMULATED(type);				\
	} while (0)
#endif /* _ASM_POWERPC_EMULATED_OPS_H */ #endif /* _ASM_POWERPC_EMULATED_OPS_H */
...@@ -274,6 +274,8 @@ struct hcall_stats { ...@@ -274,6 +274,8 @@ struct hcall_stats {
unsigned long num_calls; /* number of calls (on this CPU) */ unsigned long num_calls; /* number of calls (on this CPU) */
unsigned long tb_total; /* total wall time (mftb) of calls. */ unsigned long tb_total; /* total wall time (mftb) of calls. */
unsigned long purr_total; /* total cpu time (PURR) of calls. */ unsigned long purr_total; /* total cpu time (PURR) of calls. */
unsigned long tb_start;
unsigned long purr_start;
}; };
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
......
...@@ -489,6 +489,8 @@ ...@@ -489,6 +489,8 @@
#define SPRN_MMCR1 798 #define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312 #define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
#define MMCRA_SDAR_ERAT_MISS 0x20000000UL
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
......
#undef TRACE_SYSTEM
#define TRACE_SYSTEM powerpc
#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_POWERPC_H
#include <linux/tracepoint.h>
struct pt_regs;
/*
 * irq_entry tracepoint: takes a struct pt_regs pointer, stores only the
 * pointer value (not the register contents) in the trace record, and
 * prints it as "pt_regs=%p".
 */
TRACE_EVENT(irq_entry,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
/*
 * irq_exit tracepoint: same record layout as irq_entry -- the struct
 * pt_regs pointer value is stored and printed as "pt_regs=%p".
 */
TRACE_EVENT(irq_exit,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
/*
 * timer_interrupt_entry tracepoint: takes a struct pt_regs pointer, stores
 * only the pointer value in the trace record, and prints it as
 * "pt_regs=%p".
 */
TRACE_EVENT(timer_interrupt_entry,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
/*
 * timer_interrupt_exit tracepoint: same record layout as
 * timer_interrupt_entry -- the struct pt_regs pointer value is stored and
 * printed as "pt_regs=%p".
 */
TRACE_EVENT(timer_interrupt_exit,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
#ifdef CONFIG_PPC_PSERIES
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
/*
 * hcall_entry tracepoint: probes receive the hypervisor call opcode and a
 * pointer to its arguments, but only the opcode is stored in the trace
 * record (printed as "opcode=%lu").
 *
 * Declared with TRACE_EVENT_FN so that hcall_tracepoint_regfunc() and
 * hcall_tracepoint_unregfunc() are supplied as the registration and
 * unregistration callbacks for this tracepoint.
 */
TRACE_EVENT_FN(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
TP_fast_assign(
__entry->opcode = opcode;
),
TP_printk("opcode=%lu", __entry->opcode),
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
/*
 * hcall_exit tracepoint: records the hypervisor call opcode and its return
 * value.  @retbuf is passed through to probes but is not stored in the
 * trace record.
 *
 * The return value is stored and printed as a signed long: hcall status
 * codes that report errors are negative, and printing them with %lu
 * rendered them as huge unsigned numbers.  TP_PROTO is deliberately left
 * unchanged so that existing probes keep their signature.
 *
 * Declared with TRACE_EVENT_FN so that hcall_tracepoint_regfunc() and
 * hcall_tracepoint_unregfunc() are supplied as the registration and
 * unregistration callbacks for this tracepoint.
 */
TRACE_EVENT_FN(hcall_exit,

	TP_PROTO(unsigned long opcode, unsigned long retval,
		unsigned long *retbuf),

	TP_ARGS(opcode, retval, retbuf),

	TP_STRUCT__entry(
		__field(unsigned long, opcode)
		__field(long, retval)
	),

	TP_fast_assign(
		__entry->opcode = opcode;
		__entry->retval = retval;
	),

	TP_printk("opcode=%lu retval=%ld", __entry->opcode, __entry->retval),

	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
#endif
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH asm
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>
...@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs) ...@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_SPE #ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) { if ((instr >> 26) == 0x4) {
PPC_WARN_EMULATED(spe); PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr); return emulate_spe(regs, reg, instr);
} }
#endif #endif
...@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs) ...@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
flags |= SPLT; flags |= SPLT;
nb = 8; nb = 8;
} }
PPC_WARN_EMULATED(vsx); PPC_WARN_ALIGNMENT(vsx, regs);
return emulate_vsx(addr, reg, areg, regs, flags, nb); return emulate_vsx(addr, reg, areg, regs, flags, nb);
} }
#endif #endif
...@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs) ...@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
* the exception of DCBZ which is handled as a special case here * the exception of DCBZ which is handled as a special case here
*/ */
if (instr == DCBZ) { if (instr == DCBZ) {
PPC_WARN_EMULATED(dcbz); PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr); return emulate_dcbz(regs, addr);
} }
if (unlikely(nb == 0)) if (unlikely(nb == 0))
...@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs) ...@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
* function * function
*/ */
if (flags & M) { if (flags & M) {
PPC_WARN_EMULATED(multiple); PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb, return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz); flags, instr, swiz);
} }
...@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs) ...@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */ /* Special case for 16-byte FP loads and stores */
if (nb == 16) { if (nb == 16) {
PPC_WARN_EMULATED(fp_pair); PPC_WARN_ALIGNMENT(fp_pair, regs);
return emulate_fp_pair(addr, reg, flags); return emulate_fp_pair(addr, reg, flags);
} }
PPC_WARN_EMULATED(unaligned); PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else /* If we are loading, get the data from user space, else
* get it from register values * get it from register values
......
...@@ -551,7 +551,7 @@ restore: ...@@ -551,7 +551,7 @@ restore:
BEGIN_FW_FTR_SECTION BEGIN_FW_FTR_SECTION
ld r5,SOFTE(r1) ld r5,SOFTE(r1)
FW_FTR_SECTION_ELSE FW_FTR_SECTION_ELSE
b iseries_check_pending_irqs b .Liseries_check_pending_irqs
ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2: 2:
TRACE_AND_RESTORE_IRQ(r5); TRACE_AND_RESTORE_IRQ(r5);
...@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) ...@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_BOOK3E */ #endif /* CONFIG_PPC_BOOK3E */
iseries_check_pending_irqs: .Liseries_check_pending_irqs:
#ifdef CONFIG_PPC_ISERIES #ifdef CONFIG_PPC_ISERIES
ld r5,SOFTE(r1) ld r5,SOFTE(r1)
cmpdi 0,r5,0 cmpdi 0,r5,0
......
...@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) ...@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
* prolog code of the PerformanceMonitor one. A little * prolog code of the PerformanceMonitor one. A little
* trickery is thus necessary * trickery is thus necessary
*/ */
performance_monitor_pSeries_1:
. = 0xf00 . = 0xf00
b performance_monitor_pSeries b performance_monitor_pSeries
altivec_unavailable_pSeries_1:
. = 0xf20 . = 0xf20
b altivec_unavailable_pSeries b altivec_unavailable_pSeries
vsx_unavailable_pSeries_1:
. = 0xf40 . = 0xf40
b vsx_unavailable_pSeries b vsx_unavailable_pSeries
......
...@@ -70,6 +70,8 @@ ...@@ -70,6 +70,8 @@
#include <asm/firmware.h> #include <asm/firmware.h>
#include <asm/lv1call.h> #include <asm/lv1call.h>
#endif #endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
int __irq_offset_value; int __irq_offset_value;
static int ppc_spurious_interrupts; static int ppc_spurious_interrupts;
...@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs) ...@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs); struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq; unsigned int irq;
trace_irq_entry(regs);
irq_enter(); irq_enter();
check_stack_overflow(); check_stack_overflow();
...@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs) ...@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
timer_interrupt(regs); timer_interrupt(regs);
} }
#endif #endif
trace_irq_exit(regs);
} }
void __init init_IRQ(void) void __init init_IRQ(void)
......
...@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ...@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
*/ */
if (record) { if (record) {
struct perf_sample_data data = { struct perf_sample_data data = {
.addr = 0, .addr = ~0ULL,
.period = event->hw.last_period, .period = event->hw.last_period,
}; };
......
...@@ -72,10 +72,6 @@ ...@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f #define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/* /*
* Layout of constraint bits: * Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000 * 6666555555555544444444443333333333222222222211111111110000000000
......
...@@ -72,10 +72,6 @@ ...@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f #define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/* /*
* Layout of constraint bits: * Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000 * 6666555555555544444444443333333333222222222211111111110000000000
...@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
unsigned long mmcr1 = 0; unsigned long mmcr1 = 0;
unsigned long mmcra = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, byte, psel; unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp; unsigned int ttm, grp;
int i, isbus, bit, grsel; int i, isbus, bit, grsel;
......
...@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, ...@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
unsigned long mmcr1 = 0; unsigned long mmcr1 = 0;
unsigned long mmcra = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
int i; int i;
unsigned int pmc, ev, b, u, s, psel; unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0; unsigned int ttmset = 0;
......
...@@ -50,10 +50,6 @@ ...@@ -50,10 +50,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff #define MMCR1_PMCSEL_MSK 0xff
/*
* Bits in MMCRA
*/
/* /*
* Layout of constraint bits: * Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000 * 6666555555555544444444443333333333222222222211111111110000000000
...@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, ...@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
unsigned long mmcr1 = 0; unsigned long mmcr1 = 0;
unsigned long mmcra = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc, unit, combine, l2sel, psel;
unsigned int pmc_inuse = 0; unsigned int pmc_inuse = 0;
int i; int i;
......
...@@ -83,10 +83,6 @@ static short mmcr1_adder_bits[8] = { ...@@ -83,10 +83,6 @@ static short mmcr1_adder_bits[8] = {
MMCR1_PMC8_ADDER_SEL_SH MMCR1_PMC8_ADDER_SEL_SH
}; };
/*
* Bits in MMCRA
*/
/* /*
* Layout of constraint bits: * Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000 * 6666555555555544444444443333333333222222222211111111110000000000
......
...@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency); ...@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct dentry *powerpc_debugfs_root; struct dentry *powerpc_debugfs_root;
EXPORT_SYMBOL(powerpc_debugfs_root);
static int powerpc_debugfs_init(void) static int powerpc_debugfs_init(void)
{ {
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <asm/trace.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/processor.h> #include <asm/processor.h>
...@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs) ...@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
struct clock_event_device *evt = &decrementer->event; struct clock_event_device *evt = &decrementer->event;
u64 now; u64 now;
trace_timer_interrupt_entry(regs);
/* Ensure a positive value is written to the decrementer, or else /* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions */ * some CPUs will continue to take decrementer exceptions */
set_dec(DECREMENTER_MAX); set_dec(DECREMENTER_MAX);
...@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs) ...@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
now = decrementer->next_tb - now; now = decrementer->next_tb - now;
if (now <= DECREMENTER_MAX) if (now <= DECREMENTER_MAX)
set_dec((int)now); set_dec((int)now);
trace_timer_interrupt_exit(regs);
return; return;
} }
old_regs = set_irq_regs(regs); old_regs = set_irq_regs(regs);
...@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs) ...@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
irq_exit(); irq_exit();
set_irq_regs(old_regs); set_irq_regs(old_regs);
trace_timer_interrupt_exit(regs);
} }
void wakeup_decrementer(void) void wakeup_decrementer(void)
......
...@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs) ...@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate the mfspr rD, PVR. */ /* Emulate the mfspr rD, PVR. */
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
PPC_WARN_EMULATED(mfpvr); PPC_WARN_EMULATED(mfpvr, regs);
rd = (instword >> 21) & 0x1f; rd = (instword >> 21) & 0x1f;
regs->gpr[rd] = mfspr(SPRN_PVR); regs->gpr[rd] = mfspr(SPRN_PVR);
return 0; return 0;
...@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs) ...@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulating the dcba insn is just a no-op. */ /* Emulating the dcba insn is just a no-op. */
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
PPC_WARN_EMULATED(dcba); PPC_WARN_EMULATED(dcba, regs);
return 0; return 0;
} }
...@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs) ...@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
int shift = (instword >> 21) & 0x1c; int shift = (instword >> 21) & 0x1c;
unsigned long msk = 0xf0000000UL >> shift; unsigned long msk = 0xf0000000UL >> shift;
PPC_WARN_EMULATED(mcrxr); PPC_WARN_EMULATED(mcrxr, regs);
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
regs->xer &= ~0xf0000000UL; regs->xer &= ~0xf0000000UL;
return 0; return 0;
...@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs) ...@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate load/store string insn. */ /* Emulate load/store string insn. */
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
PPC_WARN_EMULATED(string); PPC_WARN_EMULATED(string, regs);
return emulate_string_inst(regs, instword); return emulate_string_inst(regs, instword);
} }
/* Emulate the popcntb (Population Count Bytes) instruction. */ /* Emulate the popcntb (Population Count Bytes) instruction. */
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
PPC_WARN_EMULATED(popcntb); PPC_WARN_EMULATED(popcntb, regs);
return emulate_popcntb_inst(regs, instword); return emulate_popcntb_inst(regs, instword);
} }
/* Emulate isel (Integer Select) instruction */ /* Emulate isel (Integer Select) instruction */
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
PPC_WARN_EMULATED(isel); PPC_WARN_EMULATED(isel, regs);
return emulate_isel(regs, instword); return emulate_isel(regs, instword);
} }
...@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs) ...@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#ifdef CONFIG_MATH_EMULATION #ifdef CONFIG_MATH_EMULATION
errcode = do_mathemu(regs); errcode = do_mathemu(regs);
if (errcode >= 0) if (errcode >= 0)
PPC_WARN_EMULATED(math); PPC_WARN_EMULATED(math, regs);
switch (errcode) { switch (errcode) {
case 0: case 0:
...@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs) ...@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#elif defined(CONFIG_8XX_MINIMAL_FPEMU) #elif defined(CONFIG_8XX_MINIMAL_FPEMU)
errcode = Soft_emulate_8xx(regs); errcode = Soft_emulate_8xx(regs);
if (errcode >= 0) if (errcode >= 0)
PPC_WARN_EMULATED(8xx); PPC_WARN_EMULATED(8xx, regs);
switch (errcode) { switch (errcode) {
case 0: case 0:
...@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs) ...@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
flush_altivec_to_thread(current); flush_altivec_to_thread(current);
PPC_WARN_EMULATED(altivec); PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs); err = emulate_altivec(regs);
if (err == 0) { if (err == 0) {
regs->nip += 4; /* skip emulated instruction */ regs->nip += 4; /* skip emulated instruction */
......
...@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION ...@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
srd r8,r5,r11 srd r8,r5,r11
mtctr r8 mtctr r8
setup: .Lsetup:
dcbt r9,r4 dcbt r9,r4
dcbz r9,r3 dcbz r9,r3
add r9,r9,r12 add r9,r9,r12
bdnz setup bdnz .Lsetup
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8 addi r3,r3,-8
srdi r8,r5,7 /* page is copied in 128 byte strides */ srdi r8,r5,7 /* page is copied in 128 byte strides */
......
...@@ -14,68 +14,94 @@ ...@@ -14,68 +14,94 @@
#define STK_PARM(i) (48 + ((i)-3)*8) #define STK_PARM(i) (48 + ((i)-3)*8)
#ifdef CONFIG_HCALL_STATS #ifdef CONFIG_TRACEPOINTS
.section ".toc","aw"
.globl hcall_tracepoint_refcount
hcall_tracepoint_refcount:
.llong 0
.section ".text"
/* /*
* precall must preserve all registers. use unused STK_PARM() * precall must preserve all registers. use unused STK_PARM()
* areas to save snapshots and opcode. * areas to save snapshots and opcode. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
*/ */
#define HCALL_INST_PRECALL \ #define HCALL_INST_PRECALL(FIRST_REG) \
std r3,STK_PARM(r3)(r1); /* save opcode */ \
mftb r0; /* get timebase and */ \
std r0,STK_PARM(r5)(r1); /* save for later */ \
BEGIN_FTR_SECTION; \ BEGIN_FTR_SECTION; \
mfspr r0,SPRN_PURR; /* get PURR and */ \ b 1f; \
std r0,STK_PARM(r6)(r1); /* save for later */ \ END_FTR_SECTION(0, 1); \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); ld r12,hcall_tracepoint_refcount@toc(r2); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
std r3,STK_PARM(r3)(r1); \
std r4,STK_PARM(r4)(r1); \
std r5,STK_PARM(r5)(r1); \
std r6,STK_PARM(r6)(r1); \
std r7,STK_PARM(r7)(r1); \
std r8,STK_PARM(r8)(r1); \
std r9,STK_PARM(r9)(r1); \
std r10,STK_PARM(r10)(r1); \
std r0,16(r1); \
addi r4,r1,STK_PARM(FIRST_REG); \
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
bl .__trace_hcall_entry; \
addi r1,r1,STACK_FRAME_OVERHEAD; \
ld r0,16(r1); \
ld r3,STK_PARM(r3)(r1); \
ld r4,STK_PARM(r4)(r1); \
ld r5,STK_PARM(r5)(r1); \
ld r6,STK_PARM(r6)(r1); \
ld r7,STK_PARM(r7)(r1); \
ld r8,STK_PARM(r8)(r1); \
ld r9,STK_PARM(r9)(r1); \
ld r10,STK_PARM(r10)(r1); \
mtlr r0; \
1:
/* /*
* postcall is performed immediately before function return which * postcall is performed immediately before function return which
* allows liberal use of volatile registers. We branch around this * allows liberal use of volatile registers. We branch around this
* in early init (eg when populating the MMU hashtable) by using an * in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature. * unconditional cpu feature.
*/ */
#define HCALL_INST_POSTCALL \ #define __HCALL_INST_POSTCALL \
BEGIN_FTR_SECTION; \ BEGIN_FTR_SECTION; \
b 1f; \ b 1f; \
END_FTR_SECTION(0, 1); \ END_FTR_SECTION(0, 1); \
ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ ld r12,hcall_tracepoint_refcount@toc(r2); \
cmpldi cr7,r4,MAX_HCALL_OPCODE; \ cmpdi r12,0; \
bgt- cr7,1f; \ beq+ 1f; \
\ mflr r0; \
/* get time and PURR snapshots after hcall */ \ ld r6,STK_PARM(r3)(r1); \
mftb r7; /* timebase after */ \ std r3,STK_PARM(r3)(r1); \
BEGIN_FTR_SECTION; \ mr r4,r3; \
mfspr r8,SPRN_PURR; /* PURR after */ \ mr r3,r6; \
ld r6,STK_PARM(r6)(r1); /* PURR before */ \ std r0,16(r1); \
subf r6,r6,r8; /* delta */ \ stdu r1,-STACK_FRAME_OVERHEAD(r1); \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ bl .__trace_hcall_exit; \
ld r5,STK_PARM(r5)(r1); /* timebase before */ \ addi r1,r1,STACK_FRAME_OVERHEAD; \
subf r5,r5,r7; /* time delta */ \ ld r0,16(r1); \
\ ld r3,STK_PARM(r3)(r1); \
/* calculate address of stat structure r4 = opcode */ \ mtlr r0; \
srdi r4,r4,2; /* index into array */ \
mulli r4,r4,HCALL_STAT_SIZE; \
LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
add r4,r4,r7; \
ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
add r4,r4,r7; \
\
/* update stats */ \
ld r7,HCALL_STAT_CALLS(r4); /* count */ \
addi r7,r7,1; \
std r7,HCALL_STAT_CALLS(r4); \
ld r7,HCALL_STAT_TB(r4); /* timebase */ \
add r7,r7,r5; \
std r7,HCALL_STAT_TB(r4); \
BEGIN_FTR_SECTION; \
ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
add r7,r7,r6; \
std r7,HCALL_STAT_PURR(r4); \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
1: 1:
/*
 * Post-call tracing hook for hcalls that have no return buffer: loads 0
 * into r5 and then runs __HCALL_INST_POSTCALL.
 * NOTE(review): r5 is presumably the third (retbuf) argument of
 * __trace_hcall_exit(), so this passes a NULL buffer -- confirm against
 * the calling convention in use.
 */
#define HCALL_INST_POSTCALL_NORETS \
li r5,0; \
__HCALL_INST_POSTCALL
/*
 * Post-call tracing hook for hcalls with a return buffer: copies BUFREG
 * (the register holding the return buffer pointer) into r5 and then runs
 * __HCALL_INST_POSTCALL.
 * NOTE(review): r5 is presumably the third (retbuf) argument of
 * __trace_hcall_exit() -- confirm against the calling convention in use.
 */
#define HCALL_INST_POSTCALL(BUFREG) \
mr r5,BUFREG; \
__HCALL_INST_POSTCALL
#else #else
#define HCALL_INST_PRECALL #define HCALL_INST_PRECALL(FIRST_ARG)
#define HCALL_INST_POSTCALL #define HCALL_INST_POSTCALL_NORETS
#define HCALL_INST_POSTCALL(BUFREG)
#endif #endif
.text .text
...@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) ...@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
mfcr r0 mfcr r0
stw r0,8(r1) stw r0,8(r1)
HCALL_INST_PRECALL HCALL_INST_PRECALL(r4)
HVSC /* invoke the hypervisor */ HVSC /* invoke the hypervisor */
HCALL_INST_POSTCALL HCALL_INST_POSTCALL_NORETS
lwz r0,8(r1) lwz r0,8(r1)
mtcrf 0xff,r0 mtcrf 0xff,r0
...@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall) ...@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
mfcr r0 mfcr r0
stw r0,8(r1) stw r0,8(r1)
HCALL_INST_PRECALL HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */ std r4,STK_PARM(r4)(r1) /* Save ret buffer */
...@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall) ...@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
std r6, 16(r12) std r6, 16(r12)
std r7, 24(r12) std r7, 24(r12)
HCALL_INST_POSTCALL HCALL_INST_POSTCALL(r12)
lwz r0,8(r1) lwz r0,8(r1)
mtcrf 0xff,r0 mtcrf 0xff,r0
...@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9) ...@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
mfcr r0 mfcr r0
stw r0,8(r1) stw r0,8(r1)
HCALL_INST_PRECALL HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */ std r4,STK_PARM(r4)(r1) /* Save ret buffer */
...@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9) ...@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
std r11,56(r12) std r11,56(r12)
std r0, 64(r12) std r0, 64(r12)
HCALL_INST_POSTCALL HCALL_INST_POSTCALL(r12)
lwz r0,8(r1) lwz r0,8(r1)
mtcrf 0xff,r0 mtcrf 0xff,r0
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <asm/hvcall.h> #include <asm/hvcall.h>
#include <asm/firmware.h> #include <asm/firmware.h>
#include <asm/cputable.h> #include <asm/cputable.h>
#include <asm/trace.h>
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
...@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = { ...@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
#define HCALL_ROOT_DIR "hcall_inst" #define HCALL_ROOT_DIR "hcall_inst"
#define CPU_NAME_BUF_SIZE 32 #define CPU_NAME_BUF_SIZE 32
/*
 * hcall_entry probe: snapshot the timebase and PURR into the current CPU's
 * statistics slot for @opcode so the exit probe can compute deltas.
 *
 * Opcodes above MAX_HCALL_OPCODE have no slot and are ignored.  @args is
 * unused.  The get_cpu_var() taken here is intentionally not released in
 * this function; the matching put_cpu_var() is in probe_hcall_exit().
 */
static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
{
	struct hcall_stats *slot;

	if (opcode <= MAX_HCALL_OPCODE) {
		/* Opcodes are spaced 4 apart, hence the divide for the index. */
		slot = &get_cpu_var(hcall_stats)[opcode / 4];

		slot->tb_start = mftb();
		slot->purr_start = mfspr(SPRN_PURR);
	}
}
/*
 * hcall_exit probe: fold the hypervisor call that just finished into the
 * current CPU's statistics slot for @opcode.
 *
 * Increments the call count and adds the elapsed timebase and PURR ticks
 * (measured from the snapshots taken by probe_hcall_entry()) to the running
 * totals.  The totals must be accumulated, not overwritten: tb_total and
 * purr_total are the sums over all calls that are later reported per
 * opcode.
 *
 * Opcodes above MAX_HCALL_OPCODE have no slot and are ignored, mirroring
 * the entry probe.  @retval and @retbuf are unused.
 */
static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
			     unsigned long *retbuf)
{
	struct hcall_stats *h;

	if (opcode > MAX_HCALL_OPCODE)
		return;

	h = &__get_cpu_var(hcall_stats)[opcode / 4];
	h->num_calls++;
	h->tb_total += mftb() - h->tb_start;
	h->purr_total += mfspr(SPRN_PURR) - h->purr_start;

	/* Pairs with the get_cpu_var() in probe_hcall_entry(). */
	put_cpu_var(hcall_stats);
}
static int __init hcall_inst_init(void) static int __init hcall_inst_init(void)
{ {
struct dentry *hcall_root; struct dentry *hcall_root;
...@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void) ...@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) if (!firmware_has_feature(FW_FEATURE_LPAR))
return 0; return 0;
if (register_trace_hcall_entry(probe_hcall_entry))
return -EINVAL;
if (register_trace_hcall_exit(probe_hcall_exit)) {
unregister_trace_hcall_entry(probe_hcall_entry);
return -EINVAL;
}
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
if (!hcall_root) if (!hcall_root)
return -ENOMEM; return -ENOMEM;
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <asm/cputable.h> #include <asm/cputable.h>
#include <asm/udbg.h> #include <asm/udbg.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/trace.h>
#include "plpar_wrappers.h" #include "plpar_wrappers.h"
#include "pseries.h" #include "pseries.h"
...@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order) ...@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
EXPORT_SYMBOL(arch_free_page); EXPORT_SYMBOL(arch_free_page);
#endif #endif
#ifdef CONFIG_TRACEPOINTS
/*
* We optimise our hcall path by placing hcall_tracepoint_refcount
* directly in the TOC so we can check if the hcall tracepoints are
* enabled via a single load.
*/
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;
/*
 * Registration callback for the hcall tracepoints: bumps
 * hcall_tracepoint_refcount, the counter the hcall assembly wrappers load
 * to decide whether to call the trace hooks.  A plain (non-atomic)
 * increment is used; per the note above, callers hold tracepoints_mutex.
 */
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
}
/*
 * Unregistration callback for the hcall tracepoints: drops
 * hcall_tracepoint_refcount by one.  A plain (non-atomic) decrement is
 * used; per the note above, callers hold tracepoints_mutex.
 */
void hcall_tracepoint_unregfunc(void)
{
hcall_tracepoint_refcount--;
}
/*
 * Out-of-line C entry point that forwards to the hcall_entry tracepoint.
 * It is the target of the "bl .__trace_hcall_entry" in the hcall assembly
 * wrappers, which cannot expand the trace_hcall_entry() call themselves.
 */
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
trace_hcall_entry(opcode, args);
}
/*
 * Out-of-line C entry point that forwards to the hcall_exit tracepoint.
 * It is the target of the "bl .__trace_hcall_exit" in the hcall assembly
 * wrappers.
 *
 * NOTE(review): @opcode is declared as signed long here, whereas
 * __trace_hcall_entry() and the hcall_exit tracepoint prototype use
 * unsigned long; the value is implicitly converted on the call below.
 */
void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long *retbuf)
{
trace_hcall_exit(opcode, retval, retbuf);
}
#endif
...@@ -106,6 +106,8 @@ enum perf_sw_ids { ...@@ -106,6 +106,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */ PERF_COUNT_SW_MAX, /* non-ABI */
}; };
......
...@@ -102,6 +102,8 @@ enum perf_sw_ids { ...@@ -102,6 +102,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */ PERF_COUNT_SW_MAX, /* non-ABI */
}; };
......
...@@ -4274,6 +4274,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) ...@@ -4274,6 +4274,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS: case PERF_COUNT_SW_CPU_MIGRATIONS:
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) { if (!event->parent) {
atomic_inc(&perf_swevent_enabled[event_id]); atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy; event->destroy = sw_perf_event_destroy;
......
perf-bench(1)
============
NAME
----
perf-bench - General framework for benchmark suites
SYNOPSIS
--------
[verse]
'perf bench' [<common options>] <subsystem> <suite> [<options>]
DESCRIPTION
-----------
This 'perf bench' command is a general framework for benchmark suites.
COMMON OPTIONS
--------------
-f::
--format=::
Specify the format style.
The currently available format styles are:
'default'::
Default style. This is mainly for human reading.
---------------------
% perf bench sched pipe # with no style specified
(executing 1000000 pipe operations between two tasks)
Total time:5.855 sec
5.855061 usecs/op
170792 ops/sec
---------------------
'simple'::
This simple style is friendly for automated
processing by scripts.
---------------------
% perf bench --format=simple sched pipe # simple style specified
5.988
---------------------
SUBSYSTEM
---------
'sched'::
Scheduler and IPC mechanisms.
SUITES FOR 'sched'
~~~~~~~~~~~~~~~~~~
*messaging*::
Suite for evaluating performance of scheduler and IPC mechanisms.
Based on hackbench by Rusty Russell.
Options of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
-p::
--pipe::
Use pipe() instead of socketpair()
-t::
--thread::
Use multiple threads instead of multiple processes
-g::
--group=::
Specify number of groups
-l::
--loop=::
Specify number of loops
Example of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched messaging # run with default
options (20 sender and receiver processes per group)
(10 groups == 400 processes run)
Total time:0.308 sec
% perf bench sched messaging -t -g 20 # be multi-threaded, with 20 groups
(20 sender and receiver threads per group)
(20 groups == 800 threads run)
Total time:0.582 sec
---------------------
*pipe*::
Suite for pipe() system call.
Based on pipe-test-1m.c by Ingo Molnar.
Options of *pipe*
^^^^^^^^^^^^^^^^^
-l::
--loop=::
Specify number of loops.
Example of *pipe*
^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched pipe
(executing 1000000 pipe operations between two tasks)
Total time:8.091 sec
8.091833 usecs/op
123581 ops/sec
% perf bench sched pipe -l 1000 # loop 1000
(executing 1000 pipe operations between two tasks)
Total time:0.016 sec
16.948000 usecs/op
59004 ops/sec
---------------------
SEE ALSO
--------
linkperf:perf[1]
...@@ -421,6 +421,13 @@ LIB_OBJS += util/hist.o ...@@ -421,6 +421,13 @@ LIB_OBJS += util/hist.o
LIB_OBJS += util/data_map.o LIB_OBJS += util/data_map.o
BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-annotate.o
BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o BUILTIN_OBJS += builtin-sched.o
BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-list.o
......
#ifndef BENCH_H
#define BENCH_H
/*
 * Shared declarations for the 'perf bench' suites.
 *
 * Each suite entry point takes the (argc, argv, prefix) triple and
 * returns an int status.
 */
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
/*
 * Output format selectors.  The *_STR values are the strings accepted by
 * the --format option; the integer values are what bench_format holds.
 */
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
#define BENCH_FORMAT_SIMPLE_STR "simple"
#define BENCH_FORMAT_SIMPLE 1
/* Returned when a format string matches none of the known styles. */
#define BENCH_FORMAT_UNKNOWN -1
/* Currently selected output format; each suite switches on it when reporting. */
extern int bench_format;
#endif
/*
*
* sched-messaging.c
*
* messaging: Benchmark for scheduler and IPC mechanisms
*
* Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../builtin.h"
#include "bench.h"
/* Test groups of 20 processes spraying to 20 receivers */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
/* Size in bytes of each message a sender writes and a receiver reads. */
#define DATASIZE 100
/* Set by -p/--pipe: create fd pairs with pipe() instead of socketpair(). */
static int use_pipes = 0;
/* Set by -l/--loop: number of times each sender sprays all of its fds. */
static unsigned int loops = 100;
/* Set by -t/--thread: run workers as threads instead of forked processes. */
static unsigned int thread_mode = 0;
/* Set by -g/--group: number of sender/receiver groups to create. */
static unsigned int num_groups = 10;
/*
 * Per-group state shared by all senders of one group.
 *
 * Allocated with room for num_fds trailing descriptors, i.e.
 * sizeof(struct sender_context) + num_fds * sizeof(int).
 */
struct sender_context {
	unsigned int num_fds;	/* number of entries in out_fds[] */
	int ready_out;		/* fd used to report "ready" */
	int wakefd;		/* fd polled for the start signal */
	int out_fds[];		/* write ends, one per receiver (C99 flexible array member) */
};
struct receiver_context {
unsigned int num_packets;
int in_fds[2];
int ready_out;
int wakefd;
};
/*
 * Fatal error helper: print msg together with the text for the current
 * errno on stderr, then terminate with exit status 1.  Never returns.
 */
static void barf(const char *msg)
{
	const char *reason = strerror(errno);

	fprintf(stderr, "%s (error: %s)\n", msg, reason);
	exit(1);
}
/*
 * Fill fds[] with a connected descriptor pair: a pipe when use_pipes is
 * set, otherwise an AF_UNIX stream socketpair.  Any failure is fatal.
 */
static void fdpair(int fds[2])
{
	int rc;

	if (use_pipes)
		rc = pipe(fds);
	else
		rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);

	if (rc != 0)
		barf(use_pipes ? "pipe()" : "socketpair()");
}
/*
 * Block until we're ready to go.
 *
 * Writes one byte to ready_out to announce that this worker is set up,
 * then sleeps in poll() until wakefd becomes readable (the "GO" signal).
 * Any failure is fatal.
 */
static void ready(int ready_out, int wakefd)
{
	/* Only the byte count matters, but never write an indeterminate value. */
	char dummy = 0;
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal */
	if (poll(&pollfd, 1, -1) != 1)
		barf("poll");
}
/*
 * Sender sprays loops messages down each file descriptor.
 *
 * Waits for the start signal, then for each of the 'loops' rounds writes
 * one DATASIZE-byte message to every fd in ctx->out_fds[], retrying after
 * short writes until the whole message is out.  A write error is fatal.
 * Always returns NULL.
 */
static void *sender(struct sender_context *ctx)
{
	/*
	 * The payload content is irrelevant, but zero it so that no
	 * uninitialised stack bytes are handed to write().
	 */
	char data[DATASIZE] = { 0 };
	unsigned int i, j;

	ready(ctx->ready_out, ctx->wakefd);

	/* Now pump to every receiver. */
	for (i = 0; i < loops; i++) {
		for (j = 0; j < ctx->num_fds; j++) {
			int ret, done = 0;

again:
			ret = write(ctx->out_fds[j], data + done,
				    sizeof(data) - done);
			if (ret < 0)
				barf("SENDER: write");
			done += ret;
			/* short write: push out the remainder */
			if (done < DATASIZE)
				goto again;
		}
	}

	return NULL;
}
/*
 * One receiver per fd.
 *
 * In process mode, first closes the peer end of the pair (in_fds[1]).
 * Then waits for the start signal and reads ctx->num_packets messages of
 * DATASIZE bytes each from in_fds[0], looping over short reads.  A read
 * error or a premature end of stream is fatal.  Always returns NULL.
 */
static void *receiver(struct receiver_context* ctx)
{
	unsigned int i;

	if (!thread_mode)
		close(ctx->in_fds[1]);

	/* Wait for start... */
	ready(ctx->ready_out, ctx->wakefd);

	/* Receive them all */
	for (i = 0; i < ctx->num_packets; i++) {
		char data[DATASIZE];
		int ret, done = 0;

		do {
			ret = read(ctx->in_fds[0], data + done,
				   DATASIZE - done);
			if (ret < 0)
				barf("SERVER: read");
			/*
			 * A zero return means the peer is gone; without this
			 * check the loop would spin forever making no progress.
			 */
			if (ret == 0) {
				fprintf(stderr,
					"SERVER: read: unexpected end of stream\n");
				exit(1);
			}
			done += ret;
		} while (done < DATASIZE);
	}

	return NULL;
}
/*
 * Start one worker running func(ctx).
 *
 * Process mode (thread_mode == 0): fork(); the child runs func(ctx) and
 * exits with status 0, the parent returns (pthread_t)0.
 * Thread mode: create a thread with a PTHREAD_STACK_MIN stack (the stack
 * size is left at its default on ia64) and return its id.
 *
 * Any failure to create the worker terminates the program.
 */
static pthread_t create_worker(void *ctx, void *(*func)(void *))
{
	pthread_attr_t attr;
	pthread_t childid;
	int err;

	if (!thread_mode) {
		/* process mode */
		/* Fork the worker. */
		switch (fork()) {
		case -1:
			barf("fork()");
			break;
		case 0:
			(*func) (ctx);
			exit(0);
			break;
		default:
			break;
		}

		return (pthread_t)0;
	}

	if (pthread_attr_init(&attr) != 0)
		barf("pthread_attr_init:");

#ifndef __ia64__
	if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
		barf("pthread_attr_setstacksize");
#endif

	err = pthread_create(&childid, &attr, func, ctx);
	/* The attribute object is only consulted during creation; release it. */
	pthread_attr_destroy(&attr);
	if (err != 0) {
		fprintf(stderr, "pthread_create failed: %s (%d)\n",
			strerror(err), err);
		exit(-1);
	}
	return childid;
}
/*
 * Collect one finished worker.
 *
 * Thread mode: join the thread identified by id.
 * Process mode: wait() for any child (id is not used) and terminate with
 * status 1 if that child did not exit normally.
 */
static void reap_worker(pthread_t id)
{
	int proc_status;
	void *thread_status;

	if (thread_mode) {
		pthread_join(id, &thread_status);
		return;
	}

	/* process mode */
	wait(&proc_status);
	if (WIFEXITED(proc_status))
		return;

	exit(1);
}
/*
 * One group of senders and receivers.
 *
 * Creates num_fds receivers, each with its own fd pair, followed by
 * num_fds senders that all share a single sender_context holding the
 * write end of every pair.  Worker ids are stored in pth[]: receivers in
 * slots [0, num_fds), senders in slots [num_fds, 2 * num_fds).
 *
 * In process mode the parent closes its copies of the pair fds once the
 * workers that need them have been forked.
 *
 * Returns the number of workers created (2 * num_fds).
 */
static unsigned int group(pthread_t *pth,
		unsigned int num_fds,
		int ready_out,
		int wakefd)
{
	struct sender_context *snd_ctx;
	unsigned int n;

	snd_ctx = malloc(sizeof(*snd_ctx)
			 + num_fds * sizeof(snd_ctx->out_fds[0]));
	if (!snd_ctx)
		barf("malloc()");

	for (n = 0; n < num_fds; n++) {
		int pair[2];
		struct receiver_context *rcv_ctx = malloc(sizeof(*rcv_ctx));

		if (!rcv_ctx)
			barf("malloc()");

		/* Create the pipe between client and server */
		fdpair(pair);

		rcv_ctx->num_packets = num_fds * loops;
		rcv_ctx->in_fds[0] = pair[0];
		rcv_ctx->in_fds[1] = pair[1];
		rcv_ctx->ready_out = ready_out;
		rcv_ctx->wakefd = wakefd;

		pth[n] = create_worker(rcv_ctx, (void *)receiver);

		snd_ctx->out_fds[n] = pair[1];
		/* the read end now belongs to the forked receiver */
		if (!thread_mode)
			close(pair[0]);
	}

	/* Now we have all the fds, fork the senders */
	if (num_fds > 0) {
		snd_ctx->ready_out = ready_out;
		snd_ctx->wakefd = wakefd;
		snd_ctx->num_fds = num_fds;
	}
	for (n = 0; n < num_fds; n++)
		pth[num_fds + n] = create_worker(snd_ctx, (void *)sender);

	/* Close the fds we have left */
	if (!thread_mode) {
		for (n = 0; n < num_fds; n++)
			close(snd_ctx->out_fds[n]);
	}

	/* Return number of children to reap */
	return num_fds * 2;
}
/*
 * Command-line options of the messaging suite.  Each entry stores its
 * value directly into the corresponding file-scope setting.
 */
static const struct option options[] = {
OPT_BOOLEAN('p', "pipe", &use_pipes,
"Use pipe() instead of socketpair()"),
OPT_BOOLEAN('t', "thread", &thread_mode,
"Be multi thread instead of multi process"),
OPT_INTEGER('g', "group", &num_groups,
"Specify number of groups"),
OPT_INTEGER('l', "loop", &loops,
"Specify number of loops"),
OPT_END()
};
/* Usage text passed to parse_options(); NULL-terminated. */
static const char * const bench_sched_message_usage[] = {
"perf bench sched messaging <options>",
NULL
};
/*
 * Entry point of the "sched messaging" suite.
 *
 * Parses the suite options, creates num_groups groups of 20 senders and
 * 20 receivers, waits until every worker has reported ready, releases
 * them all at once and measures the wall-clock time until the last
 * worker has been reaped.  The result is printed according to
 * bench_format.
 *
 * Returns 0 on success; setup failures and an unexpected bench_format
 * terminate the program.
 */
int bench_sched_messaging(int argc, const char **argv,
		    const char *prefix __used)
{
	unsigned int i, total_children;
	struct timeval start, stop, diff;
	unsigned int num_fds = 20;
	int readyfds[2], wakefds[2];
	/* Only the byte count matters, but never write an indeterminate value. */
	char dummy = 0;
	pthread_t *pth_tab;

	argc = parse_options(argc, argv, options,
			     bench_sched_message_usage, 0);

	/*
	 * Size the table in size_t: the product computed in unsigned int
	 * could wrap for a large --group value and under-allocate.
	 */
	pth_tab = calloc((size_t)num_fds * 2 * num_groups, sizeof(*pth_tab));
	if (!pth_tab)
		barf("main:malloc()");

	fdpair(readyfds);
	fdpair(wakefds);

	/*
	 * Forked workers leave through exit(), which flushes stdio; push out
	 * anything still buffered so they do not repeat it.
	 */
	fflush(stdout);

	total_children = 0;
	for (i = 0; i < num_groups; i++)
		total_children += group(pth_tab+total_children, num_fds,
					readyfds[1], wakefds[0]);

	/* Wait for everyone to be ready */
	for (i = 0; i < total_children; i++)
		if (read(readyfds[0], &dummy, 1) != 1)
			barf("Reading for readyfds");

	gettimeofday(&start, NULL);

	/* Kick them off */
	if (write(wakefds[1], &dummy, 1) != 1)
		barf("Writing to start them");

	/* Reap them all */
	for (i = 0; i < total_children; i++)
		reap_worker(pth_tab[i]);

	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);

	free(pth_tab);

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
		printf("# %u sender and receiver %s per group\n",
		       num_fds, thread_mode ? "threads" : "processes");
		printf("# %u groups == %u %s run\n\n",
		       num_groups, num_groups * 2 * num_fds,
		       thread_mode ? "threads" : "processes");
		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
		       (unsigned long)diff.tv_sec,
		       (unsigned long)(diff.tv_usec / 1000));
		break;
	case BENCH_FORMAT_SIMPLE:
		printf("%lu.%03lu\n", (unsigned long)diff.tv_sec,
		       (unsigned long)(diff.tv_usec / 1000));
		break;
	default:
		/* bench_format holds a value no case above handles */
		fprintf(stderr, "Unknown format:%d\n", bench_format);
		exit(1);
		break;
	}

	return 0;
}
/*
*
* sched-pipe.c
*
* pipe: Benchmark for pipe()
*
* Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
* http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../builtin.h"
#include "bench.h"
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/wait.h>
#include <linux/unistd.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
/* Number of ping-pong round trips performed when -l/--loop is not given. */
#define LOOPS_DEFAULT 1000000
/* Round-trip count; overridden by -l/--loop. */
static int loops = LOOPS_DEFAULT;
/* Command-line options of the pipe suite. */
static const struct option options[] = {
OPT_INTEGER('l', "loop", &loops,
"Specify number of loops"),
OPT_END()
};
/* Usage text passed to parse_options(); NULL-terminated. */
static const char * const bench_sched_pipe_usage[] = {
"perf bench sched pipe <options>",
NULL
};
/*
 * Entry point of the "sched pipe" suite.
 *
 * Creates two pipes and forks.  Parent and child then bounce an int back
 * and forth 'loops' times: the parent writes to pipe_1 and reads from
 * pipe_2, the child does the opposite.  Both sides time their own loop.
 *
 * The parent only waits for the child and returns 0; it is the child
 * that prints the result (according to bench_format) and then returns 0
 * as well.
 *
 * Failure to create the pipes or to fork terminates the program with
 * status 1, as does an unexpected bench_format.
 */
int bench_sched_pipe(int argc, const char **argv,
		     const char *prefix __used)
{
	int pipe_1[2], pipe_2[2];
	int m = 0, i;
	struct timeval start, stop, diff;
	unsigned long long result_usec = 0;

	/*
	 * why does "ret" exist?
	 * discarding returned value of read(), write()
	 * causes error in building environment for perf
	 */
	int ret, wait_stat;
	pid_t pid, retpid;

	argc = parse_options(argc, argv, options,
			     bench_sched_pipe_usage, 0);

	/*
	 * These calls must not live inside assert(): with NDEBUG they would
	 * be compiled out and the pipes never created.
	 */
	if (pipe(pipe_1) != 0 || pipe(pipe_2) != 0) {
		perror("pipe");
		exit(1);
	}

	/*
	 * Both processes return to the caller after the fork; flush first so
	 * already-buffered output is not emitted twice.
	 */
	fflush(stdout);

	pid = fork();
	if (pid < 0) {
		perror("fork");
		exit(1);
	}

	gettimeofday(&start, NULL);

	if (!pid) {
		for (i = 0; i < loops; i++) {
			ret = read(pipe_1[0], &m, sizeof(int));
			ret = write(pipe_2[1], &m, sizeof(int));
		}
	} else {
		for (i = 0; i < loops; i++) {
			ret = write(pipe_1[1], &m, sizeof(int));
			ret = read(pipe_2[0], &m, sizeof(int));
		}
	}

	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);

	if (pid) {
		/* parent: the child does the reporting */
		retpid = waitpid(pid, &wait_stat, 0);
		assert((retpid == pid) && WIFEXITED(wait_stat));
		return 0;
	}

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
		printf("# Executed %d pipe operations between two tasks\n\n",
		       loops);

		/* widen before multiplying so a 32-bit tv_sec cannot overflow */
		result_usec = (unsigned long long)diff.tv_sec * 1000000ULL;
		result_usec += (unsigned long long)diff.tv_usec;

		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
		       (unsigned long)diff.tv_sec,
		       (unsigned long)(diff.tv_usec / 1000));

		printf(" %14lf usecs/op\n",
		       (double)result_usec / (double)loops);

		printf(" %14d ops/sec\n",
		       (int)((double)loops /
			     ((double)result_usec / (double)1000000)));
		break;

	case BENCH_FORMAT_SIMPLE:
		printf("%lu.%03lu\n",
		       (unsigned long)diff.tv_sec,
		       (unsigned long)(diff.tv_usec / 1000));
		break;

	default:
		/* bench_format holds a value no case above handles */
		fprintf(stderr, "Unknown format:%d\n", bench_format);
		exit(1);
		break;
	}

	return 0;
}
/*
*
* builtin-bench.c
*
* General benchmarking subsystem provided by perf
*
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
/*
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
*
*/
#include "perf.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "builtin.h"
#include "bench/bench.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One runnable benchmark inside a subsystem. */
struct bench_suite {
/* name given on the command line; NULL marks the end of a suite table */
const char *name;
/* one-line description printed when listing suites */
const char *summary;
/* entry point, invoked by cmd_bench() as fn(argc, argv, prefix) */
int (*fn)(int, const char **, const char *);
};
/* Suites of the "sched" subsystem; terminated by an all-NULL entry. */
static struct bench_suite sched_suites[] = {
{ "messaging",
"Benchmark for scheduler and IPC mechanisms",
bench_sched_messaging },
{ "pipe",
"Flood of communication over pipe() between two processes",
bench_sched_pipe },
{ NULL,
NULL,
NULL }
};
/* A named collection of benchmark suites. */
struct bench_subsys {
/* name given on the command line; NULL marks the end of the subsystem table */
const char *name;
/* one-line description printed in the usage output */
const char *summary;
/* table of suites, terminated by an entry whose name is NULL */
struct bench_suite *suites;
};
/* All subsystems known to 'perf bench'; terminated by an all-NULL entry. */
static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
{ NULL,
NULL,
NULL }
};
static void dump_suites(int subsys_index)
{
int i;
printf("List of available suites for %s...\n\n",
subsystems[subsys_index].name);
for (i = 0; subsystems[subsys_index].suites[i].name; i++)
printf("\t%s: %s\n",
subsystems[subsys_index].suites[i].name,
subsystems[subsys_index].suites[i].summary);
printf("\n");
return;
}
/* Raw argument of -f/--format; stays NULL when the option is not given. */
static char *bench_format_str;
/* Selected output format (one of BENCH_FORMAT_*); read by the suites. */
int bench_format = BENCH_FORMAT_DEFAULT;
/* Options common to all suites, parsed before the subsystem name. */
static const struct option bench_options[] = {
OPT_STRING('f', "format", &bench_format_str, "default",
"Specify format style"),
OPT_END()
};
/* Usage text for 'perf bench'; NULL-terminated. */
static const char * const bench_usage[] = {
"perf bench [<common options>] <subsystem> <suite> [<options>]",
NULL
};
static void print_usage(void)
{
int i;
printf("Usage: \n");
for (i = 0; bench_usage[i]; i++)
printf("\t%s\n", bench_usage[i]);
printf("\n");
printf("List of available subsystems...\n\n");
for (i = 0; subsystems[i].name; i++)
printf("\t%s: %s\n",
subsystems[i].name, subsystems[i].summary);
printf("\n");
}
/*
 * Translate a --format argument into a BENCH_FORMAT_* value.
 *
 * A NULL string (option not given) selects BENCH_FORMAT_DEFAULT; a
 * string matching no known style yields BENCH_FORMAT_UNKNOWN.
 */
static int bench_str2int(char *str)
{
	if (str == NULL || strcmp(str, BENCH_FORMAT_DEFAULT_STR) == 0)
		return BENCH_FORMAT_DEFAULT;

	if (strcmp(str, BENCH_FORMAT_SIMPLE_STR) == 0)
		return BENCH_FORMAT_SIMPLE;

	return BENCH_FORMAT_UNKNOWN;
}
/*
 * Entry point of 'perf bench'.
 *
 * Parses the common options, then looks up argv[0] in subsystems[] and
 * argv[1] among that subsystem's suites, and runs the matching suite with
 * the remaining arguments.
 *
 * Returns:
 *   - the suite's own return value when a suite was run;
 *   - 0 when only usage text or a suite list was printed;
 *   - 1 for an unknown format, subsystem or suite.
 */
int cmd_bench(int argc, const char **argv, const char *prefix __used)
{
	int i, j, status = 0;

	if (argc < 2) {
		/* No subsystem specified. */
		print_usage();
		goto end;
	}

	argc = parse_options(argc, argv, bench_options, bench_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	bench_format = bench_str2int(bench_format_str);
	if (bench_format == BENCH_FORMAT_UNKNOWN) {
		printf("Unknown format descriptor:%s\n", bench_format_str);
		/* a rejected option is an error, like an unknown suite below */
		status = 1;
		goto end;
	}

	if (argc < 1) {
		print_usage();
		goto end;
	}

	for (i = 0; subsystems[i].name; i++) {
		if (strcmp(subsystems[i].name, argv[0]))
			continue;

		if (argc < 2) {
			/* No suite specified. */
			dump_suites(i);
			goto end;
		}

		for (j = 0; subsystems[i].suites[j].name; j++) {
			if (strcmp(subsystems[i].suites[j].name, argv[1]))
				continue;

			if (bench_format == BENCH_FORMAT_DEFAULT) {
				printf("# Running %s/%s benchmark...\n",
				       subsystems[i].name,
				       subsystems[i].suites[j].name);
				/*
				 * The suites fork; flush so the children do
				 * not inherit and re-emit this line.
				 */
				fflush(stdout);
			}

			status = subsystems[i].suites[j].fn(argc - 1,
							    argv + 1, prefix);
			goto end;
		}

		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
			dump_suites(i);
			goto end;
		}

		printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
		status = 1;
		goto end;
	}

	printf("Unknown subsystem:%s\n", argv[0]);
	status = 1;

end:
	return status;
}
...@@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file); ...@@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file);
extern int check_pager_config(const char *cmd); extern int check_pager_config(const char *cmd);
extern int cmd_annotate(int argc, const char **argv, const char *prefix); extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix);
extern int cmd_sched(int argc, const char **argv, const char *prefix); extern int cmd_sched(int argc, const char **argv, const char *prefix);
extern int cmd_list(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# command name category [deprecated] [common] # command name category [deprecated] [common]
# #
perf-annotate mainporcelain common perf-annotate mainporcelain common
perf-bench mainporcelain common
perf-list mainporcelain common perf-list mainporcelain common
perf-sched mainporcelain common perf-sched mainporcelain common
perf-record mainporcelain common perf-record mainporcelain common
......
...@@ -137,6 +137,8 @@ enum sw_event_ids { ...@@ -137,6 +137,8 @@ enum sw_event_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
}; };
Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
......
...@@ -289,6 +289,7 @@ static void handle_internal_command(int argc, const char **argv) ...@@ -289,6 +289,7 @@ static void handle_internal_command(int argc, const char **argv)
{ "list", cmd_list, 0 }, { "list", cmd_list, 0 },
{ "record", cmd_record, 0 }, { "record", cmd_record, 0 },
{ "report", cmd_report, 0 }, { "report", cmd_report, 0 },
{ "bench", cmd_bench, 0 },
{ "stat", cmd_stat, 0 }, { "stat", cmd_stat, 0 },
{ "timechart", cmd_timechart, 0 }, { "timechart", cmd_timechart, 0 },
{ "top", cmd_top, 0 }, { "top", cmd_top, 0 },
......
...@@ -48,6 +48,8 @@ static struct event_symbol event_symbols[] = { ...@@ -48,6 +48,8 @@ static struct event_symbol event_symbols[] = {
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
{ CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
{ CSW(EMULATION_FAULTS), "emulation-faults", "" },
}; };
#define __PERF_EVENT_FIELD(config, name) \ #define __PERF_EVENT_FIELD(config, name) \
...@@ -76,6 +78,8 @@ static const char *sw_event_names[] = { ...@@ -76,6 +78,8 @@ static const char *sw_event_names[] = {
"CPU-migrations", "CPU-migrations",
"minor-faults", "minor-faults",
"major-faults", "major-faults",
"alignment-faults",
"emulation-faults",
}; };
#define MAX_ALIASES 8 #define MAX_ALIASES 8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment