Commit 12e24f34 authored by Linus Torvalds

Merge branch 'perfcounters-fixes-for-linus' of...

Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  perfcounter: Handle some IO return values
  perf_counter: Push perf_sample_data through the swcounter code
  perf_counter tools: Define and use our own u64, s64 etc. definitions
  perf_counter: Close race in perf_lock_task_context()
  perf_counter, x86: Improve interactions with fast-gup
  perf_counter: Simplify and fix task migration counting
  perf_counter tools: Add a data file header
  perf_counter: Update userspace callchain sampling uses
  perf_counter: Make callchain samples extensible
  perf report: Filter to parent set by default
  perf_counter tools: Handle lost events
  perf_counter: Add event overflow handling
  fs: Provide empty .set_page_dirty() aop for anon inodes
  perf_counter: tools: Makefile tweaks for 64-bit powerpc
  perf_counter: powerpc: Add processor back-end for MPC7450 family
  perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
  perf_counter: powerpc: Change how processor-specific back-ends get selected
  perf_counter: powerpc: Use unsigned long for register and constraint values
  perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
  perf_counter tools: Add and use isprint()
  ...
parents 1eb51c33 eadc84cc
...@@ -126,6 +126,7 @@ config PPC ...@@ -126,6 +126,7 @@ config PPC
select HAVE_OPROFILE select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS if PPC64 select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32 select GENERIC_ATOMIC64 if PPC32
select HAVE_PERF_COUNTERS
config EARLY_PRINTK config EARLY_PRINTK
bool bool
......
...@@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags) ...@@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags)
struct irq_chip; struct irq_chip;
#ifdef CONFIG_PERF_COUNTERS #ifdef CONFIG_PERF_COUNTERS
#ifdef CONFIG_PPC64
static inline unsigned long test_perf_counter_pending(void) static inline unsigned long test_perf_counter_pending(void)
{ {
unsigned long x; unsigned long x;
...@@ -154,15 +156,15 @@ static inline void clear_perf_counter_pending(void) ...@@ -154,15 +156,15 @@ static inline void clear_perf_counter_pending(void)
"r" (0), "r" (0),
"i" (offsetof(struct paca_struct, perf_counter_pending))); "i" (offsetof(struct paca_struct, perf_counter_pending)));
} }
#endif /* CONFIG_PPC64 */
#else #else /* CONFIG_PERF_COUNTERS */
static inline unsigned long test_perf_counter_pending(void) static inline unsigned long test_perf_counter_pending(void)
{ {
return 0; return 0;
} }
static inline void set_perf_counter_pending(void) {}
static inline void clear_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {}
#endif /* CONFIG_PERF_COUNTERS */ #endif /* CONFIG_PERF_COUNTERS */
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
*/ */
#include <linux/types.h> #include <linux/types.h>
#include <asm/hw_irq.h>
#define MAX_HWCOUNTERS 8 #define MAX_HWCOUNTERS 8
#define MAX_EVENT_ALTERNATIVES 8 #define MAX_EVENT_ALTERNATIVES 8
#define MAX_LIMITED_HWCOUNTERS 2 #define MAX_LIMITED_HWCOUNTERS 2
...@@ -19,16 +21,18 @@ ...@@ -19,16 +21,18 @@
* describe the PMU on a particular POWER-family CPU. * describe the PMU on a particular POWER-family CPU.
*/ */
struct power_pmu { struct power_pmu {
const char *name;
int n_counter; int n_counter;
int max_alternatives; int max_alternatives;
u64 add_fields; unsigned long add_fields;
u64 test_adder; unsigned long test_adder;
int (*compute_mmcr)(u64 events[], int n_ev, int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], u64 mmcr[]); unsigned int hwc[], unsigned long mmcr[]);
int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); int (*get_constraint)(u64 event, unsigned long *mskp,
unsigned long *valp);
int (*get_alternatives)(u64 event, unsigned int flags, int (*get_alternatives)(u64 event, unsigned int flags,
u64 alt[]); u64 alt[]);
void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event); int (*limited_pmc_event)(u64 event);
u32 flags; u32 flags;
int n_generic; int n_generic;
...@@ -38,8 +42,6 @@ struct power_pmu { ...@@ -38,8 +42,6 @@ struct power_pmu {
[PERF_COUNT_HW_CACHE_RESULT_MAX]; [PERF_COUNT_HW_CACHE_RESULT_MAX];
}; };
extern struct power_pmu *ppmu;
/* /*
* Values for power_pmu.flags * Values for power_pmu.flags
*/ */
...@@ -53,15 +55,23 @@ extern struct power_pmu *ppmu; ...@@ -53,15 +55,23 @@ extern struct power_pmu *ppmu;
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
extern int register_power_pmu(struct power_pmu *);
struct pt_regs; struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
/* /*
* The power_pmu.get_constraint function returns a 64-bit value and * Only override the default definitions in include/linux/perf_counter.h
* a 64-bit mask that express the constraints between this event and * if we have hardware PMU support.
*/
#ifdef CONFIG_PPC_PERF_CTRS
#define perf_misc_flags(regs) perf_misc_flags(regs)
#endif
/*
* The power_pmu.get_constraint function returns a 32/64-bit value and
* a 32/64-bit mask that express the constraints between this event and
* other events. * other events.
* *
* The value and mask are divided up into (non-overlapping) bitfields * The value and mask are divided up into (non-overlapping) bitfields
......
...@@ -97,9 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o ...@@ -97,9 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o
power5-pmu.o power5+-pmu.o power6-pmu.o \ obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power7-pmu.o power5+-pmu.o power6-pmu.o power7-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
...@@ -108,6 +109,7 @@ obj-y += iomap.o ...@@ -108,6 +109,7 @@ obj-y += iomap.o
endif endif
obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
obj-y += ppc_save_regs.o obj-y += ppc_save_regs.o
......
This diff is collapsed.
This diff is collapsed.
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for POWER4 * Bits in event code for POWER4
...@@ -179,22 +181,22 @@ static short mmcr1_adder_bits[8] = { ...@@ -179,22 +181,22 @@ static short mmcr1_adder_bits[8] = {
*/ */
static struct unitinfo { static struct unitinfo {
u64 value, mask; unsigned long value, mask;
int unit; int unit;
int lowerbit; int lowerbit;
} p4_unitinfo[16] = { } p4_unitinfo[16] = {
[PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
[PM_ISU1_ALT] = [PM_ISU1_ALT] =
{ 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
[PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
[PM_IFU_ALT] = [PM_IFU_ALT] =
{ 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
[PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
}; };
static unsigned char direct_marked_event[8] = { static unsigned char direct_marked_event[8] = {
...@@ -249,10 +251,11 @@ static int p4_marked_instr_event(u64 event) ...@@ -249,10 +251,11 @@ static int p4_marked_instr_event(u64 event)
return (mask >> (byte * 8 + bit)) & 1; return (mask >> (byte * 8 + bit)) & 1;
} }
static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) static int p4_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, byte, unit, lower, sh; int pmc, byte, unit, lower, sh;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
int grp = -1; int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
...@@ -282,14 +285,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -282,14 +285,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
value |= p4_unitinfo[unit].value; value |= p4_unitinfo[unit].value;
sh = p4_unitinfo[unit].lowerbit; sh = p4_unitinfo[unit].lowerbit;
if (sh > 1) if (sh > 1)
value |= (u64)lower << sh; value |= (unsigned long)lower << sh;
else if (lower != sh) else if (lower != sh)
return -1; return -1;
unit = p4_unitinfo[unit].unit; unit = p4_unitinfo[unit].unit;
/* Set byte lane select field */ /* Set byte lane select field */
mask |= 0xfULL << (28 - 4 * byte); mask |= 0xfULL << (28 - 4 * byte);
value |= (u64)unit << (28 - 4 * byte); value |= (unsigned long)unit << (28 - 4 * byte);
} }
if (grp == 0) { if (grp == 0) {
/* increment PMC1/2/5/6 field */ /* increment PMC1/2/5/6 field */
...@@ -353,9 +356,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) ...@@ -353,9 +356,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
} }
static int p4_compute_mmcr(u64 event[], int n_ev, static int p4_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel, lower; unsigned int pmc, unit, byte, psel, lower;
unsigned int ttm, grp; unsigned int ttm, grp;
unsigned int pmc_inuse = 0; unsigned int pmc_inuse = 0;
...@@ -429,9 +432,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev, ...@@ -429,9 +432,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
return -1; return -1;
/* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; << MMCR1_TTM0SEL_SH;
mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
<< MMCR1_TTM1SEL_SH;
mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
/* Set TTCxSEL fields. */ /* Set TTCxSEL fields. */
if (unitlower & 0xe) if (unitlower & 0xe)
...@@ -456,7 +461,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev, ...@@ -456,7 +461,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
ttm = unit - 1; /* 2->1, 3->2 */ ttm = unit - 1; /* 2->1, 3->2 */
else else
ttm = unit >> 2; ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); mmcr1 |= (unsigned long)ttm
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
} }
} }
...@@ -519,7 +525,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev, ...@@ -519,7 +525,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
return 0; return 0;
} }
static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
/* /*
* Setting the PMCxSEL field to 0 disables PMC x. * Setting the PMCxSEL field to 0 disables PMC x.
...@@ -583,11 +589,12 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -583,11 +589,12 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu power4_pmu = { static struct power_pmu power4_pmu = {
.name = "POWER4/4+",
.n_counter = 8, .n_counter = 8,
.max_alternatives = 5, .max_alternatives = 5,
.add_fields = 0x0000001100005555ull, .add_fields = 0x0000001100005555ul,
.test_adder = 0x0011083300000000ull, .test_adder = 0x0011083300000000ul,
.compute_mmcr = p4_compute_mmcr, .compute_mmcr = p4_compute_mmcr,
.get_constraint = p4_get_constraint, .get_constraint = p4_get_constraint,
.get_alternatives = p4_get_alternatives, .get_alternatives = p4_get_alternatives,
...@@ -596,3 +603,13 @@ struct power_pmu power4_pmu = { ...@@ -596,3 +603,13 @@ struct power_pmu power4_pmu = {
.generic_events = p4_generic_events, .generic_events = p4_generic_events,
.cache_events = &power4_cache_events, .cache_events = &power4_cache_events,
}; };
/*
 * Register the POWER4/4+ PMU description if the running CPU identifies
 * itself as "ppc64/power4" through cur_cpu_spec->oprofile_cpu_type.
 *
 * Returns -ENODEV when the CPU type string is missing or does not match,
 * otherwise whatever register_power_pmu() returns.
 */
static int init_power4_pmu(void)
{
	const char *cpu_type = cur_cpu_spec->oprofile_cpu_type;

	/*
	 * This initcall runs regardless of which CPU we booted on, so the
	 * type string must be checked for NULL before handing it to strcmp().
	 */
	if (!cpu_type || strcmp(cpu_type, "ppc64/power4"))
		return -ENODEV;

	return register_power_pmu(&power4_pmu);
}
arch_initcall(init_power4_pmu);
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
...@@ -126,20 +128,21 @@ static const int grsel_shift[8] = { ...@@ -126,20 +128,21 @@ static const int grsel_shift[8] = {
}; };
/* Masks and values for using events from the various units */ /* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = { static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, [PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
[PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
[PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
[PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, [PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
[PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
[PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
}; };
static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) static int power5p_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, byte, unit, sh; int pmc, byte, unit, sh;
int bit, fmask; int bit, fmask;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) { if (pmc) {
...@@ -171,17 +174,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -171,17 +174,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
bit = event & 7; bit = event & 7;
fmask = (bit == 6)? 7: 3; fmask = (bit == 6)? 7: 3;
sh = grsel_shift[bit]; sh = grsel_shift[bit];
mask |= (u64)fmask << sh; mask |= (unsigned long)fmask << sh;
value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
<< sh;
} }
/* Set byte lane select field */ /* Set byte lane select field */
mask |= 0xfULL << (24 - 4 * byte); mask |= 0xfUL << (24 - 4 * byte);
value |= (u64)unit << (24 - 4 * byte); value |= (unsigned long)unit << (24 - 4 * byte);
} }
if (pmc < 5) { if (pmc < 5) {
/* need a counter from PMC1-4 set */ /* need a counter from PMC1-4 set */
mask |= 0x8000000000000ull; mask |= 0x8000000000000ul;
value |= 0x1000000000000ull; value |= 0x1000000000000ul;
} }
*maskp = mask; *maskp = mask;
*valp = value; *valp = value;
...@@ -452,10 +456,10 @@ static int power5p_marked_instr_event(u64 event) ...@@ -452,10 +456,10 @@ static int power5p_marked_instr_event(u64 event)
} }
static int power5p_compute_mmcr(u64 event[], int n_ev, static int power5p_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr1 = 0; unsigned long mmcr1 = 0;
u64 mmcra = 0; unsigned long mmcra = 0;
unsigned int pmc, unit, byte, psel; unsigned int pmc, unit, byte, psel;
unsigned int ttm; unsigned int ttm;
int i, isbus, bit, grsel; int i, isbus, bit, grsel;
...@@ -517,7 +521,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -517,7 +521,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
continue; continue;
if (ttmuse++) if (ttmuse++)
return -1; return -1;
mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
} }
ttmuse = 0; ttmuse = 0;
for (; i <= PM_GRS; ++i) { for (; i <= PM_GRS; ++i) {
...@@ -525,7 +529,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -525,7 +529,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
continue; continue;
if (ttmuse++) if (ttmuse++)
return -1; return -1;
mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
} }
if (ttmuse > 1) if (ttmuse > 1)
return -1; return -1;
...@@ -540,10 +544,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -540,10 +544,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
unit = PM_ISU0_ALT; unit = PM_ISU0_ALT;
} else if (unit == PM_LSU1 + 1) { } else if (unit == PM_LSU1 + 1) {
/* select lower word of LSU1 for this byte */ /* select lower word of LSU1 for this byte */
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
} }
ttm = unit >> 2; ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); mmcr1 |= (unsigned long)ttm
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
} }
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
...@@ -568,7 +573,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -568,7 +573,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
if (isbus && (byte & 2) && if (isbus && (byte & 2) &&
(psel == 8 || psel == 0x10 || psel == 0x28)) (psel == 8 || psel == 0x10 || psel == 0x28))
/* add events on higher-numbered bus */ /* add events on higher-numbered bus */
mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
} else { } else {
/* Instructions or run cycles on PMC5/6 */ /* Instructions or run cycles on PMC5/6 */
--pmc; --pmc;
...@@ -576,7 +581,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -576,7 +581,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
if (isbus && unit == PM_GRS) { if (isbus && unit == PM_GRS) {
bit = psel & 7; bit = psel & 7;
grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
mmcr1 |= (u64)grsel << grsel_shift[bit]; mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
} }
if (power5p_marked_instr_event(event[i])) if (power5p_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE; mmcra |= MMCRA_SAMPLE_ENABLE;
...@@ -599,7 +604,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, ...@@ -599,7 +604,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
return 0; return 0;
} }
static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
if (pmc <= 3) if (pmc <= 3)
mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
...@@ -654,11 +659,12 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -654,11 +659,12 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu power5p_pmu = { static struct power_pmu power5p_pmu = {
.name = "POWER5+/++",
.n_counter = 6, .n_counter = 6,
.max_alternatives = MAX_ALT, .max_alternatives = MAX_ALT,
.add_fields = 0x7000000000055ull, .add_fields = 0x7000000000055ul,
.test_adder = 0x3000040000000ull, .test_adder = 0x3000040000000ul,
.compute_mmcr = power5p_compute_mmcr, .compute_mmcr = power5p_compute_mmcr,
.get_constraint = power5p_get_constraint, .get_constraint = power5p_get_constraint,
.get_alternatives = power5p_get_alternatives, .get_alternatives = power5p_get_alternatives,
...@@ -669,3 +675,14 @@ struct power_pmu power5p_pmu = { ...@@ -669,3 +675,14 @@ struct power_pmu power5p_pmu = {
.generic_events = power5p_generic_events, .generic_events = power5p_generic_events,
.cache_events = &power5p_cache_events, .cache_events = &power5p_cache_events,
}; };
/*
 * Register the POWER5+/++ PMU description if the running CPU identifies
 * itself as "ppc64/power5+" or "ppc64/power5++" through
 * cur_cpu_spec->oprofile_cpu_type.
 *
 * Returns -ENODEV when the CPU type string is missing or matches neither
 * name, otherwise whatever register_power_pmu() returns.
 */
static int init_power5p_pmu(void)
{
	const char *cpu_type = cur_cpu_spec->oprofile_cpu_type;

	/*
	 * This initcall runs regardless of which CPU we booted on, so the
	 * type string must be checked for NULL before handing it to strcmp().
	 */
	if (!cpu_type ||
	    (strcmp(cpu_type, "ppc64/power5+") &&
	     strcmp(cpu_type, "ppc64/power5++")))
		return -ENODEV;

	return register_power_pmu(&power5p_pmu);
}
arch_initcall(init_power5p_pmu);
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for POWER5 (not POWER5++) * Bits in event code for POWER5 (not POWER5++)
...@@ -130,20 +132,21 @@ static const int grsel_shift[8] = { ...@@ -130,20 +132,21 @@ static const int grsel_shift[8] = {
}; };
/* Masks and values for using events from the various units */ /* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = { static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
[PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
[PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
[PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
[PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
[PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
}; };
static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) static int power5_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, byte, unit, sh; int pmc, byte, unit, sh;
int bit, fmask; int bit, fmask;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
int grp = -1; int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
...@@ -178,8 +181,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -178,8 +181,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
bit = event & 7; bit = event & 7;
fmask = (bit == 6)? 7: 3; fmask = (bit == 6)? 7: 3;
sh = grsel_shift[bit]; sh = grsel_shift[bit];
mask |= (u64)fmask << sh; mask |= (unsigned long)fmask << sh;
value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
<< sh;
} }
/* /*
* Bus events on bytes 0 and 2 can be counted * Bus events on bytes 0 and 2 can be counted
...@@ -188,22 +192,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -188,22 +192,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
if (!pmc) if (!pmc)
grp = byte & 1; grp = byte & 1;
/* Set byte lane select field */ /* Set byte lane select field */
mask |= 0xfULL << (24 - 4 * byte); mask |= 0xfUL << (24 - 4 * byte);
value |= (u64)unit << (24 - 4 * byte); value |= (unsigned long)unit << (24 - 4 * byte);
} }
if (grp == 0) { if (grp == 0) {
/* increment PMC1/2 field */ /* increment PMC1/2 field */
mask |= 0x200000000ull; mask |= 0x200000000ul;
value |= 0x080000000ull; value |= 0x080000000ul;
} else if (grp == 1) { } else if (grp == 1) {
/* increment PMC3/4 field */ /* increment PMC3/4 field */
mask |= 0x40000000ull; mask |= 0x40000000ul;
value |= 0x10000000ull; value |= 0x10000000ul;
} }
if (pmc < 5) { if (pmc < 5) {
/* need a counter from PMC1-4 set */ /* need a counter from PMC1-4 set */
mask |= 0x8000000000000ull; mask |= 0x8000000000000ul;
value |= 0x1000000000000ull; value |= 0x1000000000000ul;
} }
*maskp = mask; *maskp = mask;
*valp = value; *valp = value;
...@@ -383,10 +387,10 @@ static int power5_marked_instr_event(u64 event) ...@@ -383,10 +387,10 @@ static int power5_marked_instr_event(u64 event)
} }
static int power5_compute_mmcr(u64 event[], int n_ev, static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr1 = 0; unsigned long mmcr1 = 0;
u64 mmcra = 0; unsigned long mmcra = 0;
unsigned int pmc, unit, byte, psel; unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp; unsigned int ttm, grp;
int i, isbus, bit, grsel; int i, isbus, bit, grsel;
...@@ -457,7 +461,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -457,7 +461,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
continue; continue;
if (ttmuse++) if (ttmuse++)
return -1; return -1;
mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
} }
ttmuse = 0; ttmuse = 0;
for (; i <= PM_GRS; ++i) { for (; i <= PM_GRS; ++i) {
...@@ -465,7 +469,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -465,7 +469,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
continue; continue;
if (ttmuse++) if (ttmuse++)
return -1; return -1;
mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
} }
if (ttmuse > 1) if (ttmuse > 1)
return -1; return -1;
...@@ -480,10 +484,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -480,10 +484,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
unit = PM_ISU0_ALT; unit = PM_ISU0_ALT;
} else if (unit == PM_LSU1 + 1) { } else if (unit == PM_LSU1 + 1) {
/* select lower word of LSU1 for this byte */ /* select lower word of LSU1 for this byte */
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
} }
ttm = unit >> 2; ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); mmcr1 |= (unsigned long)ttm
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
} }
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
...@@ -513,7 +518,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -513,7 +518,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
--pmc; --pmc;
if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
/* add events on higher-numbered bus */ /* add events on higher-numbered bus */
mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
} else { } else {
/* Instructions or run cycles on PMC5/6 */ /* Instructions or run cycles on PMC5/6 */
--pmc; --pmc;
...@@ -521,7 +526,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -521,7 +526,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
if (isbus && unit == PM_GRS) { if (isbus && unit == PM_GRS) {
bit = psel & 7; bit = psel & 7;
grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
mmcr1 |= (u64)grsel << grsel_shift[bit]; mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
} }
if (power5_marked_instr_event(event[i])) if (power5_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE; mmcra |= MMCRA_SAMPLE_ENABLE;
...@@ -541,7 +546,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, ...@@ -541,7 +546,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
return 0; return 0;
} }
static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
if (pmc <= 3) if (pmc <= 3)
mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
...@@ -596,11 +601,12 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -596,11 +601,12 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu power5_pmu = { static struct power_pmu power5_pmu = {
.name = "POWER5",
.n_counter = 6, .n_counter = 6,
.max_alternatives = MAX_ALT, .max_alternatives = MAX_ALT,
.add_fields = 0x7000090000555ull, .add_fields = 0x7000090000555ul,
.test_adder = 0x3000490000000ull, .test_adder = 0x3000490000000ul,
.compute_mmcr = power5_compute_mmcr, .compute_mmcr = power5_compute_mmcr,
.get_constraint = power5_get_constraint, .get_constraint = power5_get_constraint,
.get_alternatives = power5_get_alternatives, .get_alternatives = power5_get_alternatives,
...@@ -609,3 +615,13 @@ struct power_pmu power5_pmu = { ...@@ -609,3 +615,13 @@ struct power_pmu power5_pmu = {
.generic_events = power5_generic_events, .generic_events = power5_generic_events,
.cache_events = &power5_cache_events, .cache_events = &power5_cache_events,
}; };
/*
 * Register the POWER5 PMU description if the running CPU identifies
 * itself as "ppc64/power5" through cur_cpu_spec->oprofile_cpu_type.
 *
 * Returns -ENODEV when the CPU type string is missing or does not match,
 * otherwise whatever register_power_pmu() returns.
 */
static int init_power5_pmu(void)
{
	const char *cpu_type = cur_cpu_spec->oprofile_cpu_type;

	/*
	 * This initcall runs regardless of which CPU we booted on, so the
	 * type string must be checked for NULL before handing it to strcmp().
	 */
	if (!cpu_type || strcmp(cpu_type, "ppc64/power5"))
		return -ENODEV;

	return register_power_pmu(&power5_pmu);
}
arch_initcall(init_power5_pmu);
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for POWER6 * Bits in event code for POWER6
...@@ -41,9 +43,9 @@ ...@@ -41,9 +43,9 @@
#define MMCR1_NESTSEL_SH 45 #define MMCR1_NESTSEL_SH 45
#define MMCR1_NESTSEL_MSK 0x7 #define MMCR1_NESTSEL_MSK 0x7
#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA ((u64)1 << 44) #define MMCR1_PMC1_LLA (1ul << 44)
#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) #define MMCR1_PMC1_LLA_VALUE (1ul << 39)
#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) #define MMCR1_PMC1_ADDR_SEL (1ul << 35)
#define MMCR1_PMC1SEL_SH 24 #define MMCR1_PMC1SEL_SH 24
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff #define MMCR1_PMCSEL_MSK 0xff
...@@ -173,10 +175,10 @@ static int power6_marked_instr_event(u64 event) ...@@ -173,10 +175,10 @@ static int power6_marked_instr_event(u64 event)
* Assign PMC numbers and compute MMCR1 value for a set of events * Assign PMC numbers and compute MMCR1 value for a set of events
*/ */
static int p6_compute_mmcr(u64 event[], int n_ev, static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr1 = 0; unsigned long mmcr1 = 0;
u64 mmcra = 0; unsigned long mmcra = 0;
int i; int i;
unsigned int pmc, ev, b, u, s, psel; unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0; unsigned int ttmset = 0;
...@@ -215,7 +217,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, ...@@ -215,7 +217,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
/* check for conflict on this byte of event bus */ /* check for conflict on this byte of event bus */
if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
return -1; return -1;
mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
ttmset |= 1 << b; ttmset |= 1 << b;
if (u == 5) { if (u == 5) {
/* Nest events have a further mux */ /* Nest events have a further mux */
...@@ -224,7 +226,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, ...@@ -224,7 +226,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
MMCR1_NESTSEL(mmcr1) != s) MMCR1_NESTSEL(mmcr1) != s)
return -1; return -1;
ttmset |= 0x10; ttmset |= 0x10;
mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
} }
if (0x30 <= psel && psel <= 0x3d) { if (0x30 <= psel && psel <= 0x3d) {
/* these need the PMCx_ADDR_SEL bits */ /* these need the PMCx_ADDR_SEL bits */
...@@ -243,7 +245,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, ...@@ -243,7 +245,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
if (power6_marked_instr_event(event[i])) if (power6_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE; mmcra |= MMCRA_SAMPLE_ENABLE;
if (pmc < 4) if (pmc < 4)
mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
} }
mmcr[0] = 0; mmcr[0] = 0;
if (pmc_inuse & 1) if (pmc_inuse & 1)
...@@ -265,10 +267,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev, ...@@ -265,10 +267,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
* 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
* 32-34 select field: nest (subunit) event selector * 32-34 select field: nest (subunit) event selector
*/ */
static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) static int p6_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, byte, sh, subunit; int pmc, byte, sh, subunit;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) { if (pmc) {
...@@ -282,11 +285,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -282,11 +285,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
sh = byte * 4 + (16 - PM_UNIT_SH); sh = byte * 4 + (16 - PM_UNIT_SH);
mask |= PM_UNIT_MSKS << sh; mask |= PM_UNIT_MSKS << sh;
value |= (u64)(event & PM_UNIT_MSKS) << sh; value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
mask |= (u64)PM_SUBUNIT_MSK << 32; mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
value |= (u64)subunit << 32; value |= (unsigned long)subunit << 32;
} }
} }
if (pmc <= 4) { if (pmc <= 4) {
...@@ -458,7 +461,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) ...@@ -458,7 +461,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt; return nalt;
} }
static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
/* Set PMCxSEL to 0 to disable PMCx */ /* Set PMCxSEL to 0 to disable PMCx */
if (pmc <= 3) if (pmc <= 3)
...@@ -515,7 +518,8 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -515,7 +518,8 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu power6_pmu = { static struct power_pmu power6_pmu = {
.name = "POWER6",
.n_counter = 6, .n_counter = 6,
.max_alternatives = MAX_ALT, .max_alternatives = MAX_ALT,
.add_fields = 0x1555, .add_fields = 0x1555,
...@@ -530,3 +534,13 @@ struct power_pmu power6_pmu = { ...@@ -530,3 +534,13 @@ struct power_pmu power6_pmu = {
.generic_events = power6_generic_events, .generic_events = power6_generic_events,
.cache_events = &power6_cache_events, .cache_events = &power6_cache_events,
}; };
/*
 * Register the POWER6 PMU back-end, but only when the running CPU's
 * oprofile CPU type string is "ppc64/power6".
 *
 * Returns -ENODEV when the CPU type is unset or does not match, otherwise
 * the result of register_power_pmu().
 */
static int init_power6_pmu(void)
{
	/* strcmp() on a NULL pointer is undefined, so reject it up front. */
	if (!cur_cpu_spec->oprofile_cpu_type ||
	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
		return -ENODEV;

	return register_power_pmu(&power6_pmu);
}

arch_initcall(init_power6_pmu);
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for POWER7 * Bits in event code for POWER7
...@@ -71,10 +73,11 @@ ...@@ -71,10 +73,11 @@
* 0-9: Count of events needing PMC1..PMC5 * 0-9: Count of events needing PMC1..PMC5
*/ */
static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) static int power7_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, sh; int pmc, sh;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) { if (pmc) {
...@@ -224,10 +227,10 @@ static int power7_marked_instr_event(u64 event) ...@@ -224,10 +227,10 @@ static int power7_marked_instr_event(u64 event)
} }
static int power7_compute_mmcr(u64 event[], int n_ev, static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr1 = 0; unsigned long mmcr1 = 0;
u64 mmcra = 0; unsigned long mmcra = 0;
unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc, unit, combine, l2sel, psel;
unsigned int pmc_inuse = 0; unsigned int pmc_inuse = 0;
int i; int i;
...@@ -265,11 +268,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev, ...@@ -265,11 +268,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
--pmc; --pmc;
} }
if (pmc <= 3) { if (pmc <= 3) {
mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); mmcr1 |= (unsigned long) unit
mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); << (MMCR1_TTM0SEL_SH - 4 * pmc);
mmcr1 |= (unsigned long) combine
<< (MMCR1_PMC1_COMBINE_SH - pmc);
mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
if (unit == 6) /* L2 events */ if (unit == 6) /* L2 events */
mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; mmcr1 |= (unsigned long) l2sel
<< MMCR1_L2SEL_SH;
} }
if (power7_marked_instr_event(event[i])) if (power7_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE; mmcra |= MMCRA_SAMPLE_ENABLE;
...@@ -287,10 +293,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev, ...@@ -287,10 +293,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
return 0; return 0;
} }
static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
if (pmc <= 3) if (pmc <= 3)
mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
} }
static int power7_generic_events[] = { static int power7_generic_events[] = {
...@@ -342,11 +348,12 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -342,11 +348,12 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu power7_pmu = { static struct power_pmu power7_pmu = {
.name = "POWER7",
.n_counter = 6, .n_counter = 6,
.max_alternatives = MAX_ALT + 1, .max_alternatives = MAX_ALT + 1,
.add_fields = 0x1555ull, .add_fields = 0x1555ul,
.test_adder = 0x3000ull, .test_adder = 0x3000ul,
.compute_mmcr = power7_compute_mmcr, .compute_mmcr = power7_compute_mmcr,
.get_constraint = power7_get_constraint, .get_constraint = power7_get_constraint,
.get_alternatives = power7_get_alternatives, .get_alternatives = power7_get_alternatives,
...@@ -355,3 +362,13 @@ struct power_pmu power7_pmu = { ...@@ -355,3 +362,13 @@ struct power_pmu power7_pmu = {
.generic_events = power7_generic_events, .generic_events = power7_generic_events,
.cache_events = &power7_cache_events, .cache_events = &power7_cache_events,
}; };
/*
 * Register the POWER7 PMU back-end, but only when the running CPU's
 * oprofile CPU type string is "ppc64/power7".
 *
 * Returns -ENODEV when the CPU type is unset or does not match, otherwise
 * the result of register_power_pmu().
 */
static int init_power7_pmu(void)
{
	/* strcmp() on a NULL pointer is undefined, so reject it up front. */
	if (!cur_cpu_spec->oprofile_cpu_type ||
	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
		return -ENODEV;

	return register_power_pmu(&power7_pmu);
}

arch_initcall(init_power7_pmu);
...@@ -10,7 +10,9 @@ ...@@ -10,7 +10,9 @@
*/ */
#include <linux/string.h> #include <linux/string.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/string.h>
#include <asm/reg.h> #include <asm/reg.h>
#include <asm/cputable.h>
/* /*
* Bits in event code for PPC970 * Bits in event code for PPC970
...@@ -183,7 +185,7 @@ static int p970_marked_instr_event(u64 event) ...@@ -183,7 +185,7 @@ static int p970_marked_instr_event(u64 event)
} }
/* Masks and values for using events from the various units */ /* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = { static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
[PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
...@@ -192,10 +194,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { ...@@ -192,10 +194,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_STS] = { 0x380000000000ull, 0x310000000000ull }, [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
}; };
static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) static int p970_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{ {
int pmc, byte, unit, sh, spcsel; int pmc, byte, unit, sh, spcsel;
u64 mask = 0, value = 0; unsigned long mask = 0, value = 0;
int grp = -1; int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
...@@ -222,7 +225,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -222,7 +225,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
grp = byte & 1; grp = byte & 1;
/* Set byte lane select field */ /* Set byte lane select field */
mask |= 0xfULL << (28 - 4 * byte); mask |= 0xfULL << (28 - 4 * byte);
value |= (u64)unit << (28 - 4 * byte); value |= (unsigned long)unit << (28 - 4 * byte);
} }
if (grp == 0) { if (grp == 0) {
/* increment PMC1/2/5/6 field */ /* increment PMC1/2/5/6 field */
...@@ -236,7 +239,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) ...@@ -236,7 +239,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
if (spcsel) { if (spcsel) {
mask |= 3ull << 48; mask |= 3ull << 48;
value |= (u64)spcsel << 48; value |= (unsigned long)spcsel << 48;
} }
*maskp = mask; *maskp = mask;
*valp = value; *valp = value;
...@@ -257,9 +260,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) ...@@ -257,9 +260,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
} }
static int p970_compute_mmcr(u64 event[], int n_ev, static int p970_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[]) unsigned int hwc[], unsigned long mmcr[])
{ {
u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel; unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp; unsigned int ttm, grp;
unsigned int pmc_inuse = 0; unsigned int pmc_inuse = 0;
...@@ -320,7 +323,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, ...@@ -320,7 +323,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
continue; continue;
ttm = unitmap[i]; ttm = unitmap[i];
++ttmuse[(ttm >> 2) & 1]; ++ttmuse[(ttm >> 2) & 1];
mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
} }
/* Check only one unit per TTMx */ /* Check only one unit per TTMx */
if (ttmuse[0] > 1 || ttmuse[1] > 1) if (ttmuse[0] > 1 || ttmuse[1] > 1)
...@@ -340,7 +343,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, ...@@ -340,7 +343,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
if (unit == PM_LSU1L && byte >= 2) if (unit == PM_LSU1L && byte >= 2)
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
} }
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); mmcr1 |= (unsigned long)ttm
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
} }
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
...@@ -386,7 +390,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, ...@@ -386,7 +390,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
for (pmc = 0; pmc < 2; ++pmc) for (pmc = 0; pmc < 2; ++pmc)
mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
for (; pmc < 8; ++pmc) for (; pmc < 8; ++pmc)
mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); mmcr1 |= (unsigned long)pmcsel[pmc]
<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
if (pmc_inuse & 1) if (pmc_inuse & 1)
mmcr0 |= MMCR0_PMC1CE; mmcr0 |= MMCR0_PMC1CE;
if (pmc_inuse & 0xfe) if (pmc_inuse & 0xfe)
...@@ -401,7 +406,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, ...@@ -401,7 +406,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
return 0; return 0;
} }
static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{ {
int shift, i; int shift, i;
...@@ -467,7 +472,8 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { ...@@ -467,7 +472,8 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
}, },
}; };
struct power_pmu ppc970_pmu = { static struct power_pmu ppc970_pmu = {
.name = "PPC970/FX/MP",
.n_counter = 8, .n_counter = 8,
.max_alternatives = 2, .max_alternatives = 2,
.add_fields = 0x001100005555ull, .add_fields = 0x001100005555ull,
...@@ -480,3 +486,14 @@ struct power_pmu ppc970_pmu = { ...@@ -480,3 +486,14 @@ struct power_pmu ppc970_pmu = {
.generic_events = ppc970_generic_events, .generic_events = ppc970_generic_events,
.cache_events = &ppc970_cache_events, .cache_events = &ppc970_cache_events,
}; };
/*
 * Register the PPC970 PMU back-end, but only when the running CPU's
 * oprofile CPU type string is "ppc64/970" or "ppc64/970MP".
 *
 * Returns -ENODEV when the CPU type is unset or matches neither string,
 * otherwise the result of register_power_pmu().
 */
static int init_ppc970_pmu(void)
{
	const char *cpu_type = cur_cpu_spec->oprofile_cpu_type;

	/* strcmp() on a NULL pointer is undefined, so reject it up front. */
	if (!cpu_type)
		return -ENODEV;

	if (strcmp(cpu_type, "ppc64/970") && strcmp(cpu_type, "ppc64/970MP"))
		return -ENODEV;

	return register_power_pmu(&ppc970_pmu);
}

arch_initcall(init_ppc970_pmu);
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include <linux/posix-timers.h> #include <linux/posix-timers.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/perf_counter.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/processor.h> #include <asm/processor.h>
...@@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void) ...@@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void)
} }
#endif /* CONFIG_PPC_ISERIES */ #endif /* CONFIG_PPC_ISERIES */
#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32)
/* Per-CPU flag: non-zero while perf_counter work is waiting to be run. */
DEFINE_PER_CPU(u8, perf_counter_pending);

/*
 * Mark perf_counter work as pending on the current CPU and load the
 * decrementer with 1.  timer_interrupt() tests the flag, clears it and
 * calls perf_counter_do_pending().
 */
void set_perf_counter_pending(void)
{
	get_cpu_var(perf_counter_pending) = 1;
	set_dec(1);
	put_cpu_var(perf_counter_pending);
}

/* Expression form: parenthesized so it composes safely in any condition. */
#define test_perf_counter_pending()	(__get_cpu_var(perf_counter_pending))
/* Statement form: do/while(0) so it always behaves as a single statement. */
#define clear_perf_counter_pending()	\
	do { __get_cpu_var(perf_counter_pending) = 0; } while (0)

#else  /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */

/* Stubs: nothing is ever pending when this path is not configured. */
#define test_perf_counter_pending()	(0)
#define clear_perf_counter_pending()	do { } while (0)

#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
/* /*
* For iSeries shared processors, we have to let the hypervisor * For iSeries shared processors, we have to let the hypervisor
* set the hardware decrementer. We set a virtual decrementer * set the hardware decrementer. We set a virtual decrementer
...@@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs) ...@@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs)
set_dec(DECREMENTER_MAX); set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC32
if (test_perf_counter_pending()) {
clear_perf_counter_pending();
perf_counter_do_pending();
}
if (atomic_read(&ppc_n_lost_interrupts) != 0) if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs); do_IRQ(regs);
#endif #endif
......
config PPC64 config PPC64
bool "64-bit kernel" bool "64-bit kernel"
default n default n
select HAVE_PERF_COUNTERS select PPC_HAVE_PMU_SUPPORT
help help
This option selects whether a 32-bit or a 64-bit kernel This option selects whether a 32-bit or a 64-bit kernel
will be built. will be built.
...@@ -78,6 +78,7 @@ config POWER4_ONLY ...@@ -78,6 +78,7 @@ config POWER4_ONLY
config 6xx config 6xx
def_bool y def_bool y
depends on PPC32 && PPC_BOOK3S depends on PPC32 && PPC_BOOK3S
select PPC_HAVE_PMU_SUPPORT
config POWER3 config POWER3
bool bool
...@@ -246,6 +247,15 @@ config VIRT_CPU_ACCOUNTING ...@@ -246,6 +247,15 @@ config VIRT_CPU_ACCOUNTING
If in doubt, say Y here. If in doubt, say Y here.
config PPC_HAVE_PMU_SUPPORT
bool
config PPC_PERF_CTRS
def_bool y
depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT
help
This enables the powerpc-specific perf_counter back-end.
config SMP config SMP
depends on PPC_STD_MMU || FSL_BOOKE depends on PPC_STD_MMU || FSL_BOOKE
bool "Symmetric multi-processing support" bool "Symmetric multi-processing support"
......
...@@ -84,11 +84,6 @@ union cpuid10_edx { ...@@ -84,11 +84,6 @@ union cpuid10_edx {
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
extern void set_perf_counter_pending(void);
#define clear_perf_counter_pending() do { } while (0)
#define test_perf_counter_pending() (0)
#ifdef CONFIG_PERF_COUNTERS #ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void); extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void); extern void perf_counters_lapic_init(void);
......
...@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); ...@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
#endif #endif
#if defined(CONFIG_HIGHPTE) #if defined(CONFIG_HIGHPTE)
/*
 * Select the kmap_atomic slot used for PTE mappings from the current
 * execution context: KM_NMI_PTE when in_nmi(), KM_IRQ_PTE when in_irq(),
 * and KM_PTE0 otherwise.  The macro is re-evaluated at every use, so a
 * map and its matching unmap must run in the same context to pick the
 * same slot.
 */
#define __KM_PTE \
(in_nmi() ? KM_NMI_PTE : \
in_irq() ? KM_IRQ_PTE : \
KM_PTE0)
#define pte_offset_map(dir, address) \ #define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
pte_index((address))) pte_index((address)))
#define pte_offset_map_nested(dir, address) \ #define pte_offset_map_nested(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
pte_index((address))) pte_index((address)))
#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
#else #else
#define pte_offset_map(dir, address) \ #define pte_offset_map(dir, address) \
......
...@@ -25,7 +25,12 @@ ...@@ -25,7 +25,12 @@
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(-1UL) #define KERNEL_DS MAKE_MM_SEG(-1UL)
#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
#ifdef CONFIG_X86_32
# define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
#else
# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK)
#endif
#define get_ds() (KERNEL_DS) #define get_ds() (KERNEL_DS)
#define get_fs() (current_thread_info()->addr_limit) #define get_fs() (current_thread_info()->addr_limit)
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
...@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event) ...@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
return event & CORE_EVNTSEL_MASK; return event & CORE_EVNTSEL_MASK;
} }
static const u64 amd_0f_hw_cache_event_ids static const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{ {
[ C(L1D) ] = { [ C(L1D) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
}, },
}, },
[ C(L1I ) ] = { [ C(L1I ) ] = {
...@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids ...@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
[ C(RESULT_MISS) ] = -1, [ C(RESULT_MISS) ] = -1,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0,
}, },
}, },
[ C(LL ) ] = { [ C(LL ) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
...@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids ...@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
}, },
[ C(DTLB) ] = { [ C(DTLB) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0, [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0,
...@@ -1223,6 +1224,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -1223,6 +1224,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
if (!intel_pmu_save_and_restart(counter)) if (!intel_pmu_save_and_restart(counter))
continue; continue;
data.period = counter->hw.last_period;
if (perf_counter_overflow(counter, 1, &data)) if (perf_counter_overflow(counter, 1, &data))
intel_pmu_disable_counter(&counter->hw, bit); intel_pmu_disable_counter(&counter->hw, bit);
} }
...@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void) ...@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void) static int amd_pmu_init(void)
{ {
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
x86_pmu = amd_pmu; x86_pmu = amd_pmu;
switch (boot_cpu_data.x86) { /* Events are common for all AMDs */
case 0x0f: memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
case 0x10:
case 0x11:
memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
sizeof(hw_cache_event_ids)); sizeof(hw_cache_event_ids));
pr_cont("AMD Family 0f/10/11 events, ");
break;
}
return 0; return 0;
} }
...@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) ...@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
*/ */
static inline static inline
void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{ {
if (entry->nr < MAX_STACK_DEPTH) if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip; entry->ip[entry->nr++] = ip;
} }
...@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg) ...@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name) static int backtrace_stack(void *data, char *name)
{ {
/* Don't bother with IRQ stacks for now */ /* Process all stacks: */
return -1; return 0;
} }
static void backtrace_address(void *data, unsigned long addr, int reliable) static void backtrace_address(void *data, unsigned long addr, int reliable)
...@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = { ...@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
.address = backtrace_address, .address = backtrace_address,
}; };
#include "../dumpstack.h"
static void static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{ {
unsigned long bp; callchain_store(entry, PERF_CONTEXT_KERNEL);
char *stack; callchain_store(entry, regs->ip);
int nr = entry->nr;
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}
callchain_store(entry, instruction_pointer(regs)); /*
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
*/
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
unsigned long offset, addr = (unsigned long)from;
int type = in_nmi() ? KM_NMI : KM_IRQ0;
unsigned long size, len = 0;
struct page *page;
void *map;
int ret;
stack = ((char *)regs + sizeof(struct pt_regs)); do {
#ifdef CONFIG_FRAME_POINTER ret = __get_user_pages_fast(addr, 1, 0, &page);
bp = frame_pointer(regs); if (!ret)
#else break;
bp = 0;
#endif
dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); offset = addr & (PAGE_SIZE - 1);
size = min(PAGE_SIZE - offset, n - len);
entry->kernel = entry->nr - nr; map = kmap_atomic(page, type);
} memcpy(to, map+offset, size);
kunmap_atomic(map, type);
put_page(page);
len += size;
to += size;
addr += size;
struct stack_frame { } while (len < n);
const void __user *next_fp;
unsigned long return_address; return len;
}; }
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{ {
int ret; unsigned long bytes;
if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
return 0;
ret = 1; return bytes == sizeof(*frame);
pagefault_disable();
if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
ret = 0;
pagefault_enable();
return ret;
} }
static void static void
...@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) ...@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{ {
struct stack_frame frame; struct stack_frame frame;
const void __user *fp; const void __user *fp;
int nr = entry->nr;
regs = (struct pt_regs *)current->thread.sp0 - 1; if (!user_mode(regs))
regs = task_pt_regs(current);
fp = (void __user *)regs->bp; fp = (void __user *)regs->bp;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip); callchain_store(entry, regs->ip);
while (entry->nr < MAX_STACK_DEPTH) { while (entry->nr < PERF_MAX_STACK_DEPTH) {
frame.next_fp = NULL; frame.next_frame = NULL;
frame.return_address = 0; frame.return_address = 0;
if (!copy_stack_frame(fp, &frame)) if (!copy_stack_frame(fp, &frame))
break; break;
if ((unsigned long)fp < user_stack_pointer(regs)) if ((unsigned long)fp < regs->sp)
break; break;
callchain_store(entry, frame.return_address); callchain_store(entry, frame.return_address);
fp = frame.next_fp; fp = frame.next_frame;
} }
entry->user = entry->nr - nr;
} }
static void static void
...@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) ...@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
entry = &__get_cpu_var(irq_entry); entry = &__get_cpu_var(irq_entry);
entry->nr = 0; entry->nr = 0;
entry->hv = 0;
entry->kernel = 0;
entry->user = 0;
perf_do_callchain(regs, entry); perf_do_callchain(regs, entry);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
static inline pte_t gup_get_pte(pte_t *ptep) static inline pte_t gup_get_pte(pte_t *ptep)
{ {
#ifndef CONFIG_X86_PAE #ifndef CONFIG_X86_PAE
return *ptep; return ACCESS_ONCE(*ptep);
#else #else
/* /*
* With get_user_pages_fast, we walk down the pagetables without taking * With get_user_pages_fast, we walk down the pagetables without taking
...@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, ...@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1; return 1;
} }
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 *
 * @start is rounded down to a page boundary and @nr_pages pages from
 * there are walked with local interrupts disabled.  Returns 0 if the
 * range fails access_ok(); otherwise returns the count accumulated in
 * 'nr' by gup_pud_range(), which can be short of @nr_pages because the
 * walk stops at the first empty pgd entry or the first gup_pud_range()
 * failure.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
unsigned long flags;
pgd_t *pgdp;
int nr = 0;
/* Work on whole pages: [start, end) covers nr_pages pages. */
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
(void __user *)start, len)))
return 0;
/*
 * XXX: batch / limit 'nr', to avoid large irq off latency
 * needs some instrumenting to determine the common sizes used by
 * important workloads (eg. DB2), and whether limiting the batch size
 * will decrease performance.
 *
 * It seems like we're in the clear for the moment. Direct-IO is
 * the main guy that batches up lots of get_user_pages, and even
 * they are limited to 64-at-a-time which is not so many.
 */
/*
 * This doesn't prevent pagetable teardown, but does prevent
 * the pagetables and pages from being freed on x86.
 *
 * So long as we atomically load page table pointers versus teardown
 * (which we do on x86, with the above PAE exception), we can follow the
 * address down to the page and take a ref on it.
 */
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
/* Take one snapshot of the entry and use only that copy below. */
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
return nr;
}
/** /**
* get_user_pages_fast() - pin user pages in memory * get_user_pages_fast() - pin user pages in memory
* @start: starting user address * @start: starting user address
......
...@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { ...@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_delete = anon_inodefs_delete_dentry, .d_delete = anon_inodefs_delete_dentry,
}; };
/*
 * nop .set_page_dirty method so that people can use .page_mkwrite on
 * anon inodes.
 *
 * Ignores @page and always returns 0.
 */
static int anon_set_page_dirty(struct page *page)
{
	return 0;
}
/*
 * Address-space operations table for anon inodes. Only .set_page_dirty is
 * populated, pointing at the no-op anon_set_page_dirty().
 */
static const struct address_space_operations anon_aops = {
.set_page_dirty = anon_set_page_dirty,
};
/** /**
* anon_inode_getfd - creates a new file instance by hooking it up to an * anon_inode_getfd - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class" * anonymous inode, and a dentry that describe the "class"
...@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void) ...@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void)
inode->i_fop = &anon_inode_fops; inode->i_fop = &anon_inode_fops;
inode->i_mapping->a_ops = &anon_aops;
/* /*
* Mark the inode dirty from the very beginning, * Mark the inode dirty from the very beginning,
* that way it will never be moved to the dirty * that way it will never be moved to the dirty
......
...@@ -24,7 +24,10 @@ D(12) KM_SOFTIRQ1, ...@@ -24,7 +24,10 @@ D(12) KM_SOFTIRQ1,
D(13) KM_SYNC_ICACHE, D(13) KM_SYNC_ICACHE,
D(14) KM_SYNC_DCACHE, D(14) KM_SYNC_DCACHE,
D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */
D(16) KM_TYPE_NR D(16) KM_IRQ_PTE,
D(17) KM_NMI,
D(18) KM_NMI_PTE,
D(19) KM_TYPE_NR
}; };
#undef D #undef D
......
...@@ -853,6 +853,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma, ...@@ -853,6 +853,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
struct vm_area_struct **pprev, unsigned long start, struct vm_area_struct **pprev, unsigned long start,
unsigned long end, unsigned long newflags); unsigned long end, unsigned long newflags);
/*
* Doesn't attempt to fault pages in; returns short (fewer than nr_pages) instead.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
/* /*
* A callback you can register to apply pressure to ageable caches. * A callback you can register to apply pressure to ageable caches.
* *
......
...@@ -236,10 +236,16 @@ struct perf_counter_mmap_page { ...@@ -236,10 +236,16 @@ struct perf_counter_mmap_page {
/* /*
* Control data for the mmap() data buffer. * Control data for the mmap() data buffer.
* *
* User-space reading this value should issue an rmb(), on SMP capable * User-space reading the @data_head value should issue an rmb(), on
* platforms, after reading this value -- see perf_counter_wakeup(). * SMP capable platforms, after reading this value -- see
* perf_counter_wakeup().
*
* When the mapping is PROT_WRITE the @data_tail value should be
* written by userspace to reflect the last read data. In this case
* the kernel will not over-write unread data.
*/ */
__u64 data_head; /* head in the data section */ __u64 data_head; /* head in the data section */
__u64 data_tail; /* user-space written tail */
}; };
#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0)
...@@ -273,6 +279,15 @@ enum perf_event_type { ...@@ -273,6 +279,15 @@ enum perf_event_type {
*/ */
PERF_EVENT_MMAP = 1, PERF_EVENT_MMAP = 1,
/*
* struct {
* struct perf_event_header header;
* u64 id;
* u64 lost;
* };
*/
PERF_EVENT_LOST = 2,
/* /*
* struct { * struct {
* struct perf_event_header header; * struct perf_event_header header;
...@@ -313,30 +328,39 @@ enum perf_event_type { ...@@ -313,30 +328,39 @@ enum perf_event_type {
/* /*
* When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
* will be PERF_RECORD_* * will be PERF_SAMPLE_*
* *
* struct { * struct {
* struct perf_event_header header; * struct perf_event_header header;
* *
* { u64 ip; } && PERF_RECORD_IP * { u64 ip; } && PERF_SAMPLE_IP
* { u32 pid, tid; } && PERF_RECORD_TID * { u32 pid, tid; } && PERF_SAMPLE_TID
* { u64 time; } && PERF_RECORD_TIME * { u64 time; } && PERF_SAMPLE_TIME
* { u64 addr; } && PERF_RECORD_ADDR * { u64 addr; } && PERF_SAMPLE_ADDR
* { u64 config; } && PERF_RECORD_CONFIG * { u64 config; } && PERF_SAMPLE_CONFIG
* { u32 cpu, res; } && PERF_RECORD_CPU * { u32 cpu, res; } && PERF_SAMPLE_CPU
* *
* { u64 nr; * { u64 nr;
* { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
* *
* { u16 nr, * { u64 nr,
* hv, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
* kernel,
* user;
* u64 ips[nr]; } && PERF_RECORD_CALLCHAIN
* }; * };
*/ */
}; };
/*
 * Callchain context identifiers. Each value is a small negative number cast
 * to __u64, i.e. it sits at the very top of the 64-bit range.
 *
 * NOTE(review): presumably these are emitted as marker entries inside a
 * callchain's ips[] array to tag the context (hypervisor, kernel, user,
 * guest, ...) of the entries that follow -- confirm against the callchain
 * producers and consumers.
 */
enum perf_callchain_context {
PERF_CONTEXT_HV = (__u64)-32,
PERF_CONTEXT_KERNEL = (__u64)-128,
PERF_CONTEXT_USER = (__u64)-512,
PERF_CONTEXT_GUEST = (__u64)-2048,
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
PERF_CONTEXT_GUEST_USER = (__u64)-2560,
/* Numerically the smallest of the values above when viewed as __u64. */
PERF_CONTEXT_MAX = (__u64)-4095,
};
#ifdef __KERNEL__ #ifdef __KERNEL__
/* /*
* Kernel-internal data types and definitions: * Kernel-internal data types and definitions:
...@@ -356,6 +380,13 @@ enum perf_event_type { ...@@ -356,6 +380,13 @@ enum perf_event_type {
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <asm/atomic.h> #include <asm/atomic.h>
/* Capacity, in entries, of the ip[] array in struct perf_callchain_entry. */
#define PERF_MAX_STACK_DEPTH 255
/*
 * Kernel-side callchain buffer: a 64-bit count followed by a fixed-size
 * array of 64-bit entries.
 *
 * NOTE(review): nr is presumably the number of valid leading entries in
 * ip[] -- confirm against perf_callchain().
 */
struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
};
struct task_struct; struct task_struct;
/** /**
...@@ -414,6 +445,7 @@ struct file; ...@@ -414,6 +445,7 @@ struct file;
struct perf_mmap_data { struct perf_mmap_data {
struct rcu_head rcu_head; struct rcu_head rcu_head;
int nr_pages; /* nr of data pages */ int nr_pages; /* nr of data pages */
int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */ int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */ atomic_t poll; /* POLL_ for wakeups */
...@@ -423,8 +455,8 @@ struct perf_mmap_data { ...@@ -423,8 +455,8 @@ struct perf_mmap_data {
atomic_long_t done_head; /* completed head */ atomic_long_t done_head; /* completed head */
atomic_t lock; /* concurrent writes */ atomic_t lock; /* concurrent writes */
atomic_t wakeup; /* needs a wakeup */ atomic_t wakeup; /* needs a wakeup */
atomic_t lost; /* nr records lost */
struct perf_counter_mmap_page *user_page; struct perf_counter_mmap_page *user_page;
void *data_pages[0]; void *data_pages[0];
...@@ -604,6 +636,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); ...@@ -604,6 +636,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
extern int perf_counter_init_task(struct task_struct *child); extern int perf_counter_init_task(struct task_struct *child);
extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child);
extern void perf_counter_free_task(struct task_struct *task); extern void perf_counter_free_task(struct task_struct *task);
extern void set_perf_counter_pending(void);
extern void perf_counter_do_pending(void); extern void perf_counter_do_pending(void);
extern void perf_counter_print_debug(void); extern void perf_counter_print_debug(void);
extern void __perf_disable(void); extern void __perf_disable(void);
...@@ -649,18 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) ...@@ -649,18 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma)
extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_comm(struct task_struct *tsk);
extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk);
extern void perf_counter_task_migration(struct task_struct *task, int cpu);
#define MAX_STACK_DEPTH 255
struct perf_callchain_entry {
u16 nr;
u16 hv;
u16 kernel;
u16 user;
u64 ip[MAX_STACK_DEPTH];
};
extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
extern int sysctl_perf_counter_paranoid; extern int sysctl_perf_counter_paranoid;
...@@ -701,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { } ...@@ -701,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { }
static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_comm(struct task_struct *tsk) { }
static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { }
static inline void perf_counter_init(void) { } static inline void perf_counter_init(void) { }
static inline void perf_counter_task_migration(struct task_struct *task,
int cpu) { }
#endif #endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
This diff is collapsed.
...@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) ...@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
if (task_hot(p, old_rq->clock, NULL)) if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations); schedstat_inc(p, se.nr_forced2_migrations);
#endif #endif
perf_counter_task_migration(p, new_cpu); perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
1, 1, NULL, 0);
} }
p->se.vruntime -= old_cfsrq->min_vruntime - p->se.vruntime -= old_cfsrq->min_vruntime -
new_cfsrq->min_vruntime; new_cfsrq->min_vruntime;
......
...@@ -157,10 +157,15 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') ...@@ -157,10 +157,15 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
# If we're on a 64-bit kernel, use -m64
ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
M64 := -m64
endif
# CFLAGS and LDFLAGS are for the users to override from the command line. # CFLAGS and LDFLAGS are for the users to override from the command line.
CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6
LDFLAGS = -lpthread -lrt -lelf LDFLAGS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS) ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS) ALL_LDFLAGS = $(LDFLAGS)
STRIP ?= strip STRIP ?= strip
...@@ -285,6 +290,7 @@ LIB_FILE=libperf.a ...@@ -285,6 +290,7 @@ LIB_FILE=libperf.a
LIB_H += ../../include/linux/perf_counter.h LIB_H += ../../include/linux/perf_counter.h
LIB_H += perf.h LIB_H += perf.h
LIB_H += types.h
LIB_H += util/list.h LIB_H += util/list.h
LIB_H += util/rbtree.h LIB_H += util/rbtree.h
LIB_H += util/levenshtein.h LIB_H += util/levenshtein.h
......
This diff is collapsed.
...@@ -37,33 +37,37 @@ static pid_t target_pid = -1; ...@@ -37,33 +37,37 @@ static pid_t target_pid = -1;
static int inherit = 1; static int inherit = 1;
static int force = 0; static int force = 0;
static int append_file = 0; static int append_file = 0;
static int call_graph = 0;
static int verbose = 0; static int verbose = 0;
static long samples; static long samples;
static struct timeval last_read; static struct timeval last_read;
static struct timeval this_read; static struct timeval this_read;
static __u64 bytes_written; static u64 bytes_written;
static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
static int nr_poll; static int nr_poll;
static int nr_cpu; static int nr_cpu;
static int file_new = 1;
static struct perf_file_header file_header;
struct mmap_event { struct mmap_event {
struct perf_event_header header; struct perf_event_header header;
__u32 pid; u32 pid;
__u32 tid; u32 tid;
__u64 start; u64 start;
__u64 len; u64 len;
__u64 pgoff; u64 pgoff;
char filename[PATH_MAX]; char filename[PATH_MAX];
}; };
struct comm_event { struct comm_event {
struct perf_event_header header; struct perf_event_header header;
__u32 pid; u32 pid;
__u32 tid; u32 tid;
char comm[16]; char comm[16];
}; };
...@@ -77,10 +81,10 @@ struct mmap_data { ...@@ -77,10 +81,10 @@ struct mmap_data {
static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
static unsigned int mmap_read_head(struct mmap_data *md) static unsigned long mmap_read_head(struct mmap_data *md)
{ {
struct perf_counter_mmap_page *pc = md->base; struct perf_counter_mmap_page *pc = md->base;
int head; long head;
head = pc->data_head; head = pc->data_head;
rmb(); rmb();
...@@ -88,6 +92,32 @@ static unsigned int mmap_read_head(struct mmap_data *md) ...@@ -88,6 +92,32 @@ static unsigned int mmap_read_head(struct mmap_data *md)
return head; return head;
} }
/*
 * Publish how far user-space has consumed the mmap()ed data buffer.
 *
 * @md:   mapping whose control page (md->base) is updated
 * @tail: new value for the control page's data_tail field
 */
static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
{
	struct perf_counter_mmap_page *pc = md->base;

	/*
	 * Ensure all reads of the data buffer are done before we write the
	 * tail out; otherwise the store could become visible first and the
	 * kernel could overwrite records that are still being read.
	 *
	 * A full barrier via the compiler builtin is used because no mb()
	 * definition is visible here.
	 */
	__sync_synchronize();

	pc->data_tail = tail;
}
/*
 * Write exactly @size bytes from @buf to the output file descriptor.
 *
 * Short writes are continued from where they stopped, and a write that was
 * interrupted by a signal (EINTR) is retried rather than treated as fatal.
 * Any other failure is reported through die(). Every byte written is added
 * to bytes_written.
 */
static void write_output(void *buf, size_t size)
{
	/* Byte cursor: pointer arithmetic on void * is a GNU extension. */
	const char *cursor = buf;

	while (size) {
		/* write() returns ssize_t; an int could truncate the result. */
		ssize_t ret = write(output, cursor, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			die("failed to write");
		}

		size -= ret;
		cursor += ret;

		bytes_written += ret;
	}
}
static void mmap_read(struct mmap_data *md) static void mmap_read(struct mmap_data *md)
{ {
unsigned int head = mmap_read_head(md); unsigned int head = mmap_read_head(md);
...@@ -108,7 +138,7 @@ static void mmap_read(struct mmap_data *md) ...@@ -108,7 +138,7 @@ static void mmap_read(struct mmap_data *md)
* In either case, truncate and restart at head. * In either case, truncate and restart at head.
*/ */
diff = head - old; diff = head - old;
if (diff > md->mask / 2 || diff < 0) { if (diff < 0) {
struct timeval iv; struct timeval iv;
unsigned long msecs; unsigned long msecs;
...@@ -136,36 +166,17 @@ static void mmap_read(struct mmap_data *md) ...@@ -136,36 +166,17 @@ static void mmap_read(struct mmap_data *md)
size = md->mask + 1 - (old & md->mask); size = md->mask + 1 - (old & md->mask);
old += size; old += size;
while (size) { write_output(buf, size);
int ret = write(output, buf, size);
if (ret < 0)
die("failed to write");
size -= ret;
buf += ret;
bytes_written += ret;
}
} }
buf = &data[old & md->mask]; buf = &data[old & md->mask];
size = head - old; size = head - old;
old += size; old += size;
while (size) { write_output(buf, size);
int ret = write(output, buf, size);
if (ret < 0)
die("failed to write");
size -= ret;
buf += ret;
bytes_written += ret;
}
md->prev = old; md->prev = old;
mmap_write_tail(md, old);
} }
static volatile int done = 0; static volatile int done = 0;
...@@ -191,7 +202,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) ...@@ -191,7 +202,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
struct comm_event comm_ev; struct comm_event comm_ev;
char filename[PATH_MAX]; char filename[PATH_MAX];
char bf[BUFSIZ]; char bf[BUFSIZ];
int fd, ret; int fd;
size_t size; size_t size;
char *field, *sep; char *field, *sep;
DIR *tasks; DIR *tasks;
...@@ -201,8 +212,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full) ...@@ -201,8 +212,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
fd = open(filename, O_RDONLY); fd = open(filename, O_RDONLY);
if (fd < 0) { if (fd < 0) {
/*
* We raced with a task exiting - just return:
*/
if (verbose)
fprintf(stderr, "couldn't open %s\n", filename); fprintf(stderr, "couldn't open %s\n", filename);
exit(EXIT_FAILURE); return;
} }
if (read(fd, bf, sizeof(bf)) < 0) { if (read(fd, bf, sizeof(bf)) < 0) {
fprintf(stderr, "couldn't read %s\n", filename); fprintf(stderr, "couldn't read %s\n", filename);
...@@ -223,17 +238,13 @@ static void pid_synthesize_comm_event(pid_t pid, int full) ...@@ -223,17 +238,13 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.pid = pid; comm_ev.pid = pid;
comm_ev.header.type = PERF_EVENT_COMM; comm_ev.header.type = PERF_EVENT_COMM;
size = ALIGN(size, sizeof(__u64)); size = ALIGN(size, sizeof(u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
if (!full) { if (!full) {
comm_ev.tid = pid; comm_ev.tid = pid;
ret = write(output, &comm_ev, comm_ev.header.size); write_output(&comm_ev, comm_ev.header.size);
if (ret < 0) {
perror("failed to write");
exit(-1);
}
return; return;
} }
...@@ -248,11 +259,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) ...@@ -248,11 +259,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.tid = pid; comm_ev.tid = pid;
ret = write(output, &comm_ev, comm_ev.header.size); write_output(&comm_ev, comm_ev.header.size);
if (ret < 0) {
perror("failed to write");
exit(-1);
}
} }
closedir(tasks); closedir(tasks);
return; return;
...@@ -272,8 +279,12 @@ static void pid_synthesize_mmap_samples(pid_t pid) ...@@ -272,8 +279,12 @@ static void pid_synthesize_mmap_samples(pid_t pid)
fp = fopen(filename, "r"); fp = fopen(filename, "r");
if (fp == NULL) { if (fp == NULL) {
/*
* We raced with a task exiting - just return:
*/
if (verbose)
fprintf(stderr, "couldn't open %s\n", filename); fprintf(stderr, "couldn't open %s\n", filename);
exit(EXIT_FAILURE); return;
} }
while (1) { while (1) {
char bf[BUFSIZ], *pbf = bf; char bf[BUFSIZ], *pbf = bf;
...@@ -304,17 +315,14 @@ static void pid_synthesize_mmap_samples(pid_t pid) ...@@ -304,17 +315,14 @@ static void pid_synthesize_mmap_samples(pid_t pid)
size = strlen(execname); size = strlen(execname);
execname[size - 1] = '\0'; /* Remove \n */ execname[size - 1] = '\0'; /* Remove \n */
memcpy(mmap_ev.filename, execname, size); memcpy(mmap_ev.filename, execname, size);
size = ALIGN(size, sizeof(__u64)); size = ALIGN(size, sizeof(u64));
mmap_ev.len -= mmap_ev.start; mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) - mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size)); (sizeof(mmap_ev.filename) - size));
mmap_ev.pid = pid; mmap_ev.pid = pid;
mmap_ev.tid = pid; mmap_ev.tid = pid;
if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { write_output(&mmap_ev, mmap_ev.header.size);
perror("failed to write");
exit(-1);
}
} }
} }
...@@ -351,11 +359,25 @@ static void create_counter(int counter, int cpu, pid_t pid) ...@@ -351,11 +359,25 @@ static void create_counter(int counter, int cpu, pid_t pid)
int track = 1; int track = 1;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (freq) { if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD; attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1; attr->freq = 1;
attr->sample_freq = freq; attr->sample_freq = freq;
} }
if (call_graph)
attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
if (file_new) {
file_header.sample_type = attr->sample_type;
} else {
if (file_header.sample_type != attr->sample_type) {
fprintf(stderr, "incompatible append\n");
exit(-1);
}
}
attr->mmap = track; attr->mmap = track;
attr->comm = track; attr->comm = track;
attr->inherit = (cpu < 0) && inherit; attr->inherit = (cpu < 0) && inherit;
...@@ -410,7 +432,7 @@ static void create_counter(int counter, int cpu, pid_t pid) ...@@ -410,7 +432,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
mmap_array[nr_cpu][counter].prev = 0; mmap_array[nr_cpu][counter].prev = 0;
mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
error("failed to mmap with %d (%s)\n", errno, strerror(errno)); error("failed to mmap with %d (%s)\n", errno, strerror(errno));
exit(-1); exit(-1);
...@@ -435,6 +457,14 @@ static void open_counters(int cpu, pid_t pid) ...@@ -435,6 +457,14 @@ static void open_counters(int cpu, pid_t pid)
nr_cpu++; nr_cpu++;
} }
/*
 * Exit hook (registered via atexit()): add the bytes written during this
 * run to the header's data_size, then rewrite the header at offset 0 of
 * the output file. A failed write is reported with perror() only.
 */
static void atexit_header(void)
{
	ssize_t rc;

	file_header.data_size += bytes_written;

	rc = pwrite(output, &file_header, sizeof(file_header), 0);
	if (rc == -1)
		perror("failed to write on file headers");
}
static int __cmd_record(int argc, const char **argv) static int __cmd_record(int argc, const char **argv)
{ {
int i, counter; int i, counter;
...@@ -448,6 +478,10 @@ static int __cmd_record(int argc, const char **argv) ...@@ -448,6 +478,10 @@ static int __cmd_record(int argc, const char **argv)
assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0); assert(nr_cpus >= 0);
atexit(sig_atexit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
if (!stat(output_name, &st) && !force && !append_file) { if (!stat(output_name, &st) && !force && !append_file) {
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
output_name); output_name);
...@@ -456,7 +490,7 @@ static int __cmd_record(int argc, const char **argv) ...@@ -456,7 +490,7 @@ static int __cmd_record(int argc, const char **argv)
flags = O_CREAT|O_RDWR; flags = O_CREAT|O_RDWR;
if (append_file) if (append_file)
flags |= O_APPEND; file_new = 0;
else else
flags |= O_TRUNC; flags |= O_TRUNC;
...@@ -466,15 +500,22 @@ static int __cmd_record(int argc, const char **argv) ...@@ -466,15 +500,22 @@ static int __cmd_record(int argc, const char **argv)
exit(-1); exit(-1);
} }
if (!file_new) {
if (read(output, &file_header, sizeof(file_header)) == -1) {
perror("failed to read file headers");
exit(-1);
}
lseek(output, file_header.data_size, SEEK_CUR);
}
atexit(atexit_header);
if (!system_wide) { if (!system_wide) {
open_counters(-1, target_pid != -1 ? target_pid : getpid()); open_counters(-1, target_pid != -1 ? target_pid : getpid());
} else for (i = 0; i < nr_cpus; i++) } else for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid); open_counters(i, target_pid);
atexit(sig_atexit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
if (target_pid == -1 && argc) { if (target_pid == -1 && argc) {
pid = fork(); pid = fork();
if (pid < 0) if (pid < 0)
...@@ -555,6 +596,8 @@ static const struct option options[] = { ...@@ -555,6 +596,8 @@ static const struct option options[] = {
"profile at this frequency"), "profile at this frequency"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages, OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"), "number of mmap data pages"),
OPT_BOOLEAN('g', "call-graph", &call_graph,
"do call-graph (stack chain/backtrace) recording"),
OPT_BOOLEAN('v', "verbose", &verbose, OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"), "be more verbose (show counter open errors, etc)"),
OPT_END() OPT_END()
......
This diff is collapsed.
This diff is collapsed.
...@@ -54,7 +54,7 @@ static int system_wide = 0; ...@@ -54,7 +54,7 @@ static int system_wide = 0;
static int default_interval = 100000; static int default_interval = 100000;
static __u64 count_filter = 5; static u64 count_filter = 5;
static int print_entries = 15; static int print_entries = 15;
static int target_pid = -1; static int target_pid = -1;
...@@ -79,8 +79,8 @@ static int dump_symtab; ...@@ -79,8 +79,8 @@ static int dump_symtab;
* Symbols * Symbols
*/ */
static __u64 min_ip; static u64 min_ip;
static __u64 max_ip = -1ll; static u64 max_ip = -1ll;
struct sym_entry { struct sym_entry {
struct rb_node rb_node; struct rb_node rb_node;
...@@ -194,7 +194,7 @@ static void print_sym_table(void) ...@@ -194,7 +194,7 @@ static void print_sym_table(void)
100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
if (nr_counters == 1) { if (nr_counters == 1) {
printf("%Ld", attrs[0].sample_period); printf("%Ld", (u64)attrs[0].sample_period);
if (freq) if (freq)
printf("Hz "); printf("Hz ");
else else
...@@ -372,7 +372,7 @@ static int parse_symbols(void) ...@@ -372,7 +372,7 @@ static int parse_symbols(void)
/* /*
* Binary search in the histogram table and record the hit: * Binary search in the histogram table and record the hit:
*/ */
static void record_ip(__u64 ip, int counter) static void record_ip(u64 ip, int counter)
{ {
struct symbol *sym = dso__find_symbol(kernel_dso, ip); struct symbol *sym = dso__find_symbol(kernel_dso, ip);
...@@ -392,7 +392,7 @@ static void record_ip(__u64 ip, int counter) ...@@ -392,7 +392,7 @@ static void record_ip(__u64 ip, int counter)
samples--; samples--;
} }
static void process_event(__u64 ip, int counter) static void process_event(u64 ip, int counter)
{ {
samples++; samples++;
...@@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md) ...@@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md)
for (; old != head;) { for (; old != head;) {
struct ip_event { struct ip_event {
struct perf_event_header header; struct perf_event_header header;
__u64 ip; u64 ip;
__u32 pid, target_pid; u32 pid, target_pid;
}; };
struct mmap_event { struct mmap_event {
struct perf_event_header header; struct perf_event_header header;
__u32 pid, target_pid; u32 pid, target_pid;
__u64 start; u64 start;
__u64 len; u64 len;
__u64 pgoff; u64 pgoff;
char filename[PATH_MAX]; char filename[PATH_MAX];
}; };
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <sys/syscall.h> #include <sys/syscall.h>
#include "../../include/linux/perf_counter.h" #include "../../include/linux/perf_counter.h"
#include "types.h"
/* /*
* prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
...@@ -65,4 +66,10 @@ sys_perf_counter_open(struct perf_counter_attr *attr, ...@@ -65,4 +66,10 @@ sys_perf_counter_open(struct perf_counter_attr *attr,
#define MAX_COUNTERS 256 #define MAX_COUNTERS 256
#define MAX_NR_CPUS 256 #define MAX_NR_CPUS 256
struct perf_file_header {
u64 version;
u64 sample_type;
u64 data_size;
};
#endif #endif
#ifndef _PERF_TYPES_H
#define _PERF_TYPES_H

/*
 * Short integer type names for the perf tools.
 *
 * u64/s64 are (unsigned) long long on every architecture, which lets
 * them be printed with %Lx without triggering format warnings.
 */
typedef unsigned char		u8;
typedef signed char		s8;

typedef unsigned short		u16;
typedef short			s16;

typedef unsigned int		u32;
typedef int			s32;

typedef unsigned long long	u64;
typedef long long		s64;

#endif /* _PERF_TYPES_H */
...@@ -11,16 +11,21 @@ enum { ...@@ -11,16 +11,21 @@ enum {
D = GIT_DIGIT, D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */
P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */
PS = GIT_SPACE | GIT_PRINT_EXTRA,
}; };
unsigned char sane_ctype[256] = { unsigned char sane_ctype[256] = {
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
/* Nothing in the 128.. range */ /* Nothing in the 128.. range */
}; };
This diff is collapsed.
...@@ -15,7 +15,7 @@ static int hex(char ch) ...@@ -15,7 +15,7 @@ static int hex(char ch)
* While we find nice hex chars, build a long_val. * While we find nice hex chars, build a long_val.
* Return number of chars processed. * Return number of chars processed.
*/ */
int hex2u64(const char *ptr, __u64 *long_val) int hex2u64(const char *ptr, u64 *long_val)
{ {
const char *p = ptr; const char *p = ptr;
*long_val = 0; *long_val = 0;
......
#ifndef _PERF_STRING_H_ #ifndef _PERF_STRING_H_
#define _PERF_STRING_H_ #define _PERF_STRING_H_
#include <linux/types.h> #include "../types.h"
int hex2u64(const char *ptr, __u64 *val); int hex2u64(const char *ptr, u64 *val);
#endif #endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment