Commit efc9f05d authored by Stephane Eranian, committed by Ingo Molnar

perf_events: Update Intel extra regs shared constraints management

This patch improves the code managing the extra shared registers
used for offcore_response events on Intel Nehalem/Westmere. The
idea is to use static allocation instead of dynamic allocation.
This greatly simplifies the get and put constraint routines for
those events.

The patch also renames per_core to shared_regs because the same
data structure gets used whether or not HT is on. When HT is
off, those events still need coordination because they use an
extra MSR that has to be shared within an event group.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent a7ac67ea
@@ -44,6 +44,29 @@ do { \
 } while (0)
 #endif
 
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+	EXTRA_REG_NONE  = -1,	/* not used */
+	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
+	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_MAX		/* number of entries needed */
+};
+
 /*
  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
  */
@@ -132,11 +155,10 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
-	 * Intel percore register state.
-	 * Coordinate shared resources between HT threads.
+	 * manage shared (per-core, per-cpu) registers
+	 * used on Intel NHM/WSM/SNB
 	 */
-	int				percore_used; /* Used by this CPU? */
-	struct intel_percore		*per_core;
+	struct intel_shared_regs	*shared_regs;
 
 	/*
 	 * AMD specific bits
@@ -186,27 +208,46 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Per register state.
+ */
+struct er_account {
+	raw_spinlock_t		lock;	/* per-core: protect structure */
+	u64			config;	/* extra MSR config */
+	u64			reg;	/* extra MSR number */
+	atomic_t		ref;	/* reference count */
+};
+
 /*
  * Extra registers for specific events.
+ *
  * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
  */
 struct extra_reg {
 	unsigned int		event;
 	unsigned int		msr;
 	u64			config_mask;
 	u64			valid_mask;
+	int			idx;	/* per_xxx->regs[] reg index */
 };
 
-#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
 	.event = (e),		\
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
+	.idx = EXTRA_REG_##i	\
 	}
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
-	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
 	struct {
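
With the added idx argument, a PMU's extra-register table now names the shared-reg slot each entry maps to. The concrete tables live in the collapsed Intel-specific part of this commit, so the entry below is only an illustration of the new macro signature; the event code, MSR name and valid mask shown for the offcore_response case are assumptions, not quoted from this page:

/*
 * Illustrative only: one offcore_response mapping, tied to the
 * EXTRA_REG_RSP_0 slot through the trailing index argument.
 */
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = {
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	EVENT_EXTRA_END
};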
@@ -253,7 +294,6 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 
 	struct event_constraint *event_constraints;
-	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
@@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
  */
 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 {
+	struct hw_perf_event_extra *reg;
 	struct extra_reg *er;
 
-	event->hw.extra_reg = 0;
-	event->hw.extra_config = 0;
+	reg = &event->hw.extra_reg;
 
 	if (!x86_pmu.extra_regs)
 		return 0;
@@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
-		event->hw.extra_reg = er->msr;
-		event->hw.extra_config = event->attr.config1;
+
+		reg->idx = er->idx;
+		reg->config = event->attr.config1;
+		reg->reg = er->msr;
 		break;
 	}
 	return 0;
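
For reference, the value copied into reg->config above comes straight from the user-supplied attr.config1, after the check against the register's valid_mask. A hedged user-space sketch of how an offcore_response-style event would feed that field; the raw event code and response mask below are placeholders, not values taken from this commit:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Sketch: open a raw event whose extra MSR value travels in config1. */
static int open_offcore_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_RAW;
	attr.config  = 0x01b7;	/* placeholder: offcore_response-style event code */
	attr.config1 = 0x4003;	/* placeholder: mask that ends up in the extra MSR */

	/* pid = 0 (this task), cpu = -1 (any), no group, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}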
@@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	event->hw.last_cpu = -1;
 	event->hw.last_tag = ~0ULL;
 
+	/* mark unused */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
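
Because every event now starts out with idx = EXTRA_REG_NONE, code that only wants to know whether an event uses an extra register can test the index rather than compare MSR numbers. A trivial sketch of such a check; this helper is illustrative and not part of the patch:

/* Sketch: does this event carry an extra (shared) register? */
static inline int event_has_extra_reg(struct perf_event *event)
{
	return event->hw.extra_reg.idx != EXTRA_REG_NONE;
}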
@@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
-	if (hwc->extra_reg)
-		wrmsrl(hwc->extra_reg, hwc->extra_config);
+	if (hwc->extra_reg.reg)
+		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
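
The wrmsrl() above only programs the shared MSR; deciding whether an event may use it at all is the job of the get/put constraint routines the changelog says this patch simplifies. Those routines live in the collapsed Intel-specific diff, so the following is only a sketch of the scheme under the stated design: take the per-slot er_account lock and grant the constraint only if the slot is free or already holds the same config. The function names and the emptyconstraint/unconstrained usage are assumptions here, not quotes from the commit:

/*
 * Sketch of the static-allocation scheme: the slot for this register
 * already exists in cpuc->shared_regs->regs[], so "get" is just a
 * lock, a compatibility check and a reference-count bump.
 */
static struct event_constraint *
__get_shared_reg_constraint(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
	struct er_account *era = &cpuc->shared_regs->regs[reg->idx];
	struct event_constraint *c = &emptyconstraint;	/* assumed: "cannot schedule now" */

	raw_spin_lock(&era->lock);
	if (!atomic_read(&era->ref) || era->config == reg->config) {
		era->config = reg->config;	/* lock in the MSR value */
		era->reg    = reg->reg;
		atomic_inc(&era->ref);		/* one more user of this slot */
		reg->alloc  = 1;		/* remember we hold a reference */
		c = &unconstrained;		/* assumed: "no scheduling restriction" */
	}
	raw_spin_unlock(&era->lock);

	return c;
}

/* The matching "put" simply drops the reference when the event goes away. */
static void __put_shared_reg_constraint(struct cpu_hw_events *cpuc,
					struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.extra_reg;

	if (reg->idx != EXTRA_REG_NONE && reg->alloc) {
		atomic_dec(&cpuc->shared_regs->regs[reg->idx].ref);
		reg->alloc = 0;
	}
}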
@@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event)
 	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
 	if (!fake_cpuc)
 		goto out;
-
 	/*
 	 * the event is not yet connected with its
 	 * siblings therefore we must first collect
......
This diff is collapsed.
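
The collapsed diff carries the Intel-specific side of the change, including the statically allocated container that the shared_regs pointer added to cpu_hw_events points at. A minimal sketch of that container, assuming it simply bundles one er_account per extra_reg_type plus per-core bookkeeping; the field names are a guess, not a quote from the collapsed hunk:

/*
 * Sketch: pre-allocated per-core table of shared-register slots,
 * indexed by the extra_reg_type enum introduced above.
 */
struct intel_shared_regs {
	struct er_account	regs[EXTRA_REG_MAX];
	int			refcnt;		/* per-core: number of HT threads attached */
	unsigned		core_id;	/* per-core: which core owns this table */
};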
@@ -536,6 +536,16 @@ struct perf_branch_stack {
 
 struct task_struct;
 
+/*
+ * extra PMU register associated with an event
+ */
+struct hw_perf_event_extra {
+	u64		config;	/* register value */
+	unsigned int	reg;	/* register address or index */
+	int		alloc;	/* extra register already allocated */
+	int		idx;	/* index in shared_regs->regs[] */
+};
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -549,9 +559,7 @@ struct hw_perf_event {
 		unsigned long	event_base;
 		int		idx;
 		int		last_cpu;
-		unsigned int	extra_reg;
-		u64		extra_config;
-		int		extra_alloc;
+		struct hw_perf_event_extra extra_reg;
 	};
 	struct { /* software */
 		struct hrtimer	hrtimer;
......