Commit f9129870 authored by Cyrill Gorcunov, committed by Steven Rostedt

perf, x86: P4 PMU - Introduce event alias feature

Instead of the hw_nmi_watchdog_set_attr() weak function
and the corresponding x86_pmu::hw_watchdog_set_attr() call,
we introduce an event alias mechanism which allows us
to drop these routines completely and isolate the quirks
of the Netburst architecture inside the P4 PMU code only.

The main idea remains the same though -- to allow the
nmi-watchdog and perf top to run simultaneously.
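
For illustration only (not part of this patch): a second user of
the generic cycles event, such as perf top, boils down to a
perf_event_open call like the minimal userspace sketch below; on
Netburst it could previously fail to get scheduled while the NMI
watchdog was holding the GLOBAL_POWER_EVENTS counter resources.

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Open a generic cycles counter for the calling thread on
	 * any CPU -- the same generic event the NMI watchdog uses. */
	static int open_cycles_counter(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type   = PERF_TYPE_HARDWARE;
		attr.size   = sizeof(attr);
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}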

Note that the aliasing mechanism applies to the generic
PERF_COUNT_HW_CPU_CYCLES event only, because an arbitrary
event (say, one initially passed in as RAW) might have
additional bits set inside the ESCR register which change
the behaviour of the event, so we can no longer guarantee
that the alias event will give the same result.
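
To make the mapping concrete: the swap performed by
p4_get_alias_event() below is bidirectional. A hypothetical,
simplified standalone sketch of that lookup (names are
illustrative, not the kernel code itself):

	#include <stddef.h>
	#include <stdint.h>

	struct alias { uint64_t orig, alter; };

	/* If a config was already moved to its alias, move it back,
	 * otherwise move it forward; 0 means no alias is known. */
	static uint64_t swap_alias(const struct alias *tbl, size_t n,
				   uint64_t cfg)
	{
		size_t i;

		for (i = 0; i < n; i++) {
			if (cfg == tbl[i].orig)
				return tbl[i].alter;
			if (cfg == tbl[i].alter)
				return tbl[i].orig;
		}
		return 0;
	}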

P.S. A huge thanks to Don and Steven for testing
     and early review.
Acked-by: Don Zickus <dzickus@redhat.com>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Ingo Molnar <mingo@elte.hu>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Stephane Eranian <eranian@google.com>
CC: Lin Ming <ming.m.lin@intel.com>
CC: Arnaldo Carvalho de Melo <acme@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20110708201712.GS23657@sun
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
parent 4a9bd3f1
@@ -101,6 +101,14 @@
#define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
/*
 * If an event has an alias it should be marked
* with a special bit. (Don't forget to check
* P4_PEBS_CONFIG_MASK and related bits on
* modification.)
*/
#define P4_CONFIG_ALIASABLE (1 << 9)
/*
 * The bits we allow to pass for RAW events
 */
@@ -123,6 +131,31 @@
(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
/*
* In case of event aliasing we need to preserve some
 * caller bits, otherwise the mapping won't be complete.
*/
#define P4_CONFIG_EVENT_ALIAS_MASK \
(p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \
p4_config_pack_cccr(P4_CCCR_EDGE | \
P4_CCCR_THRESHOLD_MASK | \
P4_CCCR_COMPLEMENT | \
P4_CCCR_COMPARE))
#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \
((P4_CONFIG_HT) | \
p4_config_pack_escr(P4_ESCR_T0_OS | \
P4_ESCR_T0_USR | \
P4_ESCR_T1_OS | \
P4_ESCR_T1_USR) | \
p4_config_pack_cccr(P4_CCCR_OVF | \
P4_CCCR_CASCADE | \
P4_CCCR_FORCE_OVF | \
P4_CCCR_THREAD_ANY | \
P4_CCCR_OVF_PMI_T0 | \
P4_CCCR_OVF_PMI_T1 | \
P4_CONFIG_ALIASABLE))
static inline bool p4_is_event_cascaded(u64 config)
{
u32 cccr = p4_config_unpack_cccr(config);
...
@@ -274,7 +274,6 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
void (*hw_watchdog_set_attr)(struct perf_event_attr *attr);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -360,12 +359,6 @@ static u64 __read_mostly hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr)
{
if (x86_pmu.hw_watchdog_set_attr)
x86_pmu.hw_watchdog_set_attr(wd_attr);
}
/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
...
@@ -570,11 +570,92 @@ static __initconst const u64 p4_hw_cache_event_ids
},
};
/*
 * Because Netburst is quite restrictive in how many of the
 * same events can run simultaneously, we use event aliases,
 * i.e. different events which have the same functionality
 * but use non-intersecting resources (ESCR/CCCR/counter
 * registers). This allows us to run two or more semi-identical
 * events together. It is done transparently to user space.
 *
 * Never set any custom internal bits such as P4_CONFIG_HT,
 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are
 * either updated automatically or not applicable at all.
 *
 * And be really careful choosing aliases!
 */
struct p4_event_alias {
u64 orig;
u64 alter;
} p4_event_aliases[] = {
{
/*
* Non-halted cycles can be substituted with
* non-sleeping cycles (see Intel SDM Vol3b for
* details).
*/
.orig =
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
.alter =
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
P4_CCCR_COMPARE),
},
};
static u64 p4_get_alias_event(u64 config)
{
u64 config_match;
int i;
/*
* Probably we're lucky and don't have to do
* matching over all config bits.
*/
if (!(config & P4_CONFIG_ALIASABLE))
return 0;
config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
/*
 * If an event was previously swapped to the alter config
 * we should swap it back, otherwise the contention on
 * registers will return.
 */
for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
if (config_match == p4_event_aliases[i].orig) {
config_match = p4_event_aliases[i].alter;
break;
} else if (config_match == p4_event_aliases[i].alter) {
config_match = p4_event_aliases[i].orig;
break;
}
}
if (i >= ARRAY_SIZE(p4_event_aliases))
return 0;
return config_match |
(config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
}
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
/* non-halted CPU clocks */
[PERF_COUNT_HW_CPU_CYCLES] =
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
P4_CONFIG_ALIASABLE,
/*
 * retired instructions
@@ -719,31 +800,6 @@ static int p4_validate_raw_event(struct perf_event *event)
return 0;
}
static void p4_hw_watchdog_set_attr(struct perf_event_attr *wd_attr)
{
/*
* Watchdog ticks are special on Netburst, we use
* that named "non-sleeping" ticks as recommended
* by Intel SDM Vol3b.
*/
WARN_ON_ONCE(wd_attr->type != PERF_TYPE_HARDWARE ||
wd_attr->config != PERF_COUNT_HW_CPU_CYCLES);
wd_attr->type = PERF_TYPE_RAW;
wd_attr->config =
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3)) |
p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
P4_CCCR_COMPARE);
}
static int p4_hw_config(struct perf_event *event)
{
int cpu = get_cpu();
@@ -1159,6 +1215,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
struct p4_event_bind *bind;
unsigned int i, thread, num;
int cntr_idx, escr_idx;
u64 config_alias;
int pass;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1167,6 +1225,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
hwc = &cpuc->event_list[i]->hw;
thread = p4_ht_thread(cpu);
pass = 0;
again:
/*
 * Aliases are swappable, so we may end up swapping in a
 * circle if both the original config and its alias need
 * resources (MSR registers) which are already busy.
 */
if (pass > 2)
goto done;
bind = p4_config_get_bind(hwc->config);
escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
if (unlikely(escr_idx == -1))
@@ -1180,8 +1249,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
}
cntr_idx = p4_next_cntr(thread, used_mask, bind);
if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
/*
* Probably an event alias is still available.
*/
config_alias = p4_get_alias_event(hwc->config);
if (!config_alias)
goto done;
hwc->config = config_alias;
pass++;
goto again;
}
p4_pmu_swap_config_ts(hwc, cpu);
if (assign)
@@ -1218,7 +1296,6 @@ static __initconst const struct x86_pmu p4_pmu = {
.cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK,
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_watchdog_set_attr = p4_hw_watchdog_set_attr,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
/*
...
@@ -200,7 +200,6 @@ static int is_softlockup(unsigned long touch_ts)
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void __weak hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) { }
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
@@ -372,7 +371,6 @@ static int watchdog_nmi_enable(int cpu)
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
hw_nmi_watchdog_set_attr(wd_attr);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
...