Commit 718a4b10, authored by Andi Kleen, committed by Jiri Slaby

perf/x86/intel: Fix PEBS data source interpretation on Nehalem/Westmere

commit e17dc653 upstream.

Jiri reported some time ago that some entries in the PEBS data source table
in perf do not agree with the SDM. We investigated and the bits
changed for Sandy Bridge, but the SDM was not updated.

perf already implements the bits correctly for Sandy Bridge
and later. This patch patches it up for Nehalem and Westmere.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/1456871124-15985-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
parent 1c936da2
...@@ -688,6 +688,8 @@ void intel_pmu_lbr_init_atom(void); ...@@ -688,6 +688,8 @@ void intel_pmu_lbr_init_atom(void);
void intel_pmu_lbr_init_snb(void); void intel_pmu_lbr_init_snb(void);
void intel_pmu_pebs_data_source_nhm(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event); int intel_pmu_setup_lbr_filter(struct perf_event *event);
int p4_pmu_init(void); int p4_pmu_init(void);
......
...@@ -2344,6 +2344,7 @@ __init int intel_pmu_init(void) ...@@ -2344,6 +2344,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk); x86_add_quirk(intel_nehalem_quirk);
pr_cont("Nehalem events, "); pr_cont("Nehalem events, ");
...@@ -2405,6 +2406,7 @@ __init int intel_pmu_init(void) ...@@ -2405,6 +2406,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
pr_cont("Westmere events, "); pr_cont("Westmere events, ");
break; break;
......
...@@ -50,7 +50,8 @@ union intel_x86_pebs_dse { ...@@ -50,7 +50,8 @@ union intel_x86_pebs_dse {
#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
static const u64 pebs_data_source[] = { /* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
...@@ -69,6 +70,14 @@ static const u64 pebs_data_source[] = { ...@@ -69,6 +70,14 @@ static const u64 pebs_data_source[] = {
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
}; };
/*
 * Nehalem/Westmere fix-up: the default pebs_data_source[] table follows the
 * Sandy Bridge and later encoding, so overwrite the three entries whose
 * meaning differs on these older cores.
 */
void __init intel_pmu_pebs_data_source_nhm(void)
{
	/* 0x05: load that hit in L3 with a snoop hit */
	pebs_data_source[0x05] = P(SNOOP, HIT) | P(LVL, L3) | OP_LH;

	/* 0x06 and 0x07: load that hit in L3 with a HITM snoop result */
	pebs_data_source[0x06] = P(SNOOP, HITM) | P(LVL, L3) | OP_LH;
	pebs_data_source[0x07] = P(SNOOP, HITM) | P(LVL, L3) | OP_LH;
}
static u64 precise_store_data(u64 status) static u64 precise_store_data(u64 status)
{ {
union intel_x86_pebs_dse dse; union intel_x86_pebs_dse dse;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment