Commit 722e76e6 authored by Stephane Eranian, committed by Thomas Gleixner

fix Haswell precise store data source encoding

This patch fixes a bug in precise_store_data_hsw() whereby it would
set the data source memory level to the wrong value.

As per the SDM Vol 3b Table 18-41 (Layout of Data Linear Address
Information in PEBS Record), when status bit 0 is set the access is
an L1 hit, otherwise it is an L1 miss.

This patch encodes the memory level according to the specification.
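
As a quick sanity check of the resulting encoding, here is a minimal
user-space sketch, assuming only the uapi definitions in
<linux/perf_event.h>; the helper print_store_level() is illustrative
and not part of the patch:

	#include <stdio.h>
	#include <linux/perf_event.h>

	/* Illustrative only: decode the mem_lvl bits a PEBS store sample now carries. */
	static void print_store_level(__u64 data_src)
	{
		union perf_mem_data_src dse = { .val = data_src };

		if (dse.mem_op != PERF_MEM_OP_STORE)
			return;

		if (dse.mem_lvl & PERF_MEM_LVL_L1)
			printf("L1 %s\n",
			       (dse.mem_lvl & PERF_MEM_LVL_HIT) ? "hit" : "miss");
		else
			printf("level not available\n");
	}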

In V2, we added filtering on the store events.
Only the following events produce L1 information (their raw config
encodings are sketched after the list):
 * MEM_UOPS_RETIRED.STLB_MISS_STORES
 * MEM_UOPS_RETIRED.LOCK_STORES
 * MEM_UOPS_RETIRED.SPLIT_STORES
 * MEM_UOPS_RETIRED.ALL_STORES
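
For reference, a minimal sketch of how those event names map onto the
raw config values the patch checks. It assumes the standard Haswell
MEM_UOPS_RETIRED encoding (event code 0xD0, config = (umask << 8) | event);
the macro names and the helper hsw_store_event_has_l1_info() are
hypothetical, not kernel identifiers:

	#include <stdbool.h>
	#include <stdint.h>

	/* MEM_UOPS_RETIRED is event code 0xD0; config = (umask << 8) | event. */
	#define HSW_MEM_UOPS_RETIRED_STLB_MISS_STORES	0x12d0	/* umask 0x12 */
	#define HSW_MEM_UOPS_RETIRED_LOCK_STORES	0x22d0	/* umask 0x22 */
	#define HSW_MEM_UOPS_RETIRED_SPLIT_STORES	0x42d0	/* umask 0x42 */
	#define HSW_MEM_UOPS_RETIRED_ALL_STORES		0x82d0	/* umask 0x82 */

	/* Hypothetical helper mirroring the filter added in precise_store_data_hsw(). */
	static bool hsw_store_event_has_l1_info(uint64_t cfg)
	{
		return cfg == HSW_MEM_UOPS_RETIRED_STLB_MISS_STORES ||
		       cfg == HSW_MEM_UOPS_RETIRED_LOCK_STORES ||
		       cfg == HSW_MEM_UOPS_RETIRED_SPLIT_STORES ||
		       cfg == HSW_MEM_UOPS_RETIRED_ALL_STORES;
	}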

Cc: mingo@elte.hu
Cc: acme@ghostprotocols.net
Cc: jolsa@redhat.com
Cc: jmario@redhat.com
Cc: ak@linux.intel.com
Tested-and-Reviewed-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140515155644.GA3884@quad
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 643fd0b9
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 {
 	union perf_mem_data_src dse;
+	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
 	dse.val = 0;
 	dse.mem_op = PERF_MEM_OP_STORE;
 	dse.mem_lvl = PERF_MEM_LVL_NA;
+
+	/*
+	 * L1 info only valid for following events:
+	 *
+	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
+	 * MEM_UOPS_RETIRED.LOCK_STORES
+	 * MEM_UOPS_RETIRED.SPLIT_STORES
+	 * MEM_UOPS_RETIRED.ALL_STORES
+	 */
+	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+		return dse.mem_lvl;
+
 	if (status & 1)
-		dse.mem_lvl = PERF_MEM_LVL_L1;
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+	else
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
 	/* Nothing else supported. Sorry. */
 	return dse.val;
 }
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		data.data_src.val = load_latency_data(pebs->dse);
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 		data.data_src.val =
-			precise_store_data_hsw(pebs->dse);
+			precise_store_data_hsw(event, pebs->dse);
 	else
 		data.data_src.val = precise_store_data(pebs->dse);
 }