Commit 9b7c8547 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.15-20171023' of...

Merge tag 'perf-core-for-mingo-4.15-20171023' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 - Update vendor events JSON metrics for Intel's Broadwell, Broadwell
   Server, Haswell, Haswell Server, IvyBridge, IvyTown, JakeTown, Sandy
   Bridge, Skylake and SkyLake Server (Andi Kleen)

 - Add vendor event file for Intel's Goldmont Plus V1 (Kan Liang)

 - Move perf_mmap methods from 'perf record' and evlist.c to a separate
   mmap.[ch] pair, to better separate things and pave the way for further
   work on multithreading tools (Arnaldo Carvalho de Melo)

 - Do not check ABI headers in a detached tarball build, as the kernel
   headers from where we copied tools/include/ are by definition not
   available (Arnaldo Carvalho de Melo)

 - Make 'perf script' use fprintf()-like printing, i.e. receiving a FILE
   pointer so that it gets consistent with other tools/ code and allows
   for printing to per-event files (Arnaldo Carvalho de Melo)

 - Error handling fixes (resource release on exit) for 'perf script'
   and 'perf kmem' (Christophe JAILLET)

 - Make some 'perf event attr' tests optional on virtual machines, where
   tested counters are not available (Jiri Olsa)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 8776fe75 65db92e0
...@@ -204,7 +204,7 @@ For example Intel Core CPUs typically have four generic performance counters ...@@ -204,7 +204,7 @@ For example Intel Core CPUs typically have four generic performance counters
for the core, plus three fixed counters for instructions, cycles and for the core, plus three fixed counters for instructions, cycles and
ref-cycles. Some special events have restrictions on which counter they ref-cycles. Some special events have restrictions on which counter they
can schedule, and may not support multiple instances in a single group. can schedule, and may not support multiple instances in a single group.
When too many events are specified in the group none of them will not When too many events are specified in the group some of them will not
be measured. be measured.
Globally pinned events can limit the number of counters available for Globally pinned events can limit the number of counters available for
......
#include <linux/compiler.h>
#include <sys/types.h> #include <sys/types.h>
#include <regex.h> #include <regex.h>
...@@ -23,7 +24,7 @@ static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const c ...@@ -23,7 +24,7 @@ static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const c
return ops; return ops;
} }
static int arm__annotate_init(struct arch *arch) static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{ {
struct arm_annotate *arm; struct arm_annotate *arm;
int err; int err;
......
#include <linux/compiler.h>
#include <sys/types.h> #include <sys/types.h>
#include <regex.h> #include <regex.h>
...@@ -25,7 +26,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const ...@@ -25,7 +26,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const
return ops; return ops;
} }
static int arm64__annotate_init(struct arch *arch) static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{ {
struct arm64_annotate *arm; struct arm64_annotate *arm;
int err; int err;
......
#include <linux/compiler.h>
static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name) static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name)
{ {
int i; int i;
...@@ -46,7 +48,7 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con ...@@ -46,7 +48,7 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con
return ops; return ops;
} }
static int powerpc__annotate_init(struct arch *arch) static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{ {
if (!arch->initialized) { if (!arch->initialized) {
arch->initialized = true; arch->initialized = true;
......
#include <linux/compiler.h>
static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
{ {
struct ins_ops *ops = NULL; struct ins_ops *ops = NULL;
...@@ -19,7 +21,7 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na ...@@ -19,7 +21,7 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
return ops; return ops;
} }
static int s390__annotate_init(struct arch *arch) static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{ {
if (!arch->initialized) { if (!arch->initialized) {
arch->initialized = true; arch->initialized = true;
......
...@@ -122,3 +122,17 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) ...@@ -122,3 +122,17 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
return -1; return -1;
} }
/*
 * One-time x86 setup hook for annotation.
 *
 * @arch:  architecture descriptor; its 'initialized' flag guards the work.
 * @cpuid: optional CPU identification string; may be NULL.
 *
 * Returns 0 immediately when @arch was already initialized. Otherwise,
 * when @cpuid is non-NULL it is handed to x86__cpuid_parse() and that
 * call's result is returned; with a NULL @cpuid the result is 0.
 *
 * Note that @arch is marked initialized even when parsing reports an
 * error, so the parse is attempted at most once per arch.
 */
static int x86__annotate_init(struct arch *arch, char *cpuid)
{
	int ret;

	if (arch->initialized)
		return 0;

	ret = cpuid ? x86__cpuid_parse(arch, cpuid) : 0;

	/* Set the flag regardless of the parse outcome. */
	arch->initialized = true;

	return ret;
}
...@@ -1983,7 +1983,8 @@ int cmd_kmem(int argc, const char **argv) ...@@ -1983,7 +1983,8 @@ int cmd_kmem(int argc, const char **argv)
if (perf_time__parse_str(&ptime, time_str) != 0) { if (perf_time__parse_str(&ptime, time_str) != 0) {
pr_err("Invalid time string\n"); pr_err("Invalid time string\n");
return -EINVAL; ret = -EINVAL;
goto out_delete;
} }
if (!strcmp(argv[0], "stat")) { if (!strcmp(argv[0], "stat")) {
......
...@@ -129,107 +129,12 @@ static int process_synthesized_event(struct perf_tool *tool, ...@@ -129,107 +129,12 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, event, event->header.size); return record__write(rec, event, event->header.size);
} }
static int static int record__pushfn(void *to, void *bf, size_t size)
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{ {
struct perf_event_header *pheader; struct record *rec = to;
u64 evt_head = head;
int size = mask + 1;
pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
pr_debug("Finished reading backward ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
return 0;
}
pheader = (struct perf_event_header *)(buf + (evt_head & mask));
if (pheader->size == 0) {
pr_debug("Finished reading backward ring buffer: get start\n");
*end = evt_head;
return 0;
}
evt_head += pheader->size;
pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
}
WARN_ONCE(1, "Shouldn't get here\n");
return -1;
}
static int
rb_find_range(void *data, int mask, u64 head, u64 old,
u64 *start, u64 *end, bool backward)
{
if (!backward) {
*start = old;
*end = head;
return 0;
}
return backward_rb_find_range(data, mask, head, start, end);
}
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
bool overwrite, bool backward)
{
u64 head = perf_mmap__read_head(md);
u64 old = md->prev;
u64 end = head, start = old;
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
int rc = 0;
if (rb_find_range(data, md->mask, head,
old, &start, &end, backward))
return -1;
if (start == end)
return 0;
rec->samples++; rec->samples++;
return record__write(rec, bf, size);
size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
md->prev = head;
perf_mmap__consume(md, overwrite || backward);
return 0;
}
if ((start & md->mask) + size != (end & md->mask)) {
buf = &data[start & md->mask];
size = md->mask + 1 - (start & md->mask);
start += size;
if (record__write(rec, buf, size) < 0) {
rc = -1;
goto out;
}
}
buf = &data[start & md->mask];
size = end - start;
start += size;
if (record__write(rec, buf, size) < 0) {
rc = -1;
goto out;
}
md->prev = head;
perf_mmap__consume(md, overwrite || backward);
out:
return rc;
} }
static volatile int done; static volatile int done;
...@@ -576,8 +481,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli ...@@ -576,8 +481,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
if (maps[i].base) { if (maps[i].base) {
if (record__mmap_read(rec, &maps[i], if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
evlist->overwrite, backward) != 0) {
rc = -1; rc = -1;
goto out; goto out;
} }
......
This diff is collapsed.
...@@ -1828,16 +1828,14 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs ...@@ -1828,16 +1828,14 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
goto out_put; goto out_put;
} }
static void bpf_output__printer(enum binary_printer_ops op, static int bpf_output__printer(enum binary_printer_ops op,
unsigned int val, void *extra) unsigned int val, void *extra __maybe_unused, FILE *fp)
{ {
FILE *output = extra;
unsigned char ch = (unsigned char)val; unsigned char ch = (unsigned char)val;
switch (op) { switch (op) {
case BINARY_PRINT_CHAR_DATA: case BINARY_PRINT_CHAR_DATA:
fprintf(output, "%c", isprint(ch) ? ch : '.'); return fprintf(fp, "%c", isprint(ch) ? ch : '.');
break;
case BINARY_PRINT_DATA_BEGIN: case BINARY_PRINT_DATA_BEGIN:
case BINARY_PRINT_LINE_BEGIN: case BINARY_PRINT_LINE_BEGIN:
case BINARY_PRINT_ADDR: case BINARY_PRINT_ADDR:
...@@ -1850,13 +1848,15 @@ static void bpf_output__printer(enum binary_printer_ops op, ...@@ -1850,13 +1848,15 @@ static void bpf_output__printer(enum binary_printer_ops op,
default: default:
break; break;
} }
return 0;
} }
static void bpf_output__fprintf(struct trace *trace, static void bpf_output__fprintf(struct trace *trace,
struct perf_sample *sample) struct perf_sample *sample)
{ {
print_binary(sample->raw_data, sample->raw_size, 8, binary__fprintf(sample->raw_data, sample->raw_size, 8,
bpf_output__printer, trace->output); bpf_output__printer, NULL, trace->output);
} }
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
......
...@@ -57,6 +57,11 @@ check () { ...@@ -57,6 +57,11 @@ check () {
} }
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
# differences.
test -d ../../include || exit 0
# simple diff check # simple diff check
for i in $HEADERS; do for i in $HEADERS; do
check $i -B check $i -B
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
This diff is collapsed.
[
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit). The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
"EventCode": "0x80",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "ICACHE.HIT",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss). The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
"EventCode": "0x80",
"Counter": "0,1,2,3",
"UMask": "0x2",
"PEBScounters": "0,1,2,3",
"EventName": "ICACHE.MISSES",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line. The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS. Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line.\r\nThis event counts differently than Intel processors based on Silvermont microarchitecture.",
"EventCode": "0x80",
"Counter": "0,1,2,3",
"UMask": "0x3",
"PEBScounters": "0,1,2,3",
"EventName": "ICACHE.ACCESSES",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops. The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
"EventCode": "0xE7",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "MS_DECODED.MS_ENTRY",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "MS decode starts"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
"EventCode": "0xE9",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Decode restrictions due to predicting wrong instruction length"
}
]
\ No newline at end of file
[
{
"PEBS": "2",
"CollectPEBSRecord": "2",
"PublicDescription": "Counts when a memory load of a uop spans a page boundary (a split) is retired.",
"EventCode": "0x13",
"Counter": "0,1,2,3",
"UMask": "0x2",
"PEBScounters": "0,1,2,3",
"EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
"SampleAfterValue": "200003",
"BriefDescription": "Load uops that split a page (Precise event capable)"
},
{
"PEBS": "2",
"CollectPEBSRecord": "2",
"PublicDescription": "Counts when a memory store of a uop spans a page boundary (a split) is retired.",
"EventCode": "0x13",
"Counter": "0,1,2,3",
"UMask": "0x4",
"PEBScounters": "0,1,2,3",
"EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
"SampleAfterValue": "200003",
"BriefDescription": "Store uops that split a page (Precise event capable)"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.",
"EventCode": "0xC3",
"Counter": "0,1,2,3",
"UMask": "0x2",
"PEBScounters": "0,1,2,3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
"PDIR_COUNTER": "na",
"SampleAfterValue": "20003",
"BriefDescription": "Machine clears due to memory ordering issue"
}
]
\ No newline at end of file
[
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.",
"EventCode": "0x86",
"Counter": "0,1,2,3",
"UMask": "0x0",
"PEBScounters": "0,1,2,3",
"EventName": "FETCH_STALL.ALL",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Cycles code-fetch stalled due to any reason."
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
"EventCode": "0x86",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Cycles the code-fetch stalls and an ITLB miss is outstanding."
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
"EventCode": "0xCA",
"Counter": "0,1,2,3",
"UMask": "0x0",
"PEBScounters": "0,1,2,3",
"EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Unfilled issue slots per cycle"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.",
"EventCode": "0xCA",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
},
{
"CollectPEBSRecord": "1",
"PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
"EventCode": "0xCA",
"Counter": "0,1,2,3",
"UMask": "0x2",
"PEBScounters": "0,1,2,3",
"EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Unfilled issue slots per cycle to recover"
},
{
"CollectPEBSRecord": "2",
"PublicDescription": "Counts hardware interrupts received by the processor.",
"EventCode": "0xCB",
"Counter": "0,1,2,3",
"UMask": "0x1",
"PEBScounters": "0,1,2,3",
"EventName": "HW_INTERRUPTS.RECEIVED",
"PDIR_COUNTER": "na",
"SampleAfterValue": "203",
"BriefDescription": "Hardware interrupts received"
},
{
"CollectPEBSRecord": "2",
"PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
"EventCode": "0xCB",
"Counter": "0,1,2,3",
"UMask": "0x2",
"PEBScounters": "0,1,2,3",
"EventName": "HW_INTERRUPTS.MASKED",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Cycles hardware interrupts are masked"
},
{
"CollectPEBSRecord": "2",
"PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
"EventCode": "0xCB",
"Counter": "0,1,2,3",
"UMask": "0x4",
"PEBScounters": "0,1,2,3",
"EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
"PDIR_COUNTER": "na",
"SampleAfterValue": "200003",
"BriefDescription": "Cycles pending interrupts are masked"
}
]
\ No newline at end of file
This diff is collapsed.
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)", "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@", "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -79,13 +79,13 @@ ...@@ -79,13 +79,13 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,13 +49,13 @@ ...@@ -49,13 +49,13 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
...@@ -73,7 +73,7 @@ ...@@ -73,7 +73,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -9,6 +9,7 @@ GenuineIntel-6-27,v4,bonnell,core ...@@ -9,6 +9,7 @@ GenuineIntel-6-27,v4,bonnell,core
GenuineIntel-6-36,v4,bonnell,core GenuineIntel-6-36,v4,bonnell,core
GenuineIntel-6-35,v4,bonnell,core GenuineIntel-6-35,v4,bonnell,core
GenuineIntel-6-5C,v8,goldmont,core GenuineIntel-6-5C,v8,goldmont,core
GenuineIntel-6-7A,v1,goldmontplus,core
GenuineIntel-6-3C,v24,haswell,core GenuineIntel-6-3C,v24,haswell,core
GenuineIntel-6-45,v24,haswell,core GenuineIntel-6-45,v24,haswell,core
GenuineIntel-6-46,v24,haswell,core GenuineIntel-6-46,v24,haswell,core
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,13 +49,13 @@ ...@@ -49,13 +49,13 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
...@@ -73,7 +73,7 @@ ...@@ -73,7 +73,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
}, },
{ {
"BriefDescription": "Total issue-pipeline slots", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,19 +49,19 @@ ...@@ -49,19 +49,19 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
...@@ -73,19 +73,19 @@ ...@@ -73,19 +73,19 @@
}, },
{ {
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
"MetricGroup": "Memory_Bound;Memory_Lat", "MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency" "MetricName": "Load_Miss_Real_Latency"
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -13,19 +13,19 @@ ...@@ -13,19 +13,19 @@
}, },
{ {
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
"MetricGroup": "Frontend", "MetricGroup": "Frontend",
"MetricName": "IFetch_Line_Utilization" "MetricName": "IFetch_Line_Utilization"
}, },
{ {
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
"MetricGroup": "DSB; Frontend_Bandwidth", "MetricGroup": "DSB; Frontend_Bandwidth",
"MetricName": "DSB_Coverage" "MetricName": "DSB_Coverage"
}, },
{ {
"BriefDescription": "Cycles Per Instruction (threaded)", "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / INST_RETIRED.ANY / cycles", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary", "MetricGroup": "Pipeline;Summary",
"MetricName": "CPI" "MetricName": "CPI"
}, },
...@@ -36,8 +36,8 @@ ...@@ -36,8 +36,8 @@
"MetricName": "CLKS" "MetricName": "CLKS"
}, },
{ {
"BriefDescription": "Total issue-pipeline slots (per-core)", "BriefDescription": "Total issue-pipeline slots",
"MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "TopDownL1", "MetricGroup": "TopDownL1",
"MetricName": "SLOTS" "MetricName": "SLOTS"
}, },
...@@ -49,25 +49,25 @@ ...@@ -49,25 +49,25 @@
}, },
{ {
"BriefDescription": "Instructions Per Cycle (per physical core)", "BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CoreIPC" "MetricName": "CoreIPC"
}, },
{ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
"MetricGroup": "Pipeline;Ports_Utilization", "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP" "MetricName": "ILP"
}, },
{ {
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
"MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
"MetricGroup": "Unknown_Branches", "MetricGroup": "Unknown_Branches",
"MetricName": "BAClear_Cost" "MetricName": "BAClear_Cost"
}, },
{ {
"BriefDescription": "Core actual clocks when any thread is active on the physical core", "BriefDescription": "Core actual clocks when any thread is active on the physical core",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "SMT", "MetricGroup": "SMT",
"MetricName": "CORE_CLKS" "MetricName": "CORE_CLKS"
}, },
...@@ -79,34 +79,16 @@ ...@@ -79,34 +79,16 @@
}, },
{ {
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
"MetricGroup": "Memory_Bound;Memory_BW", "MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP" "MetricName": "MLP"
}, },
{ {
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
"MetricGroup": "TLB", "MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization" "MetricName": "Page_Walks_Utilization"
}, },
{
"BriefDescription": "L1 cache miss per kilo instruction for demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY",
"MetricGroup": "Cache_Misses;",
"MetricName": "L1MPKI"
},
{
"BriefDescription": "L2 cache miss per kilo instruction for demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY",
"MetricGroup": "Cache_Misses;",
"MetricName": "L2MPKI"
},
{
"BriefDescription": "L3 cache miss per kilo instruction for demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY",
"MetricGroup": "Cache_Misses;",
"MetricName": "L3MPKI"
},
{ {
"BriefDescription": "Average CPU Utilization", "BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
...@@ -115,7 +97,7 @@ ...@@ -115,7 +97,7 @@
}, },
{ {
"BriefDescription": "Giga Floating Point Operations Per Second", "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time", "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary", "MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs" "MetricName": "GFLOPs"
}, },
......
...@@ -7,3 +7,4 @@ ret = 1 ...@@ -7,3 +7,4 @@ ret = 1
# events are disabled by default when attached to cpu # events are disabled by default when attached to cpu
disabled=1 disabled=1
enable_on_exec=0 enable_on_exec=0
optional=1
...@@ -4,3 +4,4 @@ args = -e cycles kill >/dev/null 2>&1 ...@@ -4,3 +4,4 @@ args = -e cycles kill >/dev/null 2>&1
ret = 1 ret = 1
[event:base-stat] [event:base-stat]
optional=1
...@@ -32,6 +32,7 @@ config=2 ...@@ -32,6 +32,7 @@ config=2
fd=5 fd=5
type=0 type=0
config=0 config=0
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
[event6:base-stat] [event6:base-stat]
...@@ -52,15 +53,18 @@ optional=1 ...@@ -52,15 +53,18 @@ optional=1
fd=8 fd=8
type=0 type=0
config=1 config=1
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
[event9:base-stat] [event9:base-stat]
fd=9 fd=9
type=0 type=0
config=4 config=4
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
[event10:base-stat] [event10:base-stat]
fd=10 fd=10
type=0 type=0
config=5 config=5
optional=1
...@@ -33,6 +33,7 @@ config=2 ...@@ -33,6 +33,7 @@ config=2
fd=5 fd=5
type=0 type=0
config=0 config=0
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
[event6:base-stat] [event6:base-stat]
...@@ -53,18 +54,21 @@ optional=1 ...@@ -53,18 +54,21 @@ optional=1
fd=8 fd=8
type=0 type=0
config=1 config=1
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
[event9:base-stat] [event9:base-stat]
fd=9 fd=9
type=0 type=0
config=4 config=4
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
[event10:base-stat] [event10:base-stat]
fd=10 fd=10
type=0 type=0
config=5 config=5
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -74,6 +78,7 @@ config=5 ...@@ -74,6 +78,7 @@ config=5
fd=11 fd=11
type=3 type=3
config=0 config=0
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -83,6 +88,7 @@ config=0 ...@@ -83,6 +88,7 @@ config=0
fd=12 fd=12
type=3 type=3
config=65536 config=65536
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -92,6 +98,7 @@ config=65536 ...@@ -92,6 +98,7 @@ config=65536
fd=13 fd=13
type=3 type=3
config=2 config=2
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -101,3 +108,4 @@ config=2 ...@@ -101,3 +108,4 @@ config=2
fd=14 fd=14
type=3 type=3
config=65538 config=65538
optional=1
...@@ -33,6 +33,7 @@ config=2 ...@@ -33,6 +33,7 @@ config=2
fd=5 fd=5
type=0 type=0
config=0 config=0
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
[event6:base-stat] [event6:base-stat]
...@@ -53,18 +54,21 @@ optional=1 ...@@ -53,18 +54,21 @@ optional=1
fd=8 fd=8
type=0 type=0
config=1 config=1
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
[event9:base-stat] [event9:base-stat]
fd=9 fd=9
type=0 type=0
config=4 config=4
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
[event10:base-stat] [event10:base-stat]
fd=10 fd=10
type=0 type=0
config=5 config=5
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -74,6 +78,7 @@ config=5 ...@@ -74,6 +78,7 @@ config=5
fd=11 fd=11
type=3 type=3
config=0 config=0
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -83,6 +88,7 @@ config=0 ...@@ -83,6 +88,7 @@ config=0
fd=12 fd=12
type=3 type=3
config=65536 config=65536
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -92,6 +98,7 @@ config=65536 ...@@ -92,6 +98,7 @@ config=65536
fd=13 fd=13
type=3 type=3
config=2 config=2
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -101,6 +108,7 @@ config=2 ...@@ -101,6 +108,7 @@ config=2
fd=14 fd=14
type=3 type=3
config=65538 config=65538
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_L1I << 0 | # PERF_COUNT_HW_CACHE_L1I << 0 |
...@@ -120,6 +128,7 @@ optional=1 ...@@ -120,6 +128,7 @@ optional=1
fd=16 fd=16
type=3 type=3
config=65537 config=65537
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_DTLB << 0 | # PERF_COUNT_HW_CACHE_DTLB << 0 |
...@@ -129,6 +138,7 @@ config=65537 ...@@ -129,6 +138,7 @@ config=65537
fd=17 fd=17
type=3 type=3
config=3 config=3
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_DTLB << 0 | # PERF_COUNT_HW_CACHE_DTLB << 0 |
...@@ -138,6 +148,7 @@ config=3 ...@@ -138,6 +148,7 @@ config=3
fd=18 fd=18
type=3 type=3
config=65539 config=65539
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_ITLB << 0 | # PERF_COUNT_HW_CACHE_ITLB << 0 |
...@@ -147,6 +158,7 @@ config=65539 ...@@ -147,6 +158,7 @@ config=65539
fd=19 fd=19
type=3 type=3
config=4 config=4
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_ITLB << 0 | # PERF_COUNT_HW_CACHE_ITLB << 0 |
...@@ -156,3 +168,4 @@ config=4 ...@@ -156,3 +168,4 @@ config=4
fd=20 fd=20
type=3 type=3
config=65540 config=65540
optional=1
...@@ -33,6 +33,7 @@ config=2 ...@@ -33,6 +33,7 @@ config=2
fd=5 fd=5
type=0 type=0
config=0 config=0
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
[event6:base-stat] [event6:base-stat]
...@@ -53,18 +54,21 @@ optional=1 ...@@ -53,18 +54,21 @@ optional=1
fd=8 fd=8
type=0 type=0
config=1 config=1
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
[event9:base-stat] [event9:base-stat]
fd=9 fd=9
type=0 type=0
config=4 config=4
optional=1
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES # PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
[event10:base-stat] [event10:base-stat]
fd=10 fd=10
type=0 type=0
config=5 config=5
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -74,6 +78,7 @@ config=5 ...@@ -74,6 +78,7 @@ config=5
fd=11 fd=11
type=3 type=3
config=0 config=0
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
...@@ -83,6 +88,7 @@ config=0 ...@@ -83,6 +88,7 @@ config=0
fd=12 fd=12
type=3 type=3
config=65536 config=65536
optional=1
# PERF_TYPE_HW_CACHE / # PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -92,6 +98,7 @@ config=65536 ...@@ -92,6 +98,7 @@ config=65536
fd=13 fd=13
type=3 type=3
config=2 config=2
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_LL << 0 | # PERF_COUNT_HW_CACHE_LL << 0 |
...@@ -101,6 +108,7 @@ config=2 ...@@ -101,6 +108,7 @@ config=2
fd=14 fd=14
type=3 type=3
config=65538 config=65538
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_L1I << 0 | # PERF_COUNT_HW_CACHE_L1I << 0 |
...@@ -120,6 +128,7 @@ optional=1 ...@@ -120,6 +128,7 @@ optional=1
fd=16 fd=16
type=3 type=3
config=65537 config=65537
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_DTLB << 0 | # PERF_COUNT_HW_CACHE_DTLB << 0 |
...@@ -129,6 +138,7 @@ config=65537 ...@@ -129,6 +138,7 @@ config=65537
fd=17 fd=17
type=3 type=3
config=3 config=3
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_DTLB << 0 | # PERF_COUNT_HW_CACHE_DTLB << 0 |
...@@ -138,6 +148,7 @@ config=3 ...@@ -138,6 +148,7 @@ config=3
fd=18 fd=18
type=3 type=3
config=65539 config=65539
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_ITLB << 0 | # PERF_COUNT_HW_CACHE_ITLB << 0 |
...@@ -147,6 +158,7 @@ config=65539 ...@@ -147,6 +158,7 @@ config=65539
fd=19 fd=19
type=3 type=3
config=4 config=4
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_ITLB << 0 | # PERF_COUNT_HW_CACHE_ITLB << 0 |
...@@ -156,6 +168,7 @@ config=4 ...@@ -156,6 +168,7 @@ config=4
fd=20 fd=20
type=3 type=3
config=65540 config=65540
optional=1
# PERF_TYPE_HW_CACHE, # PERF_TYPE_HW_CACHE,
# PERF_COUNT_HW_CACHE_L1D << 0 | # PERF_COUNT_HW_CACHE_L1D << 0 |
......
...@@ -5,3 +5,4 @@ ret = 1 ...@@ -5,3 +5,4 @@ ret = 1
[event:base-stat] [event:base-stat]
inherit=0 inherit=0
optional=1
...@@ -13,6 +13,7 @@ libperf-y += find_bit.o ...@@ -13,6 +13,7 @@ libperf-y += find_bit.o
libperf-y += kallsyms.o libperf-y += kallsyms.o
libperf-y += levenshtein.o libperf-y += levenshtein.o
libperf-y += llvm-utils.o libperf-y += llvm-utils.o
libperf-y += mmap.o
libperf-y += memswap.o libperf-y += memswap.o
libperf-y += parse-events.o libperf-y += parse-events.o
libperf-y += perf_regs.o libperf-y += perf_regs.o
......
...@@ -49,10 +49,9 @@ struct arch { ...@@ -49,10 +49,9 @@ struct arch {
void *priv; void *priv;
unsigned int model; unsigned int model;
unsigned int family; unsigned int family;
int (*init)(struct arch *arch); int (*init)(struct arch *arch, char *cpuid);
bool (*ins_is_fused)(struct arch *arch, const char *ins1, bool (*ins_is_fused)(struct arch *arch, const char *ins1,
const char *ins2); const char *ins2);
int (*cpuid_parse)(struct arch *arch, char *cpuid);
struct { struct {
char comment_char; char comment_char;
char skip_functions_char; char skip_functions_char;
...@@ -132,10 +131,10 @@ static struct arch architectures[] = { ...@@ -132,10 +131,10 @@ static struct arch architectures[] = {
}, },
{ {
.name = "x86", .name = "x86",
.init = x86__annotate_init,
.instructions = x86__instructions, .instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions), .nr_instructions = ARRAY_SIZE(x86__instructions),
.ins_is_fused = x86__ins_is_fused, .ins_is_fused = x86__ins_is_fused,
.cpuid_parse = x86__cpuid_parse,
.objdump = { .objdump = {
.comment_char = '#', .comment_char = '#',
}, },
...@@ -1447,16 +1446,13 @@ int symbol__disassemble(struct symbol *sym, struct map *map, ...@@ -1447,16 +1446,13 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
*parch = arch; *parch = arch;
if (arch->init) { if (arch->init) {
err = arch->init(arch); err = arch->init(arch, cpuid);
if (err) { if (err) {
pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
return err; return err;
} }
} }
if (arch->cpuid_parse && cpuid)
arch->cpuid_parse(arch, cpuid);
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
symfs_filename, sym->name, map->unmap_ip(map, sym->start), symfs_filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end)); map->unmap_ip(map, sym->end));
......
...@@ -111,50 +111,53 @@ int dump_printf(const char *fmt, ...) ...@@ -111,50 +111,53 @@ int dump_printf(const char *fmt, ...)
return ret; return ret;
} }
static void trace_event_printer(enum binary_printer_ops op, static int trace_event_printer(enum binary_printer_ops op,
unsigned int val, void *extra) unsigned int val, void *extra, FILE *fp)
{ {
const char *color = PERF_COLOR_BLUE; const char *color = PERF_COLOR_BLUE;
union perf_event *event = (union perf_event *)extra; union perf_event *event = (union perf_event *)extra;
unsigned char ch = (unsigned char)val; unsigned char ch = (unsigned char)val;
int printed = 0;
switch (op) { switch (op) {
case BINARY_PRINT_DATA_BEGIN: case BINARY_PRINT_DATA_BEGIN:
printf("."); printed += fprintf(fp, ".");
color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
event->header.size); event->header.size);
break; break;
case BINARY_PRINT_LINE_BEGIN: case BINARY_PRINT_LINE_BEGIN:
printf("."); printed += fprintf(fp, ".");
break; break;
case BINARY_PRINT_ADDR: case BINARY_PRINT_ADDR:
color_fprintf(stdout, color, " %04x: ", val); printed += color_fprintf(fp, color, " %04x: ", val);
break; break;
case BINARY_PRINT_NUM_DATA: case BINARY_PRINT_NUM_DATA:
color_fprintf(stdout, color, " %02x", val); printed += color_fprintf(fp, color, " %02x", val);
break; break;
case BINARY_PRINT_NUM_PAD: case BINARY_PRINT_NUM_PAD:
color_fprintf(stdout, color, " "); printed += color_fprintf(fp, color, " ");
break; break;
case BINARY_PRINT_SEP: case BINARY_PRINT_SEP:
color_fprintf(stdout, color, " "); printed += color_fprintf(fp, color, " ");
break; break;
case BINARY_PRINT_CHAR_DATA: case BINARY_PRINT_CHAR_DATA:
color_fprintf(stdout, color, "%c", printed += color_fprintf(fp, color, "%c",
isprint(ch) ? ch : '.'); isprint(ch) ? ch : '.');
break; break;
case BINARY_PRINT_CHAR_PAD: case BINARY_PRINT_CHAR_PAD:
color_fprintf(stdout, color, " "); printed += color_fprintf(fp, color, " ");
break; break;
case BINARY_PRINT_LINE_END: case BINARY_PRINT_LINE_END:
color_fprintf(stdout, color, "\n"); printed += color_fprintf(fp, color, "\n");
break; break;
case BINARY_PRINT_DATA_END: case BINARY_PRINT_DATA_END:
printf("\n"); printed += fprintf(fp, "\n");
break; break;
default: default:
break; break;
} }
return printed;
} }
void trace_event(union perf_event *event) void trace_event(union perf_event *event)
......
This diff is collapsed.
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
#include "../perf.h" #include "../perf.h"
#include "event.h" #include "event.h"
#include "evsel.h" #include "evsel.h"
#include "mmap.h"
#include "util.h" #include "util.h"
#include "auxtrace.h"
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <unistd.h>
...@@ -24,55 +24,6 @@ struct record_opts; ...@@ -24,55 +24,6 @@ struct record_opts;
#define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
/**
* struct perf_mmap - perf's ring buffer mmap details
*
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
*/
struct perf_mmap {
void *base;
int mask;
int fd;
refcount_t refcnt;
u64 prev;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
};
static inline size_t
perf_mmap__mmap_len(struct perf_mmap *map)
{
return map->mask + 1 + page_size;
}
/*
* State machine of bkw_mmap_state:
*
* .________________(forbid)_____________.
* | V
* NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
* ^ ^ | ^ |
* | |__(forbid)____/ |___(forbid)___/|
* | |
* \_________________(3)_______________/
*
* NOTREADY : Backward ring buffers are not ready
* RUNNING : Backward ring buffers are recording
* DATA_PENDING : We are required to collect data from backward ring buffers
* EMPTY : We have collected data from backward ring buffers.
*
* (0): Setup backward ring buffer
* (1): Pause ring buffers for reading
* (2): Read from ring buffers
* (3): Resume ring buffers for recording
*/
enum bkw_mmap_state {
BKW_MMAP_NOTREADY,
BKW_MMAP_RUNNING,
BKW_MMAP_DATA_PENDING,
BKW_MMAP_EMPTY,
};
struct perf_evlist { struct perf_evlist {
struct list_head entries; struct list_head entries;
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
...@@ -177,12 +128,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); ...@@ -177,12 +128,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
void perf_mmap__read_catchup(struct perf_mmap *md);
void perf_mmap__consume(struct perf_mmap *md, bool overwrite);
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
...@@ -286,25 +231,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); ...@@ -286,25 +231,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
struct perf_event_mmap_page *pc = mm->base;
u64 head = ACCESS_ONCE(pc->data_head);
rmb();
return head;
}
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
struct perf_event_mmap_page *pc = md->base;
/*
* ensure all reads are done before we write the tail out.
*/
mb();
pc->data_tail = tail;
}
bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str); bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__to_front(struct perf_evlist *evlist,
struct perf_evsel *move_evsel); struct perf_evsel *move_evsel);
......
This diff is collapsed.
#ifndef __PERF_MMAP_H
#define __PERF_MMAP_H 1
#include <linux/compiler.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <stdbool.h>
#include "auxtrace.h"
#include "event.h"
/**
 * struct perf_mmap - perf's ring buffer mmap details
 *
 * @base: start of the mapping; the inline helpers in this header read it as a
 *        struct perf_event_mmap_page to reach data_head and data_tail
 * @mask: NOTE(review): presumably "data area size - 1", used to wrap ring
 *        buffer offsets - confirm against mmap.c
 * @fd: NOTE(review): presumably the perf event file descriptor that backs
 *      this mapping - confirm against perf_mmap__mmap()
 * @refcnt: reference count, e.g. code using PERF_EVENT_IOC_SET_OUTPUT to
 *          share this
 * @prev: NOTE(review): looks like the head position seen by the previous
 *        read - confirm against the perf_mmap__read_*() implementations
 * @auxtrace_mmap: auxtrace mapping state kept alongside this ring buffer
 * @event_copy: 8-byte aligned scratch buffer of PERF_SAMPLE_MAX_SIZE bytes;
 *              NOTE(review): presumably holds an event that wraps around the
 *              end of the ring buffer - confirm against the readers
 */
struct perf_mmap {
void *base;
int mask;
int fd;
refcount_t refcnt;
u64 prev;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
};
/*
 * State machine of bkw_mmap_state:
 *
 *                     .________________(forbid)_____________.
 *                     |                                     V
 * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
 *                     ^  ^              |   ^               |
 *                     |  |__(forbid)____/   |___(forbid)___/|
 *                     |                                     |
 *                      \_________________(3)_______________/
 *
 * NOTREADY     : Backward ring buffers are not ready
 * RUNNING      : Backward ring buffers are recording
 * DATA_PENDING : We are required to collect data from backward ring buffers
 * EMPTY        : We have collected data from backward ring buffers.
 *
 * (0): Setup backward ring buffer
 * (1): Pause ring buffers for reading
 * (2): Read from ring buffers
 * (3): Resume ring buffers for recording
 *
 * Arcs marked (forbid) are transitions that must not happen.
 */
enum bkw_mmap_state {
BKW_MMAP_NOTREADY,
BKW_MMAP_RUNNING,
BKW_MMAP_DATA_PENDING,
BKW_MMAP_EMPTY,
};
/**
 * struct mmap_params - parameters handed to perf_mmap__mmap()
 *
 * @prot: NOTE(review): presumably the mmap(2) protection flags for the ring
 *        buffer mapping - confirm against mmap.c
 * @mask: NOTE(review): presumably copied into perf_mmap::mask - confirm
 * @auxtrace_mp: parameters for the accompanying auxtrace mapping
 */
struct mmap_params {
int prot, mask;
struct auxtrace_mmap_params auxtrace_mp;
};
int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
void perf_mmap__read_catchup(struct perf_mmap *md);
/*
 * Return the current data_head of the ring buffer mapped at mm->base.
 *
 * mm->base is interpreted as the struct perf_event_mmap_page control page.
 * The head is loaded exactly once via ACCESS_ONCE() and a read barrier is
 * issued before the value is returned.
 */
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
struct perf_event_mmap_page *pc = mm->base;
u64 head = ACCESS_ONCE(pc->data_head);
/*
 * Keep the barrier after the load: presumably it orders the data_head
 * read before the caller's subsequent reads of the ring buffer contents,
 * as the perf mmap ABI requires - NOTE(review): confirm.
 */
rmb();
return head;
}
/*
 * Publish a new data_tail for the ring buffer mapped at md->base.
 *
 * md->base is interpreted as the struct perf_event_mmap_page control page;
 * @tail is stored into its data_tail field after a full memory barrier.
 */
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
struct perf_event_mmap_page *pc = md->base;
/*
 * Ensure all reads are done before we write the tail out. The barrier
 * must stay ahead of the store below.
 */
mb();
pc->data_tail = tail;
}
union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
void *to, int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
#endif /*__PERF_MMAP_H */
...@@ -9,9 +9,10 @@ ...@@ -9,9 +9,10 @@
#ifndef __PERF_NAMESPACES_H #ifndef __PERF_NAMESPACES_H
#define __PERF_NAMESPACES_H #define __PERF_NAMESPACES_H
#include "../perf.h" #include <sys/types.h>
#include <linux/list.h> #include <linux/perf_event.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/types.h>
struct namespaces_event; struct namespaces_event;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment