Commit c22497f5, authored by Kan Liang and committed by Ingo Molnar

perf/x86/intel: Support adaptive PEBS v4

Adaptive PEBS is a new way to report PEBS sampling information. Instead
of a fixed-size record for all PEBS events, it allows the PEBS record to
be configured to include only the information needed. Events can then opt
in to use such an extended record, or stay with a basic record which
only contains the IP.

The major new feature is support for LBRs in the PEBS record.
Besides normal LBR, this allows (much faster) large PEBS, while still
supporting callstacks through callstack LBR. So essentially a lot of
profiling can now be done without frequent interrupts, dropping the
overhead significantly.

The main requirement still is to use a period, and not use frequency
mode, because frequency mode requires reevaluating the frequency on each
overflow.

The floating point state (XMM) is also supported, which allows efficient
profiling of FP function arguments.

Introduce a specific drain function to handle variable-length records.
Use a new callback to parse the new record format, and also handle the
STATUS field now being at a different offset.

Add code to set up the configuration register. Since there is only a
single register, all events get either the full superset of the record
fields requested by any event, or only the basic record.

Originally-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: https://lkml.kernel.org/r/20190402194509.2832-6-kan.liang@linux.intel.com
[ Renamed GPRS => GP. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 477f00f9
...@@ -2145,6 +2145,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) ...@@ -2145,6 +2145,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
bits <<= (idx * 4); bits <<= (idx * 4);
mask = 0xfULL << (idx * 4); mask = 0xfULL << (idx * 4);
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
}
rdmsrl(hwc->config_base, ctrl_val); rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask; ctrl_val &= ~mask;
ctrl_val |= bits; ctrl_val |= bits;
...@@ -3510,6 +3515,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) ...@@ -3510,6 +3515,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{ {
cpuc->pebs_record_size = x86_pmu.pebs_record_size;
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu); cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs) if (!cpuc->shared_regs)
......
This diff is collapsed.
...@@ -1080,6 +1080,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) ...@@ -1080,6 +1080,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
} }
} }
void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
u64 info = lbr->lbr[i].info;
struct perf_branch_entry *e = &cpuc->lbr_entries[i];
e->from = lbr->lbr[i].from;
e->to = lbr->lbr[i].to;
e->mispred = !!(info & LBR_INFO_MISPRED);
e->predicted = !(info & LBR_INFO_MISPRED);
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = info & LBR_INFO_CYCLES;
e->reserved = 0;
}
intel_pmu_lbr_filter(cpuc);
}
/* /*
* Map interface branch filters onto LBR filters * Map interface branch filters onto LBR filters
*/ */
......
...@@ -224,6 +224,11 @@ struct cpu_hw_events { ...@@ -224,6 +224,11 @@ struct cpu_hw_events {
int n_pebs; int n_pebs;
int n_large_pebs; int n_large_pebs;
/* Current super set of events hardware configuration */
u64 pebs_data_cfg;
u64 active_pebs_data_cfg;
int pebs_record_size;
/* /*
* Intel LBR bits * Intel LBR bits
*/ */
...@@ -490,6 +495,7 @@ union perf_capabilities { ...@@ -490,6 +495,7 @@ union perf_capabilities {
* values > 32bit. * values > 32bit.
*/ */
u64 full_width_write:1; u64 full_width_write:1;
u64 pebs_baseline:1;
}; };
u64 capabilities; u64 capabilities;
}; };
...@@ -634,11 +640,12 @@ struct x86_pmu { ...@@ -634,11 +640,12 @@ struct x86_pmu {
pebs_no_xmm_regs :1; pebs_no_xmm_regs :1;
int pebs_record_size; int pebs_record_size;
int pebs_buffer_size; int pebs_buffer_size;
int max_pebs_events;
void (*drain_pebs)(struct pt_regs *regs); void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints; struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event); void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
unsigned long large_pebs_flags; unsigned long large_pebs_flags;
u64 rtm_abort_event;
/* /*
* Intel LBR * Intel LBR
...@@ -978,6 +985,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); ...@@ -978,6 +985,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event); void intel_pmu_auto_reload_read(struct perf_event *event);
void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
void intel_ds_init(void); void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
......
...@@ -116,6 +116,7 @@ ...@@ -116,6 +116,7 @@
#define LBR_INFO_CYCLES 0xffff #define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
/*
 * Adaptive PEBS control bits.
 * ICL_FIXED_0_ADAPTIVE is the bit for fixed counter 0; intel_pmu_enable_fixed()
 * shifts it left by (idx * 4) for fixed counter idx and sets it only when
 * x86_pmu.intel_cap.pebs_baseline and event->attr.precise_ip are both set.
 * NOTE(review): ICL_EVENTSEL_ADAPTIVE is presumably the equivalent bit in a
 * general-purpose counter's event-select register; its user is not visible
 * here, so confirm.
 */
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
...@@ -87,6 +89,12 @@ ...@@ -87,6 +89,12 @@
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
#define ARCH_PERFMON_EVENTS_COUNT 7 #define ARCH_PERFMON_EVENTS_COUNT 7
/*
 * Adaptive PEBS data-configuration flags.
 * NOTE(review): presumably these are the bits written to MSR_PEBS_DATA_CFG to
 * select which optional groups (memory info, GP registers, XMM registers,
 * LBRs) a PEBS record carries; the code that programs them is not visible
 * here, so confirm.
 * NOTE(review): PEBS_DATACFG_LBR_SHIFT looks like the bit position of an
 * LBR-count field in the same register; confirm against the SDM.
 */
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
#define PEBS_DATACFG_GP BIT_ULL(1)
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
#define PEBS_DATACFG_LBR_SHIFT 24
/* /*
* Intel "Architectural Performance Monitoring" CPUID * Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details: * detection/enumeration details:
...@@ -176,6 +184,41 @@ struct x86_pmu_capability { ...@@ -176,6 +184,41 @@ struct x86_pmu_capability {
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
* Adaptive PEBS v4
*/
/*
 * Basic group of an adaptive PEBS record: four u64 fields.
 * NOTE(review): presumably this is the part every PEBS event receives even
 * when it does not opt in to the extended record, format_size packs the
 * record format together with the record size, and applicable_counters is
 * the relocated STATUS field; confirm against the SDM. The field order is
 * presumably the hardware layout, so do not reorder.
 */
struct pebs_basic {
u64 format_size;
u64 ip;
u64 applicable_counters;
u64 tsc;
};
/*
 * Memory-info group of an adaptive PEBS record: four u64 fields.
 * NOTE(review): presumably present only when PEBS_DATACFG_MEMINFO is
 * selected; confirm. The field order is presumably the hardware layout, so
 * do not reorder.
 */
struct pebs_meminfo {
u64 address;
u64 aux;
u64 latency;
u64 tsx_tuning;
};
/*
 * General-purpose register group of an adaptive PEBS record.
 * Eighteen u64 slots: flags, ip, then the sixteen integer registers.
 * NOTE(review): the member order presumably mirrors the hardware record
 * layout, so do not reorder without checking the SDM.
 */
struct pebs_gprs {
	u64 flags;
	u64 ip;
	u64 ax;
	u64 cx;
	u64 dx;
	u64 bx;
	u64 sp;
	u64 bp;
	u64 si;
	u64 di;
	u64 r8;
	u64 r9;
	u64 r10;
	u64 r11;
	u64 r12;
	u64 r13;
	u64 r14;
	u64 r15;
};
/*
 * XMM register group of an adaptive PEBS record: 16 registers, each stored
 * as two consecutive u64 entries (32 entries in total).
 */
struct pebs_xmm {
u64 xmm[16*2]; /* two entries for each register */
};
/*
 * One LBR entry inside a PEBS record: branch source, branch target and an
 * info word. intel_pmu_store_pebs_lbrs() decodes the info word with the
 * LBR_INFO_MISPRED, LBR_INFO_IN_TX, LBR_INFO_ABORT and LBR_INFO_CYCLES masks.
 */
struct pebs_lbr_entry {
u64 from, to, info;
};
/*
 * LBR group of an adaptive PEBS record: a bare array of entries with no
 * length field of its own. intel_pmu_store_pebs_lbrs() reads x86_pmu.lbr_nr
 * entries from it.
 */
struct pebs_lbr {
struct pebs_lbr_entry lbr[0]; /* Variable length */
};
/* /*
* IBS cpuid feature detection * IBS cpuid feature detection
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment