Commit 0f9e0422 authored by Linus Torvalds

Merge tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - Add support for accessing the general purpose counters on Alder Lake
   via MMIO

 - Add new LBR format v7 support which is v5 modulo TSX

 - Fix counter enumeration on Alder Lake hybrids

 - Overhaul how context time updates are done and get rid of
   perf_event::shadow_ctx_time.

 - The usual amount of fixes: event mask correction, supported event
   types reporting, etc.

* tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Avoid warning for Arch LBR without XSAVE
  perf/x86/intel/uncore: Add IMC uncore support for ADL
  perf/x86/intel/lbr: Add static_branch for LBR INFO flags
  perf/x86/intel/lbr: Support LBR format V7
  perf/x86/rapl: fix AMD event handling
  perf/x86/intel/uncore: Fix CAS_COUNT_WRITE issue for ICX
  perf/x86/intel: Add a quirk for the calculation of the number of counters on Alder Lake
  perf: Fix perf_event_read_local() time
parents e783362e 8c16dc04
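For readers who want to exercise the Alder Lake IMC uncore support added below: the new free-running IMC counters reuse the Tiger Lake event descriptions (data_total, data_read, data_write), so on a kernel carrying this tag memory bandwidth could be counted from userspace roughly as follows. This is a usage sketch, not part of the commit; the PMU instance names are an assumption based on the usual uncore sysfs naming and may differ per system.

    # hypothetical usage sketch; PMU/event names assumed from the reused TGL event descs
    perf stat -a -e uncore_imc_free_running_0/data_read/,uncore_imc_free_running_0/data_write/ sleep 1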
arch/x86/events/intel/core.c
@@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
/*
* Quirk: For some Alder Lake machine, when all E-cores are disabled in
* a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
* the X86_FEATURE_HYBRID_CPU is still set. The above codes will
* mistakenly add extra counters for P-cores. Correct the number of
* counters here.
*/
if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void)
}

if (x86_pmu.lbr_nr) {
intel_pmu_lbr_init();
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

/* only support branch_stack snapshot for perfmon >= v2 */
arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@
#include "../perf_event.h"
-static const enum {
-LBR_EIP_FLAGS = 1,
-LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
-[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
-if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
-int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
-return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
return !tsx_support && x86_pmu.lbr_has_tsx;
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
-bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
-int i;
-unsigned lbr_idx, mask;
bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
unsigned lbr_idx, mask;
int i;

mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
-if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
-bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
-int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-int skip = 0;
u16 cycles = 0;
-int lbr_flags = lbr_desc[lbr_format];

from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,38 +833,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;

-if (lbr_format == LBR_FORMAT_INFO && need_info) {
if (x86_pmu.lbr_has_info) {
if (need_info) {
u64 info;

info = rdlbr_info(lbr_idx, NULL);
mis = !!(info & LBR_INFO_MISPRED);
pred = !mis;
cycles = (info & LBR_INFO_CYCLES);
if (x86_pmu.lbr_has_tsx) {
in_tx = !!(info & LBR_INFO_IN_TX);
abort = !!(info & LBR_INFO_ABORT);
-cycles = (info & LBR_INFO_CYCLES);
}
}
-if (lbr_format == LBR_FORMAT_TIME) {
-mis = !!(from & LBR_FROM_FLAG_MISPRED);
-pred = !mis;
-skip = 1;
-cycles = ((to >> 48) & LBR_INFO_CYCLES);
-to = (u64)((((s64)to) << 16) >> 16);
-}
} else {
int skip = 0;

-if (lbr_flags & LBR_EIP_FLAGS) {
if (x86_pmu.lbr_from_flags) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
skip = 1;
}
-if (lbr_flags & LBR_TSX) {
if (x86_pmu.lbr_has_tsx) {
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
abort = !!(from & LBR_FROM_FLAG_ABORT);
skip = 3;
}
from = (u64)((((s64)from) << skip) >> skip);

if (x86_pmu.lbr_to_cycles) {
cycles = ((to >> 48) & LBR_INFO_CYCLES);
to = (u64)((((s64)to) << 16) >> 16);
}
}

/*
* Some CPUs report duplicated abort records,
* with the second entry not having an abort bit set.
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
static __always_inline int get_lbr_br_type(u64 info)
{
-if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
-return 0;
-return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
int type = 0;

if (static_branch_likely(&x86_lbr_type))
type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
-if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-return 0;
-return !!(info & LBR_INFO_MISPRED);
bool mispred = 0;

if (static_branch_likely(&x86_lbr_mispred))
mispred = !!(info & LBR_INFO_MISPRED);

return mispred;
}

-static __always_inline bool get_lbr_predicted(u64 info)
-{
-if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-return 0;
-return !(info & LBR_INFO_MISPRED);
-}

static __always_inline u16 get_lbr_cycles(u64 info)
{
u16 cycles = info & LBR_INFO_CYCLES;

if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-!(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
-return 0;
(!static_branch_likely(&x86_lbr_cycles) ||
!(info & LBR_INFO_CYC_CNT_VALID)))
cycles = 0;

return cycles;
}

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
-e->predicted = get_lbr_predicted(info);
e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-(x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;

return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
void intel_pmu_lbr_init(void)
{
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
fallthrough;
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
case LBR_FORMAT_INFO:
x86_pmu.lbr_has_tsx = 1;
fallthrough;
case LBR_FORMAT_INFO2:
x86_pmu.lbr_has_info = 1;
break;
case LBR_FORMAT_TIME:
x86_pmu.lbr_from_flags = 1;
x86_pmu.lbr_to_cycles = 1;
break;
}
if (x86_pmu.lbr_has_info) {
/*
* Only used in combination with baseline pebs.
*/
static_branch_enable(&x86_lbr_mispred);
static_branch_enable(&x86_lbr_cycles);
}
}
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
if (xfeature_size(XFEATURE_LBR) == 0)
return false;
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
if (x86_pmu.lbr_timed_lbr)
static_branch_enable(&x86_lbr_cycles);
if (x86_pmu.lbr_br_type)
static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
-.mmio_init = tgl_uncore_mmio_init,
.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);

/* uncore_snbep.c */
arch/x86/events/intel/uncore_discovery.c
@@ -494,7 +494,7 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}

-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
arch/x86/events/intel/uncore_discovery.h
@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
arch/x86/events/intel/uncore_snb.c
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,20 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +170,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ }
};
@@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000

-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
static void __uncore_imc_init_box(struct intel_uncore_box *box,
unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
{
__uncore_imc_init_box(box, 0);
}
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void)
}

/* end of Tiger Lake MMIO uncore support */
/* Alder Lake MMIO uncore support */
#define ADL_UNCORE_IMC_BASE 0xd900
#define ADL_UNCORE_IMC_MAP_SIZE 0x200
#define ADL_UNCORE_IMC_CTR 0xe8
#define ADL_UNCORE_IMC_CTRL 0xd0
#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
#define ADL_UNCORE_IMC_BOX_CTL 0xc4
#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
ADL_UNCORE_IMC_CTL_RST_CTRS)
static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
{
__uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
/* The global control in MC1 can control both MCs. */
if (box->io_addr && (box->pmu->pmu_idx == 1))
writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
}
static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
}
static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
writel(0, box->io_addr + uncore_mmio_box_ctl(box));
}
static struct intel_uncore_ops adl_uncore_mmio_ops = {
.init_box = adl_uncore_imc_init_box,
.exit_box = uncore_mmio_exit_box,
.disable_box = adl_uncore_mmio_disable_box,
.enable_box = adl_uncore_mmio_enable_box,
.disable_event = intel_generic_uncore_mmio_disable_event,
.enable_event = intel_generic_uncore_mmio_enable_event,
.read_counter = uncore_mmio_read_counter,
};
#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
ADL_UNC_CTL_CHMASK_MASK | \
SNB_UNC_CTL_EDGE_DET)
static struct attribute *adl_uncore_imc_formats_attr[] = {
&format_attr_event.attr,
&format_attr_chmask.attr,
&format_attr_edge.attr,
NULL,
};
static const struct attribute_group adl_uncore_imc_format_group = {
.name = "format",
.attrs = adl_uncore_imc_formats_attr,
};
static struct intel_uncore_type adl_uncore_imc = {
.name = "imc",
.num_counters = 5,
.num_boxes = 2,
.perf_ctr_bits = 64,
.perf_ctr = ADL_UNCORE_IMC_CTR,
.event_ctl = ADL_UNCORE_IMC_CTRL,
.event_mask = ADL_UNC_IMC_EVENT_MASK,
.box_ctl = ADL_UNCORE_IMC_BOX_CTL,
.mmio_offset = 0,
.mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
.ops = &adl_uncore_mmio_ops,
.format_group = &adl_uncore_imc_format_group,
};
enum perf_adl_uncore_imc_freerunning_types {
ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
ADL_MMIO_UNCORE_IMC_DATA_READ,
ADL_MMIO_UNCORE_IMC_DATA_WRITE,
ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
};
static struct freerunning_counters adl_uncore_imc_freerunning[] = {
[ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
[ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
[ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
};
static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
{
__uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
}
static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
.init_box = adl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
.read_counter = uncore_mmio_read_counter,
.hw_config = uncore_freerunning_hw_config,
};
static struct intel_uncore_type adl_uncore_imc_free_running = {
.name = "imc_free_running",
.num_counters = 3,
.num_boxes = 2,
.num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
.mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
.freerunning = adl_uncore_imc_freerunning,
.ops = &adl_uncore_imc_freerunning_ops,
.event_descs = tgl_uncore_imc_events,
.format_group = &tgl_uncore_imc_format_group,
};
static struct intel_uncore_type *adl_mmio_uncores[] = {
&adl_uncore_imc,
&adl_uncore_imc_free_running,
NULL
};
void adl_uncore_mmio_init(void)
{
uncore_mmio_uncores = adl_mmio_uncores;
}
/* end of Alder Lake MMIO uncore support */
arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
-.event_descs = hswep_uncore_imc_events,
.event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
arch/x86/events/perf_event.h
@@ -215,7 +215,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
-LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
LBR_FORMAT_INFO2 = 0x07,
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};

enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
unsigned int lbr_has_info:1;
unsigned int lbr_has_tsx:1;
unsigned int lbr_from_flags:1;
unsigned int lbr_to_cycles:1;
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
void intel_pmu_lbr_init(void);
void intel_pmu_arch_lbr_init(void);

void intel_pmu_pebs_data_source_nhm(void);
arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
static struct perf_msr amd_rapl_msrs[] = {
-[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
[PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};

static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
include/linux/perf_event.h
@@ -693,18 +693,6 @@ struct perf_event {
u64 total_time_running;
u64 tstamp;
-/*
-* timestamp shadows the actual context timing but it can
-* be safely used in NMI interrupt context. It reflects the
-* context time as it was when the event was last scheduled in,
-* or when ctx_sched_in failed to schedule the event because we
-* run out of PMC.
-*
-* ctx_time already accounts for ctx->timestamp. Therefore to
-* compute ctx_time for a sample, simply add perf_clock().
-*/
-u64 shadow_ctx_time;
struct perf_event_attr attr;
u16 header_size;
u16 id_header_size;
@@ -852,6 +840,7 @@ struct perf_event_context {
*/
u64 time;
u64 timestamp;
u64 timeoffset;
/*
* These fields let us detect when two contexts have both
@@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern {
struct perf_cgroup_info {
u64 time;
u64 timestamp;
u64 timeoffset;
int active;
};

struct perf_cgroup {
kernel/events/core.c
(diff collapsed in the original view; not reproduced here)