Commit 4a2b88eb authored by Linus Torvalds

Merge tag 'perf-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 perf event updates from Ingo Molnar:

 - Add support for Intel Sapphire Rapids server CPU uncore events

 - Allow the AMD uncore driver to be built as a module

 - Misc cleanups and fixes

* tag 'perf-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  perf/x86/amd/ibs: Add bitfield definitions in new <asm/amd-ibs.h> header
  perf/amd/uncore: Allow the driver to be built as a module
  x86/cpu: Add get_llc_id() helper function
  perf/amd/uncore: Clean up header use, use <linux/ include paths instead of <asm/
  perf/amd/uncore: Simplify code, use free_percpu()'s built-in check for NULL
  perf/hw_breakpoint: Replace deprecated CPU-hotplug functions
  perf/x86/intel: Replace deprecated CPU-hotplug functions
  perf/x86: Remove unused assignment to pointer 'e'
  perf/x86/intel/uncore: Fix IIO cleanup mapping procedure for SNR/ICX
  perf/x86/intel/uncore: Support IMC free-running counters on Sapphire Rapids server
  perf/x86/intel/uncore: Support IIO free-running counters on Sapphire Rapids server
  perf/x86/intel/uncore: Factor out snr_uncore_mmio_map()
  perf/x86/intel/uncore: Add alias PMU name
  perf/x86/intel/uncore: Add Sapphire Rapids server MDF support
  perf/x86/intel/uncore: Add Sapphire Rapids server M3UPI support
  perf/x86/intel/uncore: Add Sapphire Rapids server UPI support
  perf/x86/intel/uncore: Add Sapphire Rapids server M2M support
  perf/x86/intel/uncore: Add Sapphire Rapids server IMC support
  perf/x86/intel/uncore: Add Sapphire Rapids server PCU support
  perf/x86/intel/uncore: Add Sapphire Rapids server M2PCIe support
  ...
parents 5d3c0db4 6a371baf
What: /sys/bus/event_source/devices/uncore_*/alias
Date: June 2021
KernelVersion: 5.15
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Read-only. An attribute to describe the alias name of
the uncore PMU if an alias exists on some platforms.
The 'perf(1)' tool should treat both names the same.
They both can be used to access the uncore PMU.
Example:
$ cat /sys/devices/uncore_cha_2/alias
uncore_type_0_2
...@@ -34,4 +34,14 @@ config PERF_EVENTS_AMD_POWER ...@@ -34,4 +34,14 @@ config PERF_EVENTS_AMD_POWER
(CPUID Fn8000_0007_EDX[12]) interface to calculate the (CPUID Fn8000_0007_EDX[12]) interface to calculate the
average power consumption on Family 15h processors. average power consumption on Family 15h processors.
config PERF_EVENTS_AMD_UNCORE
tristate "AMD Uncore performance events"
depends on PERF_EVENTS && CPU_SUP_AMD
default y
help
Include support for AMD uncore performance events for use with
e.g., perf stat -e amd_l3/.../,amd_df/.../.
To compile this driver as a module, choose M here: the
module will be called 'amd-uncore'.
endmenu endmenu
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o obj-$(CONFIG_CPU_SUP_AMD) += core.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
amd-uncore-objs := uncore.o
ifdef CONFIG_AMD_IOMMU ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += iommu.o obj-$(CONFIG_CPU_SUP_AMD) += iommu.o
endif endif
...@@ -26,6 +26,7 @@ static u32 ibs_caps; ...@@ -26,6 +26,7 @@ static u32 ibs_caps;
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <asm/nmi.h> #include <asm/nmi.h>
#include <asm/amd-ibs.h>
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
...@@ -100,15 +101,6 @@ struct perf_ibs { ...@@ -100,15 +101,6 @@ struct perf_ibs {
u64 (*get_count)(u64 config); u64 (*get_count)(u64 config);
}; };
struct perf_ibs_data {
u32 size;
union {
u32 data[0]; /* data buffer starts here */
u32 caps;
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
static int static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{ {
...@@ -329,11 +321,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, ...@@ -329,11 +321,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
static u64 get_ibs_fetch_count(u64 config) static u64 get_ibs_fetch_count(u64 config)
{ {
return (config & IBS_FETCH_CNT) >> 12; union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config;
return fetch_ctl.fetch_cnt << 4;
} }
static u64 get_ibs_op_count(u64 config) static u64 get_ibs_op_count(u64 config)
{ {
union ibs_op_ctl op_ctl = (union ibs_op_ctl)config;
u64 count = 0; u64 count = 0;
/* /*
...@@ -341,12 +336,12 @@ static u64 get_ibs_op_count(u64 config) ...@@ -341,12 +336,12 @@ static u64 get_ibs_op_count(u64 config)
* and the lower 7 bits of CurCnt are randomized. * and the lower 7 bits of CurCnt are randomized.
* Otherwise CurCnt has the full 27-bit current counter value. * Otherwise CurCnt has the full 27-bit current counter value.
*/ */
if (config & IBS_OP_VAL) { if (op_ctl.op_val) {
count = (config & IBS_OP_MAX_CNT) << 4; count = op_ctl.opmaxcnt << 4;
if (ibs_caps & IBS_CAPS_OPCNTEXT) if (ibs_caps & IBS_CAPS_OPCNTEXT)
count += config & IBS_OP_MAX_CNT_EXT_MASK; count += op_ctl.opmaxcnt_ext << 20;
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) { } else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
count = (config & IBS_OP_CUR_CNT) >> 32; count = op_ctl.opcurcnt;
} }
return count; return count;
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>
#include <asm/cpufeature.h>
#include <asm/perf_event.h> #include <asm/perf_event.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/smp.h>
#define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4 #define NUM_COUNTERS_L2 4
...@@ -347,6 +347,7 @@ static struct pmu amd_nb_pmu = { ...@@ -347,6 +347,7 @@ static struct pmu amd_nb_pmu = {
.stop = amd_uncore_stop, .stop = amd_uncore_stop,
.read = amd_uncore_read, .read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
}; };
static struct pmu amd_llc_pmu = { static struct pmu amd_llc_pmu = {
...@@ -360,6 +361,7 @@ static struct pmu amd_llc_pmu = { ...@@ -360,6 +361,7 @@ static struct pmu amd_llc_pmu = {
.stop = amd_uncore_stop, .stop = amd_uncore_stop,
.read = amd_uncore_read, .read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
}; };
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
...@@ -452,7 +454,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu) ...@@ -452,7 +454,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
if (amd_uncore_llc) { if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu); uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
uncore->id = per_cpu(cpu_llc_id, cpu); uncore->id = get_llc_id(cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore; *per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
...@@ -659,12 +661,34 @@ static int __init amd_uncore_init(void) ...@@ -659,12 +661,34 @@ static int __init amd_uncore_init(void)
fail_llc: fail_llc:
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu); perf_pmu_unregister(&amd_nb_pmu);
if (amd_uncore_llc) free_percpu(amd_uncore_llc);
free_percpu(amd_uncore_llc);
fail_nb: fail_nb:
if (amd_uncore_nb) free_percpu(amd_uncore_nb);
free_percpu(amd_uncore_nb);
return ret; return ret;
} }
device_initcall(amd_uncore_init);
/*
 * Module exit handler: undo what the driver set up.
 *
 * First removes the three CPU hotplug states used by the driver, then,
 * for each uncore PMU whose CPU feature is present, unregisters the PMU,
 * frees its per-CPU pointer array and clears the global pointer.
 */
static void __exit amd_uncore_exit(void)
{
/* Drop the hotplug callbacks before the per-CPU data they use is freed. */
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
/* LLC (L3) uncore PMU, only if the CPU advertises PERFCTR_LLC. */
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
perf_pmu_unregister(&amd_llc_pmu);
free_percpu(amd_uncore_llc);
/* Reset so a stale pointer is never observed after the free. */
amd_uncore_llc = NULL;
}
/* Northbridge uncore PMU, only if the CPU advertises PERFCTR_NB. */
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
perf_pmu_unregister(&amd_nb_pmu);
free_percpu(amd_uncore_nb);
/* Reset so a stale pointer is never observed after the free. */
amd_uncore_nb = NULL;
}
}
module_init(amd_uncore_init);
module_exit(amd_uncore_exit);
MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");
...@@ -1087,10 +1087,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ...@@ -1087,10 +1087,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* validate an event group (assign == NULL) * validate an event group (assign == NULL)
*/ */
if (!unsched && assign) { if (!unsched && assign) {
for (i = 0; i < n; i++) { for (i = 0; i < n; i++)
e = cpuc->event_list[i];
static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
}
} else { } else {
for (i = n0; i < n; i++) { for (i = n0; i < n; i++) {
e = cpuc->event_list[i]; e = cpuc->event_list[i];
......
...@@ -5032,9 +5032,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev, ...@@ -5032,9 +5032,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev,
x86_pmu.attr_freeze_on_smi = val; x86_pmu.attr_freeze_on_smi = val;
get_online_cpus(); cpus_read_lock();
on_each_cpu(flip_smm_bit, &val, 1); on_each_cpu(flip_smm_bit, &val, 1);
put_online_cpus(); cpus_read_unlock();
done: done:
mutex_unlock(&freeze_on_smi_mutex); mutex_unlock(&freeze_on_smi_mutex);
...@@ -5077,9 +5077,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev, ...@@ -5077,9 +5077,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev,
allow_tsx_force_abort = val; allow_tsx_force_abort = val;
get_online_cpus(); cpus_read_lock();
on_each_cpu(update_tfa_sched, NULL, 1); on_each_cpu(update_tfa_sched, NULL, 1);
put_online_cpus(); cpus_read_unlock();
return count; return count;
} }
......
...@@ -1708,7 +1708,7 @@ static __init int pt_init(void) ...@@ -1708,7 +1708,7 @@ static __init int pt_init(void)
if (!boot_cpu_has(X86_FEATURE_INTEL_PT)) if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV; return -ENODEV;
get_online_cpus(); cpus_read_lock();
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
u64 ctl; u64 ctl;
...@@ -1716,7 +1716,7 @@ static __init int pt_init(void) ...@@ -1716,7 +1716,7 @@ static __init int pt_init(void)
if (!ret && (ctl & RTIT_CTL_TRACEEN)) if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++; prior_warn++;
} }
put_online_cpus(); cpus_read_unlock();
if (prior_warn) { if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt); x86_add_exclusive(x86_lbr_exclusive_pt);
......
...@@ -842,6 +842,18 @@ static const struct attribute_group uncore_pmu_attr_group = { ...@@ -842,6 +842,18 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs, .attrs = uncore_pmu_attrs,
}; };
/*
 * Write the generic alias name of @pmu into @pmu_name.
 *
 * The alias is "uncore_type_<type_id>" when the uncore type has exactly
 * one box, and "uncore_type_<type_id>_<box_id>" otherwise, where the box
 * ID is looked up in the type's box_ids[] table by the PMU index.
 *
 * The write is unbounded (sprintf), so @pmu_name must be large enough
 * for the formatted string.
 */
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *uncore_type = pmu->type;

	/* Multi-box types need the box ID to keep the alias unique. */
	if (uncore_type->num_boxes != 1) {
		sprintf(pmu_name, "uncore_type_%u_%d",
			uncore_type->type_id,
			uncore_type->box_ids[pmu->pmu_idx]);
		return;
	}

	sprintf(pmu_name, "uncore_type_%u", uncore_type->type_id);
}
static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{ {
struct intel_uncore_type *type = pmu->type; struct intel_uncore_type *type = pmu->type;
...@@ -851,12 +863,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) ...@@ -851,12 +863,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use uncore_type_&typeid_&boxid as name. * Use uncore_type_&typeid_&boxid as name.
*/ */
if (!type->name) { if (!type->name) {
if (type->num_boxes == 1) uncore_get_alias_name(pmu->name, pmu);
sprintf(pmu->name, "uncore_type_%u", type->type_id);
else {
sprintf(pmu->name, "uncore_type_%u_%d",
type->type_id, type->box_ids[pmu->pmu_idx]);
}
return; return;
} }
...@@ -865,9 +872,13 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) ...@@ -865,9 +872,13 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
sprintf(pmu->name, "uncore_%s", type->name); sprintf(pmu->name, "uncore_%s", type->name);
else else
sprintf(pmu->name, "uncore"); sprintf(pmu->name, "uncore");
} else } else {
sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx); /*
* Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
}
} }
static int uncore_pmu_register(struct intel_uncore_pmu *pmu) static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
...@@ -1663,6 +1674,7 @@ struct intel_uncore_init_fun { ...@@ -1663,6 +1674,7 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void); void (*cpu_init)(void);
int (*pci_init)(void); int (*pci_init)(void);
void (*mmio_init)(void); void (*mmio_init)(void);
bool use_discovery;
}; };
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
...@@ -1765,6 +1777,13 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = { ...@@ -1765,6 +1777,13 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
.mmio_init = snr_uncore_mmio_init, .mmio_init = snr_uncore_mmio_init,
}; };
/*
 * Init hooks for the spr_* uncore support (MSR/CPU, PCI and MMIO access
 * types).  use_discovery is set, marking this entry as depending on the
 * uncore discovery mechanism.
 */
static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
.cpu_init = spr_uncore_cpu_init,
.pci_init = spr_uncore_pci_init,
.mmio_init = spr_uncore_mmio_init,
.use_discovery = true,
};
static const struct intel_uncore_init_fun generic_uncore_init __initconst = { static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
.cpu_init = intel_uncore_generic_uncore_cpu_init, .cpu_init = intel_uncore_generic_uncore_cpu_init,
.pci_init = intel_uncore_generic_uncore_pci_init, .pci_init = intel_uncore_generic_uncore_pci_init,
...@@ -1809,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { ...@@ -1809,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{}, {},
}; };
...@@ -1832,8 +1852,13 @@ static int __init intel_uncore_init(void) ...@@ -1832,8 +1852,13 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else else
return -ENODEV; return -ENODEV;
} else } else {
uncore_init = (struct intel_uncore_init_fun *)id->driver_data; uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_no_discover && uncore_init->use_discovery)
return -ENODEV;
if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
return -ENODEV;
}
if (uncore_init->pci_init) { if (uncore_init->pci_init) {
pret = uncore_init->pci_init(); pret = uncore_init->pci_init();
......
...@@ -561,6 +561,7 @@ struct event_constraint * ...@@ -561,6 +561,7 @@ struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu);
extern struct intel_uncore_type *empty_uncore[]; extern struct intel_uncore_type *empty_uncore[];
extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_msr_uncores;
...@@ -608,6 +609,9 @@ void snr_uncore_mmio_init(void); ...@@ -608,6 +609,9 @@ void snr_uncore_mmio_init(void);
int icx_uncore_pci_init(void); int icx_uncore_pci_init(void);
void icx_uncore_cpu_init(void); void icx_uncore_cpu_init(void);
void icx_uncore_mmio_init(void); void icx_uncore_mmio_init(void);
int spr_uncore_pci_init(void);
void spr_uncore_cpu_init(void);
void spr_uncore_mmio_init(void);
/* uncore_nhmex.c */ /* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void); void nhmex_uncore_cpu_init(void);
...@@ -337,17 +337,17 @@ static const struct attribute_group generic_uncore_format_group = { ...@@ -337,17 +337,17 @@ static const struct attribute_group generic_uncore_format_group = {
.attrs = generic_uncore_formats_attr, .attrs = generic_uncore_formats_attr,
}; };
static void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{ {
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
} }
static void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{ {
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
} }
static void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{ {
wrmsrl(uncore_msr_box_ctl(box), 0); wrmsrl(uncore_msr_box_ctl(box), 0);
} }
...@@ -377,7 +377,7 @@ static struct intel_uncore_ops generic_uncore_msr_ops = { ...@@ -377,7 +377,7 @@ static struct intel_uncore_ops generic_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter, .read_counter = uncore_msr_read_counter,
}; };
static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
{ {
struct pci_dev *pdev = box->pci_dev; struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box); int box_ctl = uncore_pci_box_ctl(box);
...@@ -386,7 +386,7 @@ static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) ...@@ -386,7 +386,7 @@ static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT);
} }
static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
{ {
struct pci_dev *pdev = box->pci_dev; struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box); int box_ctl = uncore_pci_box_ctl(box);
...@@ -394,7 +394,7 @@ static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) ...@@ -394,7 +394,7 @@ static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ);
} }
static void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
{ {
struct pci_dev *pdev = box->pci_dev; struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box); int box_ctl = uncore_pci_box_ctl(box);
...@@ -411,8 +411,8 @@ static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box, ...@@ -411,8 +411,8 @@ static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, hwc->config); pci_write_config_dword(pdev, hwc->config_base, hwc->config);
} }
static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
struct perf_event *event) struct perf_event *event)
{ {
struct pci_dev *pdev = box->pci_dev; struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
...@@ -420,8 +420,8 @@ static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, ...@@ -420,8 +420,8 @@ static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, 0); pci_write_config_dword(pdev, hwc->config_base, 0);
} }
static u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
struct perf_event *event) struct perf_event *event)
{ {
struct pci_dev *pdev = box->pci_dev; struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
...@@ -454,7 +454,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) ...@@ -454,7 +454,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx];
} }
static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{ {
unsigned int box_ctl = generic_uncore_mmio_box_ctl(box); unsigned int box_ctl = generic_uncore_mmio_box_ctl(box);
struct intel_uncore_type *type = box->pmu->type; struct intel_uncore_type *type = box->pmu->type;
...@@ -478,7 +478,7 @@ static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) ...@@ -478,7 +478,7 @@ static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr); writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr);
} }
static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
{ {
if (!box->io_addr) if (!box->io_addr)
return; return;
...@@ -486,7 +486,7 @@ static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) ...@@ -486,7 +486,7 @@ static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr); writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr);
} }
static void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
{ {
if (!box->io_addr) if (!box->io_addr)
return; return;
...@@ -505,8 +505,8 @@ static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, ...@@ -505,8 +505,8 @@ static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
writel(hwc->config, box->io_addr + hwc->config_base); writel(hwc->config, box->io_addr + hwc->config_base);
} }
static void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event) struct perf_event *event)
{ {
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
...@@ -568,8 +568,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, ...@@ -568,8 +568,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id,
return true; return true;
} }
static struct intel_uncore_type ** struct intel_uncore_type **
intel_uncore_generic_init_uncores(enum uncore_access_type type_id) intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra)
{ {
struct intel_uncore_discovery_type *type; struct intel_uncore_discovery_type *type;
struct intel_uncore_type **uncores; struct intel_uncore_type **uncores;
...@@ -577,7 +577,7 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id) ...@@ -577,7 +577,7 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
struct rb_node *node; struct rb_node *node;
int i = 0; int i = 0;
uncores = kcalloc(num_discovered_types[type_id] + 1, uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1,
sizeof(struct intel_uncore_type *), GFP_KERNEL); sizeof(struct intel_uncore_type *), GFP_KERNEL);
if (!uncores) if (!uncores)
return empty_uncore; return empty_uncore;
...@@ -606,17 +606,17 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id) ...@@ -606,17 +606,17 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
void intel_uncore_generic_uncore_cpu_init(void) void intel_uncore_generic_uncore_cpu_init(void)
{ {
uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR); uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0);
} }
int intel_uncore_generic_uncore_pci_init(void) int intel_uncore_generic_uncore_pci_init(void)
{ {
uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI); uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0);
return 0; return 0;
} }
void intel_uncore_generic_uncore_mmio_init(void) void intel_uncore_generic_uncore_mmio_init(void)
{ {
uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO); uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0);
} }
...@@ -129,3 +129,24 @@ void intel_uncore_clear_discovery_tables(void); ...@@ -129,3 +129,24 @@ void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void); void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void); int intel_uncore_generic_uncore_pci_init(void);
void intel_uncore_generic_uncore_mmio_init(void); void intel_uncore_generic_uncore_mmio_init(void);
void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
struct perf_event *event);
struct intel_uncore_type **
intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
*/
#include <asm/msr-index.h>
/*
* IBS Hardware MSRs
*/
/*
 * MSR 0xc0011030: IBS Fetch Control
 *
 * Overlays the raw 64-bit register value with named bitfields.  The
 * fields are listed from bit 0 upwards and together cover all 64 bits;
 * the bit range of each field is given in its trailing comment.
 */
union ibs_fetch_ctl {
/* raw 64-bit register value */
__u64 val;
/* field-by-field view of the same 64 bits */
struct {
__u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
fetch_cnt:16, /* 16-31: instruction fetch count */
fetch_lat:16, /* 32-47: instruction fetch latency */
fetch_en:1, /* 48: instruction fetch enable */
fetch_val:1, /* 49: instruction fetch valid */
fetch_comp:1, /* 50: instruction fetch complete */
ic_miss:1, /* 51: i-cache miss */
phy_addr_valid:1,/* 52: physical address valid */
l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
* (needs IbsPhyAddrValid) */
l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
rand_en:1, /* 57: random tagging enable */
fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
* (needs IbsFetchComp) */
reserved:5; /* 59-63: reserved */
};
};
/*
 * MSR 0xc0011033: IBS Execution Control
 *
 * Overlays the raw 64-bit register value with named bitfields.  The
 * fields are listed from bit 0 upwards and together cover all 64 bits.
 * The periodic op maximum count is split across two fields: opmaxcnt
 * and its upper extension opmaxcnt_ext.
 */
union ibs_op_ctl {
/* raw 64-bit register value */
__u64 val;
/* field-by-field view of the same 64 bits */
struct {
__u64 opmaxcnt:16, /* 0-15: periodic op max. count */
reserved0:1, /* 16: reserved */
op_en:1, /* 17: op sampling enable */
op_val:1, /* 18: op sample valid */
cnt_ctl:1, /* 19: periodic op counter control */
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
reserved1:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
reserved2:5; /* 59-63: reserved */
};
};
/*
 * MSR 0xc0011035: IBS Op Data 1
 *
 * Overlays the raw 64-bit register value with named bitfields.  The
 * fields are listed from bit 0 upwards and together cover all 64 bits.
 */
union ibs_op_data {
/* raw 64-bit register value */
__u64 val;
/* field-by-field view of the same 64 bits */
struct {
__u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
tag_to_ret_ctr:16, /* 16-31: op tag to retire count */
reserved1:2, /* 32-33: reserved */
op_return:1, /* 34: return op */
op_brn_taken:1, /* 35: taken branch op */
op_brn_misp:1, /* 36: mispredicted branch op */
op_brn_ret:1, /* 37: branch op retired */
op_rip_invalid:1, /* 38: RIP is invalid */
op_brn_fuse:1, /* 39: fused branch op */
op_microcode:1, /* 40: microcode op */
reserved2:23; /* 41-63: reserved */
};
};
/*
 * MSR 0xc0011036: IBS Op Data 2
 *
 * Overlays the raw 64-bit register value with named bitfields.  The
 * fields are listed from bit 0 upwards and together cover all 64 bits:
 * the named fields occupy bits 0-5, so the reserved tail is bits 6-63
 * (58 bits).
 */
union ibs_op_data2 {
	__u64 val;			/* raw 64-bit register value */
	struct {
		__u64	data_src:3,	/* 0-2: data source */
			reserved0:1,	/* 3: reserved */
			rmt_node:1,	/* 4: destination node */
			cache_hit_st:1,	/* 5: cache hit state */
			reserved1:58;	/* 6-63: reserved */
	};
};
/*
 * MSR 0xc0011037: IBS Op Data 3
 *
 * Overlays the raw 64-bit register value with named bitfields.  The
 * fields are listed from bit 0 upwards and together cover all 64 bits.
 */
union ibs_op_data3 {
/* raw 64-bit register value */
__u64 val;
/* field-by-field view of the same 64 bits */
struct {
__u64 ld_op:1, /* 0: load op */
st_op:1, /* 1: store op */
dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
dc_l2tlb_miss:1, /* 3: data cache L2TLB miss */
dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
dc_miss:1, /* 7: data cache miss */
dc_mis_acc:1, /* 8: misaligned access */
reserved:4, /* 9-12: reserved */
dc_wc_mem_acc:1, /* 13: write combining memory access */
dc_uc_mem_acc:1, /* 14: uncacheable memory access */
dc_locked_op:1, /* 15: locked operation */
dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
dc_lin_addr_valid:1, /* 17: data cache linear address valid */
dc_phy_addr_valid:1, /* 18: data cache physical address valid */
dc_l2_tlb_hit_1g:1, /* 19: data cache L2TLB hit in 1G page */
l2_miss:1, /* 20: L2 cache miss */
sw_pf:1, /* 21: software prefetch */
op_mem_width:4, /* 22-25: load/store size in bytes */
op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
dc_miss_lat:16, /* 32-47: data cache miss latency */
tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
};
};
/*
 * MSR 0xc001103c: IBS Fetch Control Extended
 *
 * Overlays the raw 64-bit register value with named bitfields; only the
 * low 16 bits carry data, the remaining 48 bits are reserved.
 */
union ic_ibs_extd_ctl {
/* raw 64-bit register value */
__u64 val;
/* field-by-field view of the same 64 bits */
struct {
__u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
reserved:48; /* 16-63: reserved */
};
};
/*
* IBS driver related
*/
/*
 * Container for one IBS sample as handled by the perf IBS driver.
 *
 * @size: size field stored ahead of the data buffer
 *        (NOTE(review): presumably the byte length of the data that
 *        follows -- confirm against the code that fills it in).
 * @data: zero-length marker for the start of the data buffer; it shares
 *        its address with @caps, so the buffer begins at @caps and
 *        continues into @regs.
 * @caps: IBS capabilities word, the first 32-bit word of the buffer.
 * @regs: up to MSR_AMD64_IBS_REG_COUNT_MAX 64-bit IBS register values.
 */
struct perf_ibs_data {
u32 size;
union {
u32 data[0]; /* data buffer starts here */
u32 caps;
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
...@@ -795,6 +795,8 @@ extern int set_tsc_mode(unsigned int val); ...@@ -795,6 +795,8 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow); DECLARE_PER_CPU(u64, msr_misc_features_shadow);
extern u16 get_llc_id(unsigned int cpu);
#ifdef CONFIG_CPU_SUP_AMD #ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void); extern u32 amd_get_highest_perf(void);
......
...@@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) ...@@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
node = numa_cpu_node(cpu); node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
node = per_cpu(cpu_llc_id, cpu); node = get_llc_id(cpu);
/* /*
* On multi-fabric platform (e.g. Numascale NumaChip) a * On multi-fabric platform (e.g. Numascale NumaChip) a
......
...@@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings); ...@@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */ /* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
/*
 * Accessor for the per-CPU cpu_llc_id variable: returns the last level
 * cache ID recorded for logical CPU @cpu.  Exported (GPL-only) so that
 * modules can obtain the value through this function.
 */
u16 get_llc_id(unsigned int cpu)
{
	u16 llc_id = per_cpu(cpu_llc_id, cpu);

	return llc_id;
}
EXPORT_SYMBOL_GPL(get_llc_id);
/* correctly size the local cpu masks */ /* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void) void __init setup_cpu_local_masks(void)
{ {
......
...@@ -568,7 +568,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, ...@@ -568,7 +568,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
if (!cpu_events) if (!cpu_events)
return (void __percpu __force *)ERR_PTR(-ENOMEM); return (void __percpu __force *)ERR_PTR(-ENOMEM);
get_online_cpus(); cpus_read_lock();
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
bp = perf_event_create_kernel_counter(attr, cpu, NULL, bp = perf_event_create_kernel_counter(attr, cpu, NULL,
triggered, context); triggered, context);
...@@ -579,7 +579,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, ...@@ -579,7 +579,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
per_cpu(*cpu_events, cpu) = bp; per_cpu(*cpu_events, cpu) = bp;
} }
put_online_cpus(); cpus_read_unlock();
if (likely(!err)) if (likely(!err))
return cpu_events; return cpu_events;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment