Commit 1e0c661f authored by Will Deacon's avatar Will Deacon

Merge branch 'for-next/perf' into aarch64/for-next/core

Merge in ARM PMU and perf updates for 4.15:

  - Support for the Statistical Profiling Extension
  - Support for Hisilicon's SoC PMU
Signed-off-by: default avatarWill Deacon <will.deacon@arm.com>
parents 611479c7 07141342
* ARMv8.2 Statistical Profiling Extension (SPE) Performance Monitor Units (PMU)
ARMv8.2 introduces the optional Statistical Profiling Extension for collecting
performance sample data using an in-memory trace buffer.
** SPE Required properties:
- compatible : should be one of:
"arm,statistical-profiling-extension-v1"
- interrupts : Exactly 1 PPI must be listed. For heterogeneous systems where
SPE is only supported on a subset of the CPUs, please consult
the arm,gic-v3 binding for details on describing a PPI partition.
** Example:
spe-pmu {
compatible = "arm,statistical-profiling-extension-v1";
interrupts = <GIC_PPI 05 IRQ_TYPE_LEVEL_HIGH &part1>;
};
HiSilicon SoC uncore Performance Monitoring Unit (PMU)
======================================================
The HiSilicon SoC chip includes various independent system device PMUs
such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
independent and have hardware logic to gather statistics and performance
information.
The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
HiSilicon SoC uncore PMU driver
---------------------------------------
Each device PMU has separate registers for event counting, control and
interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
be described in the sysfs, see :
/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
module.
e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
SCCL ID #3.
e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
SCCL ID #1.
The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
ID used to count the uncore PMU event.
Example usage of perf:
$# perf list
hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
------------------------------------------
hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
------------------------------------------
hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
------------------------------------------
hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
------------------------------------------
$# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
The current driver does not support sampling. So "perf record" is unsupported.
Also attach to a task is unsupported as the events are all uncore.
Note: Please contact the maintainer for a complete list of events supported for
the PMU devices in the SoC and its information if needed.
......@@ -6241,6 +6241,13 @@ S: Maintained
F: drivers/net/ethernet/hisilicon/
F: Documentation/devicetree/bindings/net/hisilicon*.txt
HISILICON PMU DRIVER
M: Shaokun Zhang <zhangshaokun@hisilicon.com>
W: http://www.hisilicon.com
S: Supported
F: drivers/perf/hisilicon
F: Documentation/perf/hisi-pmu.txt
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
......
......@@ -31,6 +31,8 @@
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define psb_csync() asm volatile("hint #17" : : : "memory")
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
......
......@@ -172,6 +172,99 @@
#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
/*** Statistical Profiling Extension ***/
/* ID registers */
#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7)
#define SYS_PMSIDR_EL1_FE_SHIFT 0
#define SYS_PMSIDR_EL1_FT_SHIFT 1
#define SYS_PMSIDR_EL1_FL_SHIFT 2
#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3
#define SYS_PMSIDR_EL1_LDS_SHIFT 4
#define SYS_PMSIDR_EL1_ERND_SHIFT 5
#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8
#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL
#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12
#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL
#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16
#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL
#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0
#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU
#define SYS_PMBIDR_EL1_P_SHIFT 4
#define SYS_PMBIDR_EL1_F_SHIFT 5
/* Sampling controls */
#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
#define SYS_PMSCR_EL1_E0SPE_SHIFT 0
#define SYS_PMSCR_EL1_E1SPE_SHIFT 1
#define SYS_PMSCR_EL1_CX_SHIFT 3
#define SYS_PMSCR_EL1_PA_SHIFT 4
#define SYS_PMSCR_EL1_TS_SHIFT 5
#define SYS_PMSCR_EL1_PCT_SHIFT 6
#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0)
#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0
#define SYS_PMSCR_EL2_E2SPE_SHIFT 1
#define SYS_PMSCR_EL2_CX_SHIFT 3
#define SYS_PMSCR_EL2_PA_SHIFT 4
#define SYS_PMSCR_EL2_TS_SHIFT 5
#define SYS_PMSCR_EL2_PCT_SHIFT 6
#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2)
#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3)
#define SYS_PMSIRR_EL1_RND_SHIFT 0
#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8
#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
/* Filtering controls */
#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
#define SYS_PMSFCR_EL1_FE_SHIFT 0
#define SYS_PMSFCR_EL1_FT_SHIFT 1
#define SYS_PMSFCR_EL1_FL_SHIFT 2
#define SYS_PMSFCR_EL1_B_SHIFT 16
#define SYS_PMSFCR_EL1_LD_SHIFT 17
#define SYS_PMSFCR_EL1_ST_SHIFT 18
#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
/* Buffer controls */
#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
#define SYS_PMBLIMITR_EL1_E_SHIFT 0
#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
/* Buffer error reporting */
#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3)
#define SYS_PMBSR_EL1_COLL_SHIFT 16
#define SYS_PMBSR_EL1_S_SHIFT 17
#define SYS_PMBSR_EL1_EA_SHIFT 18
#define SYS_PMBSR_EL1_DL_SHIFT 19
#define SYS_PMBSR_EL1_EC_SHIFT 26
#define SYS_PMBSR_EL1_EC_MASK 0x3fUL
#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0
#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL
#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0
#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL
#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
/*** End of Statistical Profiling Extension ***/
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
......
......@@ -480,14 +480,21 @@ set_hcr:
/* Statistical profiling */
ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer
cbz x0, 6f // Skip if SPE not present
cbnz x2, 5f // VHE?
cbz x0, 7f // Skip if SPE not present
cbnz x2, 6f // VHE?
mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x4, 5f // then permit sampling of physical
mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
5:
mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x3, x3, x1 // If we don't have VHE, then
b 6f // use EL1&0 translation.
5: // For VHE, use EL2 translation
b 7f // use EL1&0 translation.
6: // For VHE, use EL2 translation
orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
6:
7:
msr mdcr_el2, x3 // Configure debug traps
/* Stage-2 translation */
......
......@@ -65,16 +65,6 @@
default: write_debug(ptr[0], reg, 0); \
}
#define PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
#define PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
#define PMBLIMITR_EL1_E BIT(0)
#define PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
#define PMBIDR_EL1_P BIT(4)
#define psb_csync() asm volatile("hint #17")
static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
{
/* The vcpu can run. but it can't hide. */
......@@ -90,18 +80,18 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
return;
/* Yes; is it owned by EL3? */
reg = read_sysreg_s(PMBIDR_EL1);
if (reg & PMBIDR_EL1_P)
reg = read_sysreg_s(SYS_PMBIDR_EL1);
if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
return;
/* No; is the host actually using the thing? */
reg = read_sysreg_s(PMBLIMITR_EL1);
if (!(reg & PMBLIMITR_EL1_E))
reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
return;
/* Yes; save the control register and disable data generation */
*pmscr_el1 = read_sysreg_s(PMSCR_EL1);
write_sysreg_s(0, PMSCR_EL1);
*pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
write_sysreg_s(0, SYS_PMSCR_EL1);
isb();
/* Now drain all buffered data to memory */
......@@ -122,7 +112,7 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
isb();
/* Re-enable data generation */
write_sysreg_s(pmscr_el1, PMSCR_EL1);
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
}
void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
......
......@@ -17,6 +17,13 @@ config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
config HISI_PMU
bool "HiSilicon SoC PMU"
depends on ARM64 && ACPI
help
Support for HiSilicon SoC uncore performance monitoring
unit (PMU), such as: L3C, HHA and DDRC.
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
......@@ -43,4 +50,12 @@ config XGENE_PMU
help
Say y if you want to use APM X-Gene SoC performance monitors.
config ARM_SPE_PMU
tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
depends on PERF_EVENTS && ARM64
help
Enable perf support for the ARMv8.2 Statistical Profiling
Extension, which provides periodic sampling of operations in
the CPU pipeline and reports this via the perf AUX interface.
endmenu
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
This diff is collapsed.
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* HiSilicon SoC Hardware event counters support
*
* Copyright (C) 2017 Hisilicon Limited
* Author: Anurup M <anurup.m@huawei.com>
* Shaokun Zhang <zhangshaokun@hisilicon.com>
*
* This code is based on the uncore PMUs like arm-cci and arm-ccn.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __HISI_UNCORE_PMU_H__
#define __HISI_UNCORE_PMU_H__
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/types.h>
#undef pr_fmt
#define pr_fmt(fmt) "hisi_pmu: " fmt
#define HISI_MAX_COUNTERS 0x10
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
#define HISI_PMU_ATTR(_name, _func, _config) \
(&((struct dev_ext_attribute[]) { \
{ __ATTR(_name, 0444, _func, NULL), (void *)_config } \
})[0].attr.attr)
#define HISI_PMU_FORMAT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
#define HISI_PMU_EVENT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
struct hisi_pmu;
struct hisi_uncore_ops {
void (*write_evtype)(struct hisi_pmu *, int, u32);
int (*get_event_idx)(struct perf_event *);
u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
void (*write_counter)(struct hisi_pmu *, struct hw_perf_event *, u64);
void (*enable_counter)(struct hisi_pmu *, struct hw_perf_event *);
void (*disable_counter)(struct hisi_pmu *, struct hw_perf_event *);
void (*enable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
void (*start_counters)(struct hisi_pmu *);
void (*stop_counters)(struct hisi_pmu *);
};
struct hisi_pmu_hwevents {
struct perf_event *hw_events[HISI_MAX_COUNTERS];
DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
};
/* Generic pmu struct for different pmu types */
struct hisi_pmu {
struct pmu pmu;
const struct hisi_uncore_ops *ops;
struct hisi_pmu_hwevents pmu_events;
/* associated_cpus: All CPUs associated with the PMU */
cpumask_t associated_cpus;
/* CPU used for counting */
int on_cpu;
int irq;
struct device *dev;
struct hlist_node node;
int sccl_id;
int ccl_id;
void __iomem *base;
/* the ID of the PMU modules */
u32 index_id;
int num_counters;
int counter_bits;
/* check event code range */
int check_event;
};
int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
void hisi_uncore_pmu_read(struct perf_event *event);
int hisi_uncore_pmu_add(struct perf_event *event, int flags);
void hisi_uncore_pmu_del(struct perf_event *event, int flags);
void hisi_uncore_pmu_start(struct perf_event *event, int flags);
void hisi_uncore_pmu_stop(struct perf_event *event, int flags);
void hisi_uncore_pmu_set_event_period(struct perf_event *event);
void hisi_uncore_pmu_event_update(struct perf_event *event);
int hisi_uncore_pmu_event_init(struct perf_event *event);
void hisi_uncore_pmu_enable(struct pmu *pmu);
void hisi_uncore_pmu_disable(struct pmu *pmu);
ssize_t hisi_event_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
ssize_t hisi_format_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
ssize_t hisi_cpumask_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf);
int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
#endif /* __HISI_UNCORE_PMU_H__ */
......@@ -153,6 +153,9 @@ enum cpuhp_state {
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
CPUHP_AP_PERF_ARM_CCN_ONLINE,
CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
......
......@@ -941,6 +941,7 @@ enum perf_callchain_context {
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
......
......@@ -411,6 +411,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
return NULL;
}
EXPORT_SYMBOL_GPL(perf_aux_output_begin);
static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
{
......@@ -480,6 +481,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
rb_free_aux(rb);
ring_buffer_put(rb);
}
EXPORT_SYMBOL_GPL(perf_aux_output_end);
/*
* Skip over a given number of bytes in the AUX buffer, due to, for example,
......@@ -505,6 +507,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
return 0;
}
EXPORT_SYMBOL_GPL(perf_aux_output_skip);
void *perf_get_aux(struct perf_output_handle *handle)
{
......@@ -514,6 +517,7 @@ void *perf_get_aux(struct perf_output_handle *handle)
return handle->rb->aux_priv;
}
EXPORT_SYMBOL_GPL(perf_get_aux);
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
......
......@@ -863,6 +863,7 @@ int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity)
return 0;
}
EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition);
void kstat_incr_irq_this_cpu(unsigned int irq)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment