Commit 3bff6112 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "x86 Intel updates:

   - Add Jasper Lake support

   - Add support for TopDown metrics on Ice Lake

   - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support

   - Add a PCI sub driver to support uncore PMUs where the PCI resources
     have been claimed already - extending the range of supported
     systems.

  x86 AMD updates:

   - Restore 'perf stat -a' behaviour to program the uncore PMU to count
     all CPU threads.

   - Fix setting the proper count when sampling Large Increment per
     Cycle events / 'paired' events.

   - Fix IBS Fetch sampling on F17h and some other IBS fine tuning,
     greatly reducing the number of interrupts when large sample periods
     are specified.

   - Extend Family 17h RAPL support to also work on compatible F19h
     machines.

  Core code updates:

   - Fix race in perf_mmap_close()

   - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be
     closed if the leader is removed.

   - Smaller fixes and updates"

* tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  perf/core: Fix race in the perf_mmap_close() function
  perf/x86: Fix n_metric for cancelled txn
  perf/x86: Fix n_pair for cancelled txn
  x86/events/amd/iommu: Fix sizeof mismatch
  perf/x86/intel: Check perf metrics feature for each CPU
  perf/x86/intel: Fix Ice Lake event constraint table
  perf/x86/intel/uncore: Fix the scale of the IMC free-running events
  perf/x86/intel/uncore: Fix for iio mapping on Skylake Server
  perf/x86/msr: Add Jasper Lake support
  perf/x86/intel: Add Jasper Lake support
  perf/x86/intel/uncore: Reduce the number of CBOX counters
  perf/x86/intel/uncore: Update Ice Lake uncore units
  perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support
  perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge
  perf/x86/intel/uncore: Generic support for the PCI sub driver
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister()
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_register()
  perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu()
  perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info()
  perf/amd/uncore: Inform the user how many counters each uncore PMU has
  ...
parents dd502a81 f91072ed
...@@ -89,6 +89,7 @@ struct perf_ibs { ...@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period; u64 max_period;
unsigned long offset_mask[1]; unsigned long offset_mask[1];
int offset_max; int offset_max;
unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu; struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs; struct attribute **format_attrs;
...@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config) ...@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
{ {
u64 count = 0; u64 count = 0;
if (config & IBS_OP_VAL) /*
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ * If the internal 27-bit counter rolled over, the count is MaxCnt
* and the lower 7 bits of CurCnt are randomized.
if (ibs_caps & IBS_CAPS_RDWROPCNT) * Otherwise CurCnt has the full 27-bit current counter value.
count += (config & IBS_OP_CUR_CNT) >> 32; */
if (config & IBS_OP_VAL) {
count = (config & IBS_OP_MAX_CNT) << 4;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
count += config & IBS_OP_MAX_CNT_EXT_MASK;
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
count = (config & IBS_OP_CUR_CNT) >> 32;
}
return count; return count;
} }
...@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, ...@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config) struct hw_perf_event *hwc, u64 config)
{ {
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); u64 tmp = hwc->config | config;
if (perf_ibs->fetch_count_reset_broken)
wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
} }
/* /*
...@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags) ...@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
u64 period; u64 period, config = 0;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return; return;
...@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags) ...@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0; hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period); perf_ibs_set_period(perf_ibs, hwc, &period);
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
}
config |= period >> 4;
/* /*
* Set STARTED before enabling the hardware, such that a subsequent NMI * Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it. * must observe it.
*/ */
set_bit(IBS_STARTED, pcpu->state); set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state); clear_bit(IBS_STOPPING, pcpu->state);
perf_ibs_enable_event(perf_ibs, hwc, period >> 4); perf_ibs_enable_event(perf_ibs, hwc, config);
perf_event_update_userpage(event); perf_event_update_userpage(event);
} }
...@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) ...@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_ibs_data ibs_data; struct perf_ibs_data ibs_data;
int offset, size, check_rip, offset_max, throttle = 0; int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr; unsigned int msr;
u64 *buf, *config, period; u64 *buf, *config, period, new_config = 0;
if (!test_bit(IBS_STARTED, pcpu->state)) { if (!test_bit(IBS_STARTED, pcpu->state)) {
fail: fail:
...@@ -626,18 +645,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) ...@@ -626,18 +645,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
perf_ibs->offset_max, perf_ibs->offset_max,
offset + 1); offset + 1);
} while (offset < offset_max); } while (offset < offset_max);
/*
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
* depending on their availability.
* Can't add to offset_max as they are staggered
*/
if (event->attr.sample_type & PERF_SAMPLE_RAW) { if (event->attr.sample_type & PERF_SAMPLE_RAW) {
/* if (perf_ibs == &perf_ibs_op) {
* Read IbsBrTarget and IbsOpData4 separately if (ibs_caps & IBS_CAPS_BRNTRGT) {
* depending on their availability. rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
* Can't add to offset_max as they are staggered size++;
*/ }
if (ibs_caps & IBS_CAPS_BRNTRGT) { if (ibs_caps & IBS_CAPS_OPDATA4) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
size++; size++;
}
} }
if (ibs_caps & IBS_CAPS_OPDATA4) { if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
size++; size++;
} }
} }
...@@ -666,13 +691,17 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) ...@@ -666,13 +691,17 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (throttle) { if (throttle) {
perf_ibs_stop(event, 0); perf_ibs_stop(event, 0);
} else { } else {
period >>= 4; if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
if ((ibs_caps & IBS_CAPS_RDWROPCNT) && new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
(*config & IBS_OP_CNT_CTL)) period &= ~IBS_OP_MAX_CNT_EXT_MASK;
period |= *config & IBS_OP_CUR_CNT_RAND; }
if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
new_config |= *config & IBS_OP_CUR_CNT_RAND;
}
new_config |= period >> 4;
perf_ibs_enable_event(perf_ibs, hwc, period); perf_ibs_enable_event(perf_ibs, hwc, new_config);
} }
perf_event_update_userpage(event); perf_event_update_userpage(event);
...@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void) ...@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
{ {
struct attribute **attr = ibs_op_format_attrs; struct attribute **attr = ibs_op_format_attrs;
/*
* Some chips fail to reset the fetch count when it is written; instead
* they need a 0-1 transition of IbsFetchEn.
*/
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
perf_ibs_fetch.fetch_count_reset_broken = 1;
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) { if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr; *attr++ = &format_attr_cnt_ctl.attr;
} }
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
}
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
......
...@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void) ...@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
while (amd_iommu_v2_event_descs[i].attr.attr.name) while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++; i++;
attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL); attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
if (!attrs) if (!attrs)
return -ENOMEM; return -ENOMEM;
......
...@@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags) ...@@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags)
} }
/* /*
* Convert logical CPU number to L3 PMC Config ThreadMask format * Return a full thread and slice mask unless user
* has provided them
*/ */
static u64 l3_thread_slice_mask(int cpu) static u64 l3_thread_slice_mask(u64 config)
{ {
u64 thread_mask, core = topology_core_id(cpu); if (boot_cpu_data.x86 <= 0x18)
unsigned int shift, thread = 0; return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
if (topology_smt_supported() && !topology_is_primary_thread(cpu)) /*
thread = 1; * If the user doesn't specify a threadmask, they're not trying to
* count core 0, so we enable all cores & threads.
if (boot_cpu_data.x86 <= 0x18) { * We'll also assume that they want to count slice 0 if they specify
shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread; * a threadmask and leave sliceid and enallslices unpopulated.
thread_mask = BIT_ULL(shift); */
if (!(config & AMD64_L3_F19H_THREAD_MASK))
return AMD64_L3_SLICE_MASK | thread_mask; return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
} AMD64_L3_EN_ALL_CORES;
core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
shift = AMD64_L3_THREAD_SHIFT + thread;
thread_mask = BIT_ULL(shift);
return AMD64_L3_EN_ALL_SLICES | core | thread_mask; return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
AMD64_L3_COREID_MASK);
} }
static int amd_uncore_event_init(struct perf_event *event) static int amd_uncore_event_init(struct perf_event *event)
...@@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event) ...@@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* For other events, the two fields do not affect the count. * For other events, the two fields do not affect the count.
*/ */
if (l3_mask && is_llc_event(event)) if (l3_mask && is_llc_event(event))
hwc->config |= l3_thread_slice_mask(event->cpu); hwc->config |= l3_thread_slice_mask(event->attr.config);
uncore = event_to_amd_uncore(event); uncore = event_to_amd_uncore(event);
if (!uncore) if (!uncore)
...@@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = { ...@@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = {
.attrs = amd_uncore_attrs, .attrs = amd_uncore_attrs,
}; };
/* #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
* Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
* on family struct kobj_attribute *attr, \
*/ char *page) \
#define AMD_FORMAT_ATTR(_dev, _name, _format) \ { \
static ssize_t \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
_dev##_show##_name(struct device *dev, \ return sprintf(page, _format "\n"); \
struct device_attribute *attr, \ } \
char *page) \ static struct kobj_attribute format_attr_##_var = \
{ \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \ DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
} \ DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev); DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
/* Used for each uncore counter type */ DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
#define AMD_ATTRIBUTE(_name) \ DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
static struct attribute *amd_uncore_format_attr_##_name[] = { \ DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
&format_attr_event_##_name.attr, \ DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
&format_attr_umask.attr, \ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
NULL, \ DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
}; \ DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
static struct attribute_group amd_uncore_format_group_##_name = { \
.name = "format", \ static struct attribute *amd_uncore_df_format_attr[] = {
.attrs = amd_uncore_format_attr_##_name, \ &format_attr_event12.attr, /* event14 if F17h+ */
}; \ &format_attr_umask.attr,
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \ NULL,
&amd_uncore_attr_group, \ };
&amd_uncore_format_group_##_name, \
NULL, \ static struct attribute *amd_uncore_l3_format_attr[] = {
&format_attr_event12.attr, /* event8 if F17h+ */
&format_attr_umask.attr,
NULL, /* slicemask if F17h, coreid if F19h */
NULL, /* threadmask8 if F17h, enallslices if F19h */
NULL, /* enallcores if F19h */
NULL, /* sliceid if F19h */
NULL, /* threadmask2 if F19h */
NULL,
};
static struct attribute_group amd_uncore_df_format_group = {
.name = "format",
.attrs = amd_uncore_df_format_attr,
}; };
AMD_FORMAT_ATTR(event, , "config:0-7,32-35"); static struct attribute_group amd_uncore_l3_format_group = {
AMD_FORMAT_ATTR(umask, , "config:8-15"); .name = "format",
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60"); .attrs = amd_uncore_l3_format_attr,
AMD_FORMAT_ATTR(event, _l3, "config:0-7"); };
AMD_ATTRIBUTE(df);
AMD_ATTRIBUTE(l3); static const struct attribute_group *amd_uncore_df_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_df_format_group,
NULL,
};
static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_l3_format_group,
NULL,
};
static struct pmu amd_nb_pmu = { static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context, .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_df_attr_groups,
.name = "amd_nb",
.event_init = amd_uncore_event_init, .event_init = amd_uncore_event_init,
.add = amd_uncore_add, .add = amd_uncore_add,
.del = amd_uncore_del, .del = amd_uncore_del,
...@@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = { ...@@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = { static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context, .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_l3_attr_groups,
.name = "amd_l2",
.event_init = amd_uncore_event_init, .event_init = amd_uncore_event_init,
.add = amd_uncore_add, .add = amd_uncore_add,
.del = amd_uncore_del, .del = amd_uncore_del,
...@@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu) ...@@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
static int __init amd_uncore_init(void) static int __init amd_uncore_init(void)
{ {
struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
int ret = -ENODEV; int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
...@@ -538,6 +567,8 @@ static int __init amd_uncore_init(void) ...@@ -538,6 +567,8 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV; return -ENODEV;
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) { if (boot_cpu_data.x86 >= 0x17) {
/* /*
* For F17h and above, the Northbridge counters are * For F17h and above, the Northbridge counters are
...@@ -545,27 +576,16 @@ static int __init amd_uncore_init(void) ...@@ -545,27 +576,16 @@ static int __init amd_uncore_init(void)
* counters are supported too. The PMUs are exported * counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF. * based on family as either L2 or L3 and NB or DF.
*/ */
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3; num_counters_llc = NUM_COUNTERS_L3;
amd_nb_pmu.name = "amd_df"; amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3"; amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
l3_mask = true; l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
amd_nb_pmu.name = "amd_nb";
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
l3_mask = false;
} }
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
if (boot_cpu_data.x86 >= 0x17)
*df_attr = &format_attr_event14.attr;
amd_uncore_nb = alloc_percpu(struct amd_uncore *); amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) { if (!amd_uncore_nb) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -575,13 +595,29 @@ static int __init amd_uncore_init(void) ...@@ -575,13 +595,29 @@ static int __init amd_uncore_init(void)
if (ret) if (ret)
goto fail_nb; goto fail_nb;
pr_info("%s NB counters detected\n", pr_info("%d %s %s counters detected\n", num_counters_nb,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
"HYGON" : "AMD"); amd_nb_pmu.name);
ret = 0; ret = 0;
} }
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
*l3_attr++ = &format_attr_coreid.attr;
*l3_attr++ = &format_attr_enallslices.attr;
*l3_attr++ = &format_attr_enallcores.attr;
*l3_attr++ = &format_attr_sliceid.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
*l3_attr++ = &format_attr_slicemask.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}
amd_uncore_llc = alloc_percpu(struct amd_uncore *); amd_uncore_llc = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_llc) { if (!amd_uncore_llc) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -591,9 +627,9 @@ static int __init amd_uncore_init(void) ...@@ -591,9 +627,9 @@ static int __init amd_uncore_init(void)
if (ret) if (ret)
goto fail_llc; goto fail_llc;
pr_info("%s LLC counters detected\n", pr_info("%d %s %s counters detected\n", num_counters_llc,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
"HYGON" : "AMD"); amd_llc_pmu.name);
ret = 0; ret = 0;
} }
......
...@@ -105,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event) ...@@ -105,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base)) if (unlikely(!hwc->event_base))
return 0; return 0;
if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
return x86_pmu.update_topdown_event(event);
/* /*
* Careful: an NMI might modify the previous event value. * Careful: an NMI might modify the previous event value.
* *
...@@ -1056,6 +1059,45 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ...@@ -1056,6 +1059,45 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
return unsched ? -EINVAL : 0; return unsched ? -EINVAL : 0;
} }
static int add_nr_metric_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
if (is_metric_event(event)) {
if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
return -EINVAL;
cpuc->n_metric++;
cpuc->n_txn_metric++;
}
return 0;
}
static void del_nr_metric_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
if (is_metric_event(event))
cpuc->n_metric--;
}
static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
int max_count, int n)
{
if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
return -EINVAL;
if (n >= max_count + cpuc->n_metric)
return -EINVAL;
cpuc->event_list[n] = event;
if (is_counter_pair(&event->hw)) {
cpuc->n_pair++;
cpuc->n_txn_pair++;
}
return 0;
}
/* /*
* dogrp: true if must collect siblings events (group) * dogrp: true if must collect siblings events (group)
* returns total number of events and error code * returns total number of events and error code
...@@ -1092,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, ...@@ -1092,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
} }
if (is_x86_event(leader)) { if (is_x86_event(leader)) {
if (n >= max_count) if (collect_event(cpuc, leader, max_count, n))
return -EINVAL; return -EINVAL;
cpuc->event_list[n] = leader;
n++; n++;
if (is_counter_pair(&leader->hw))
cpuc->n_pair++;
} }
if (!dogrp) if (!dogrp)
return n; return n;
for_each_sibling_event(event, leader) { for_each_sibling_event(event, leader) {
if (!is_x86_event(event) || if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
event->state <= PERF_EVENT_STATE_OFF)
continue; continue;
if (n >= max_count) if (collect_event(cpuc, event, max_count, n))
return -EINVAL; return -EINVAL;
cpuc->event_list[n] = event;
n++; n++;
if (is_counter_pair(&event->hw))
cpuc->n_pair++;
} }
return n; return n;
} }
...@@ -1135,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event, ...@@ -1135,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0; hwc->event_base = 0;
break; break;
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
/* All the metric events are mapped onto the fixed counter 3. */
idx = INTEL_PMC_IDX_FIXED_SLOTS;
/* fall through */
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1: case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
(idx - INTEL_PMC_IDX_FIXED); (idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30; hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
INTEL_PMC_FIXED_RDPMC_BASE;
break; break;
default: default:
...@@ -1270,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event) ...@@ -1270,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base)) if (unlikely(!hwc->event_base))
return 0; return 0;
if (unlikely(is_topdown_count(event)) &&
x86_pmu.set_topdown_event_period)
return x86_pmu.set_topdown_event_period(event);
/* /*
* If we are way outside a reasonable range then just skip forward: * If we are way outside a reasonable range then just skip forward:
*/ */
...@@ -1309,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event) ...@@ -1309,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/* /*
* Clear the Merge event counter's upper 16 bits since * Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width * we currently declare a 48-bit counter width
*/ */
if (is_counter_pair(hwc)) if (is_counter_pair(hwc))
wrmsrl(x86_pmu_event_addr(idx + 1), 0); wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
/* /*
* Due to erratum on certan cpu we need * Due to erratum on certan cpu we need
...@@ -1551,6 +1596,8 @@ static void x86_pmu_del(struct perf_event *event, int flags) ...@@ -1551,6 +1596,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
} }
cpuc->event_constraint[i-1] = NULL; cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events; --cpuc->n_events;
if (x86_pmu.intel_cap.perf_metrics)
del_nr_metric_event(cpuc, event);
perf_event_update_userpage(event); perf_event_update_userpage(event);
...@@ -2018,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) ...@@ -2018,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
perf_pmu_disable(pmu); perf_pmu_disable(pmu);
__this_cpu_write(cpu_hw_events.n_txn, 0); __this_cpu_write(cpu_hw_events.n_txn, 0);
__this_cpu_write(cpu_hw_events.n_txn_pair, 0);
__this_cpu_write(cpu_hw_events.n_txn_metric, 0);
} }
/* /*
...@@ -2043,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) ...@@ -2043,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
*/ */
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
__this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
perf_pmu_enable(pmu); perf_pmu_enable(pmu);
} }
...@@ -2264,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m ...@@ -2264,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
static int x86_pmu_event_idx(struct perf_event *event) static int x86_pmu_event_idx(struct perf_event *event)
{ {
int idx = event->hw.idx; struct hw_perf_event *hwc = &event->hw;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0; return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { if (is_metric_idx(hwc->idx))
idx -= INTEL_PMC_IDX_FIXED; return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
idx |= 1 << 30; else
} return hwc->event_base_rdpmc + 1;
return idx + 1;
} }
static ssize_t get_attr_rdpmc(struct device *cdev, static ssize_t get_attr_rdpmc(struct device *cdev,
......
...@@ -243,10 +243,14 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { ...@@ -243,10 +243,14 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
static struct event_constraint intel_icl_event_constraints[] = { static struct event_constraint intel_icl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
...@@ -309,6 +313,12 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, ...@@ -309,6 +313,12 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2"); "4", "2");
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
static struct attribute *snb_events_attrs[] = { static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued), EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired), EVENT_PTR(td_slots_retired),
...@@ -2165,11 +2175,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx) ...@@ -2165,11 +2175,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event) static void intel_pmu_disable_fixed(struct perf_event *event)
{ {
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask; u64 ctrl_val, mask;
int idx = hwc->idx;
mask = 0xfULL << (idx * 4); if (is_topdown_idx(idx)) {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* When there are other active TopDown events,
* don't disable the fixed counter 3.
*/
if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
return;
idx = INTEL_PMC_IDX_FIXED_SLOTS;
}
intel_clear_masks(event, idx);
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
rdmsrl(hwc->config_base, ctrl_val); rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask; ctrl_val &= ~mask;
wrmsrl(hwc->config_base, ctrl_val); wrmsrl(hwc->config_base, ctrl_val);
...@@ -2180,17 +2203,28 @@ static void intel_pmu_disable_event(struct perf_event *event) ...@@ -2180,17 +2203,28 @@ static void intel_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx; int idx = hwc->idx;
if (idx < INTEL_PMC_IDX_FIXED) { switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
intel_clear_masks(event, idx); intel_clear_masks(event, idx);
x86_pmu_disable_event(event); x86_pmu_disable_event(event);
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) { break;
intel_clear_masks(event, idx); case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_disable_fixed(event); intel_pmu_disable_fixed(event);
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) { break;
case INTEL_PMC_IDX_FIXED_BTS:
intel_pmu_disable_bts(); intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer(); intel_pmu_drain_bts_buffer();
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR) return;
case INTEL_PMC_IDX_FIXED_VLBR:
intel_clear_masks(event, idx); intel_clear_masks(event, idx);
break;
default:
intel_clear_masks(event, idx);
pr_warn("Failed to disable the event with invalid index %d\n",
idx);
return;
}
/* /*
* Needs to be called after x86_pmu_disable_event, * Needs to be called after x86_pmu_disable_event,
...@@ -2208,10 +2242,189 @@ static void intel_pmu_del_event(struct perf_event *event) ...@@ -2208,10 +2242,189 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_pebs_del(event); intel_pmu_pebs_del(event);
} }
/*
 * Program the sampling period for a TopDown (slots or metric) event.
 *
 * Called when the event is (re)started. Fixed counter 3 and the
 * PERF_METRICS MSR are reset together at the first activation, and
 * restored from the values saved at the last context switch otherwise.
 *
 * Returns 0 (the period is always accepted).
 */
static int icl_set_topdown_event_period(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);

        /*
         * The values in PERF_METRICS MSR are derived from fixed counter 3.
         * Software should start both registers, PERF_METRICS and fixed
         * counter 3, from zero.
         * Clear PERF_METRICS and Fixed counter 3 in initialization.
         * After that, both MSRs will be cleared for each read.
         * Don't need to clear them again.
         */
        if (left == x86_pmu.max_period) {
                wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
                wrmsrl(MSR_PERF_METRICS, 0);
                hwc->saved_slots = 0;
                hwc->saved_metric = 0;
        }

        /*
         * Restore the counters saved at the previous schedule-out; only the
         * slots event (the group leader) reprograms the hardware.
         */
        if ((hwc->saved_slots) && is_slots_event(event)) {
                wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
                wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
        }

        perf_event_update_userpage(event);

        return 0;
}
/*
 * Convert one 8-bit PERF_METRICS fraction into a number of slots.
 *
 * Each metric occupies one byte of the PERF_METRICS MSR. The byte is an
 * integer fraction summing up to 0xff, so:
 *   slots-in-metric = (Metric / 0xff) * slots
 */
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
{
        unsigned int shift = (idx - INTEL_PMC_IDX_METRIC_BASE) * 8;
        u32 fraction = (metric >> shift) & 0xff;

        return mul_u64_u32_div(slots, fraction, 0xff);
}
/*
 * Return the raw value contributed by one TopDown event: metric events
 * derive their count from the PERF_METRICS byte for their index, while
 * the slots event reports the slots count directly.
 */
static u64 icl_get_topdown_value(struct perf_event *event,
                                 u64 slots, u64 metrics)
{
        int idx = event->hw.idx;

        return is_metric_idx(idx) ?
               icl_get_metrics_event_value(metrics, slots, idx) : slots;
}
/*
 * Accumulate the delta between the current and the previously saved
 * SLOTS/PERF_METRICS readings into event->count.
 */
static void __icl_update_topdown_event(struct perf_event *event,
                                       u64 slots, u64 metrics,
                                       u64 last_slots, u64 last_metrics)
{
        u64 now, prev;

        now = icl_get_topdown_value(event, slots, metrics);
        prev = last_slots ?
               icl_get_topdown_value(event, last_slots, last_metrics) : 0;

        /*
         * The 8bit integer fraction of metric may be not accurate,
         * especially when the changes is very small.
         * For example, if only a few bad_spec happens, the fraction
         * may be reduced from 1 to 0. If so, the bad_spec event value
         * will be 0 which is definitely less than the last value.
         * Avoid update event->count for this case.
         */
        if (now > prev)
                local64_add(now - prev, &event->count);
}
/*
 * Record the freshly read SLOTS/PERF_METRICS values into the saved copies
 * of @event and of every active TopDown event, so they can be restored on
 * the next schedule in by icl_set_topdown_event_period().
 */
static void update_saved_topdown_regs(struct perf_event *event,
                                      u64 slots, u64 metrics)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *other;
        int idx;

        event->hw.saved_slots = slots;
        event->hw.saved_metric = metrics;

        /* Walk active counters up to the last TopDown metric index. */
        for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
                if (!is_topdown_idx(idx))
                        continue;
                other = cpuc->events[idx];
                other->hw.saved_slots = slots;
                other->hw.saved_metric = metrics;
        }
}
/*
 * Update all active Topdown events.
 *
 * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
 * modified by an NMI. PMU has to be disabled before calling this function.
 *
 * @event may be NULL (e.g. when invoked from the PMI handler); in that case
 * all active TopDown events are updated without saved baselines.
 *
 * Returns the current slots count (0 if fixed counter 3 reads zero).
 */
static u64 icl_update_topdown_event(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *other;
        u64 slots, metrics;
        bool reset = true;
        int idx;

        /* read Fixed counter 3 */
        rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
        if (!slots)
                return 0;

        /* read PERF_METRICS */
        rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);

        /* Fold the new readings into every active TopDown event. */
        for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
                if (!is_topdown_idx(idx))
                        continue;
                other = cpuc->events[idx];
                __icl_update_topdown_event(other, slots, metrics,
                                           event ? event->hw.saved_slots : 0,
                                           event ? event->hw.saved_metric : 0);
        }

        /*
         * Check and update this event, which may have been cleared
         * in active_mask e.g. x86_pmu_stop()
         */
        if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
                __icl_update_topdown_event(event, slots, metrics,
                                           event->hw.saved_slots,
                                           event->hw.saved_metric);

                /*
                 * In x86_pmu_stop(), the event is cleared in active_mask first,
                 * then drain the delta, which indicates context switch for
                 * counting.
                 * Save metric and slots for context switch.
                 * Don't need to reset the PERF_METRICS and Fixed counter 3.
                 * Because the values will be restored in next schedule in.
                 */
                update_saved_topdown_regs(event, slots, metrics);
                reset = false;
        }

        if (reset) {
                /* The fixed counter 3 has to be written before the PERF_METRICS. */
                wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
                wrmsrl(MSR_PERF_METRICS, 0);
                if (event)
                        update_saved_topdown_regs(event, 0, 0);
        }

        return slots;
}
/*
 * Read a TopDown event. For a transactional group read, only the slots
 * (leader) event triggers the hardware update; the PMU is disabled around
 * the update because SLOTS and PERF_METRICS are read separately and may
 * otherwise be disturbed (see icl_update_topdown_event()).
 */
static void intel_pmu_read_topdown_event(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /* Only need to call update_topdown_event() once for group read. */
        if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
            !is_slots_event(event))
                return;

        perf_pmu_disable(event->pmu);
        x86_pmu.update_topdown_event(event);
        perf_pmu_enable(event->pmu);
}
static void intel_pmu_read_event(struct perf_event *event) static void intel_pmu_read_event(struct perf_event *event)
{ {
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event); intel_pmu_auto_reload_read(event);
else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
intel_pmu_read_topdown_event(event);
else else
x86_perf_event_update(event); x86_perf_event_update(event);
} }
...@@ -2219,8 +2432,22 @@ static void intel_pmu_read_event(struct perf_event *event) ...@@ -2219,8 +2432,22 @@ static void intel_pmu_read_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct perf_event *event) static void intel_pmu_enable_fixed(struct perf_event *event)
{ {
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask, bits = 0; u64 ctrl_val, mask, bits = 0;
int idx = hwc->idx;
if (is_topdown_idx(idx)) {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* When there are other active TopDown events,
* don't enable the fixed counter 3 again.
*/
if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
return;
idx = INTEL_PMC_IDX_FIXED_SLOTS;
}
intel_set_masks(event, idx);
/* /*
* Enable IRQ generation (0x8), if not PEBS, * Enable IRQ generation (0x8), if not PEBS,
...@@ -2240,6 +2467,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event) ...@@ -2240,6 +2467,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
bits |= 0x4; bits |= 0x4;
idx -= INTEL_PMC_IDX_FIXED;
bits <<= (idx * 4); bits <<= (idx * 4);
mask = 0xfULL << (idx * 4); mask = 0xfULL << (idx * 4);
...@@ -2262,18 +2490,27 @@ static void intel_pmu_enable_event(struct perf_event *event) ...@@ -2262,18 +2490,27 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (unlikely(event->attr.precise_ip)) if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_enable(event); intel_pmu_pebs_enable(event);
if (idx < INTEL_PMC_IDX_FIXED) { switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
intel_set_masks(event, idx); intel_set_masks(event, idx);
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) { break;
intel_set_masks(event, idx); case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_enable_fixed(event); intel_pmu_enable_fixed(event);
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) { break;
case INTEL_PMC_IDX_FIXED_BTS:
if (!__this_cpu_read(cpu_hw_events.enabled)) if (!__this_cpu_read(cpu_hw_events.enabled))
return; return;
intel_pmu_enable_bts(hwc->config); intel_pmu_enable_bts(hwc->config);
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR) break;
case INTEL_PMC_IDX_FIXED_VLBR:
intel_set_masks(event, idx); intel_set_masks(event, idx);
break;
default:
pr_warn("Failed to enable the event with invalid index %d\n",
idx);
}
} }
static void intel_pmu_add_event(struct perf_event *event) static void intel_pmu_add_event(struct perf_event *event)
...@@ -2389,7 +2626,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) ...@@ -2389,7 +2626,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
/* /*
* PEBS overflow sets bit 62 in the global status register * PEBS overflow sets bit 62 in the global status register
*/ */
if (__test_and_clear_bit(62, (unsigned long *)&status)) { if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) {
u64 pebs_enabled = cpuc->pebs_enabled; u64 pebs_enabled = cpuc->pebs_enabled;
handled++; handled++;
...@@ -2410,7 +2647,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) ...@@ -2410,7 +2647,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
/* /*
* Intel PT * Intel PT
*/ */
if (__test_and_clear_bit(55, (unsigned long *)&status)) { if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
handled++; handled++;
if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
perf_guest_cbs->handle_intel_pt_intr)) perf_guest_cbs->handle_intel_pt_intr))
...@@ -2419,6 +2656,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) ...@@ -2419,6 +2656,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
intel_pt_interrupt(); intel_pt_interrupt();
} }
/*
 * Intel Perf metrics
 */
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++;
if (x86_pmu.update_topdown_event)
x86_pmu.update_topdown_event(NULL);
}
/* /*
* Checkpointed counters can lead to 'spurious' PMIs because the * Checkpointed counters can lead to 'spurious' PMIs because the
* rollback caused by the PMI will have cleared the overflow status * rollback caused by the PMI will have cleared the overflow status
...@@ -3355,6 +3601,56 @@ static int intel_pmu_hw_config(struct perf_event *event) ...@@ -3355,6 +3601,56 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (event->attr.type != PERF_TYPE_RAW) if (event->attr.type != PERF_TYPE_RAW)
return 0; return 0;
/*
* Config Topdown slots and metric events
*
* The slots event on Fixed Counter 3 can support sampling,
* which will be handled normally in x86_perf_event_update().
*
* Metric events don't support sampling and require being paired
* with a slots event as group leader. When the slots event
* is used in a metrics group, it too cannot support sampling.
*/
if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
if (event->attr.config1 || event->attr.config2)
return -EINVAL;
/*
* The TopDown metrics events and slots event don't
* support any filters.
*/
if (event->attr.config & X86_ALL_EVENT_FLAGS)
return -EINVAL;
if (is_metric_event(event)) {
struct perf_event *leader = event->group_leader;
/* The metric events don't support sampling. */
if (is_sampling_event(event))
return -EINVAL;
/* The metric events require a slots group leader. */
if (!is_slots_event(leader))
return -EINVAL;
/*
* The leader/SLOTS must not be a sampling event for
* metric use; hardware requires it starts at 0 when used
* in conjunction with MSR_PERF_METRICS.
*/
if (is_sampling_event(leader))
return -EINVAL;
event->event_caps |= PERF_EV_CAP_SIBLING;
/*
* Only once we have a METRICs sibling do we
* need TopDown magic.
*/
leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
}
}
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0; return 0;
...@@ -3787,6 +4083,17 @@ static void intel_pmu_cpu_starting(int cpu) ...@@ -3787,6 +4083,17 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.counter_freezing) if (x86_pmu.counter_freezing)
enable_counter_freeze(); enable_counter_freeze();
/* Disable perf metrics if any added CPU doesn't support it. */
if (x86_pmu.intel_cap.perf_metrics) {
union perf_capabilities perf_cap;
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
if (!perf_cap.perf_metrics) {
x86_pmu.intel_cap.perf_metrics = 0;
x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
}
}
if (!cpuc->shared_regs) if (!cpuc->shared_regs)
return; return;
...@@ -4355,6 +4662,15 @@ static struct attribute *icl_events_attrs[] = { ...@@ -4355,6 +4662,15 @@ static struct attribute *icl_events_attrs[] = {
NULL, NULL,
}; };
/* TopDown slots/metric events exposed in sysfs for Ice Lake. */
static struct attribute *icl_td_events_attrs[] = {
        EVENT_PTR(slots),
        EVENT_PTR(td_retiring),
        EVENT_PTR(td_bad_spec),
        EVENT_PTR(td_fe_bound),
        EVENT_PTR(td_be_bound),
        NULL,
};
static struct attribute *icl_tsx_events_attrs[] = { static struct attribute *icl_tsx_events_attrs[] = {
EVENT_PTR(tx_start), EVENT_PTR(tx_start),
EVENT_PTR(tx_abort), EVENT_PTR(tx_abort),
...@@ -4830,6 +5146,7 @@ __init int intel_pmu_init(void) ...@@ -4830,6 +5146,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_TREMONT_D: case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT: case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_ATOM_TREMONT_L:
x86_pmu.late_ack = true; x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids)); sizeof(hw_cache_event_ids));
...@@ -5139,10 +5456,13 @@ __init int intel_pmu_init(void) ...@@ -5139,10 +5456,13 @@ __init int intel_pmu_init(void)
hsw_format_attr : nhm_format_attr; hsw_format_attr : nhm_format_attr;
extra_skl_attr = skl_format_attr; extra_skl_attr = skl_format_attr;
mem_attr = icl_events_attrs; mem_attr = icl_events_attrs;
td_attr = icl_td_events_attrs;
tsx_attr = icl_tsx_events_attrs; tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true; x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem); intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.update_topdown_event = icl_update_topdown_event;
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
pr_cont("Icelake events, "); pr_cont("Icelake events, ");
name = "icelake"; name = "icelake";
break; break;
...@@ -5198,6 +5518,15 @@ __init int intel_pmu_init(void) ...@@ -5198,6 +5518,15 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters * counter, so do not extend mask to generic counters
*/ */
for_each_event_constraint(c, x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) {
/*
* Don't extend the topdown slots and metrics
* events to the generic counters.
*/
if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
c->weight = hweight64(c->idxmsk64);
continue;
}
if (c->cmask == FIXED_EVENT_FLAGS if (c->cmask == FIXED_EVENT_FLAGS
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
...@@ -5253,6 +5582,9 @@ __init int intel_pmu_init(void) ...@@ -5253,6 +5582,9 @@ __init int intel_pmu_init(void)
if (x86_pmu.counter_freezing) if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4; x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
if (x86_pmu.intel_cap.perf_metrics)
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
return 0; return 0;
} }
......
...@@ -670,9 +670,7 @@ int intel_pmu_drain_bts_buffer(void) ...@@ -670,9 +670,7 @@ int intel_pmu_drain_bts_buffer(void)
static inline void intel_pmu_drain_pebs_buffer(void) static inline void intel_pmu_drain_pebs_buffer(void)
{ {
struct pt_regs regs; x86_pmu.drain_pebs(NULL);
x86_pmu.drain_pebs(&regs);
} }
/* /*
...@@ -1737,6 +1735,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -1737,6 +1735,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct x86_perf_regs perf_regs; struct x86_perf_regs perf_regs;
struct pt_regs *regs = &perf_regs.regs; struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit); void *at = get_next_pebs_record_by_bit(base, top, bit);
struct pt_regs dummy_iregs;
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
/* /*
...@@ -1749,6 +1748,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -1749,6 +1748,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
} else if (!intel_pmu_save_and_restart(event)) } else if (!intel_pmu_save_and_restart(event))
return; return;
if (!iregs)
iregs = &dummy_iregs;
while (count > 1) { while (count > 1) {
setup_sample(event, iregs, at, &data, regs); setup_sample(event, iregs, at, &data, regs);
perf_event_output(event, &data, regs); perf_event_output(event, &data, regs);
...@@ -1758,16 +1760,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -1758,16 +1760,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
} }
setup_sample(event, iregs, at, &data, regs); setup_sample(event, iregs, at, &data, regs);
if (iregs == &dummy_iregs) {
/* /*
* All but the last records are processed. * The PEBS records may be drained in the non-overflow context,
* The last one is left to be able to call the overflow handler. * e.g., large PEBS + context switch. Perf should treat the
*/ * last record the same as other PEBS records, and doesn't
if (perf_event_overflow(event, &data, regs)) { * invoke the generic overflow handler.
x86_pmu_stop(event, 0); */
return; perf_event_output(event, &data, regs);
} else {
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
} }
} }
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
......
...@@ -12,6 +12,8 @@ struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; ...@@ -12,6 +12,8 @@ struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
static bool pcidrv_registered; static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver; struct pci_driver *uncore_pci_driver;
/* The PCI driver for the device which the uncore doesn't own. */
struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */ /* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock); DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
...@@ -989,65 +991,71 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) ...@@ -989,65 +991,71 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
} }
/* /*
* add a pci uncore device * Get the die information of a PCI device.
* @pdev: The PCI device.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to.
*/ */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
int *phys_id, int *die)
{ {
struct intel_uncore_type *type; *phys_id = uncore_pcibus_to_physid(pdev->bus);
struct intel_uncore_pmu *pmu = NULL; if (*phys_id < 0)
struct intel_uncore_box *box;
int phys_id, die, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV; return -ENODEV;
die = (topology_max_die_per_package() > 1) ? phys_id : *die = (topology_max_die_per_package() > 1) ? *phys_id :
topology_phys_to_logical_pkg(phys_id); topology_phys_to_logical_pkg(*phys_id);
if (die < 0) if (*die < 0)
return -EINVAL; return -EINVAL;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { return 0;
int idx = UNCORE_PCI_DEV_IDX(id->driver_data); }
uncore_extra_pci_dev[die].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
/* /*
* Some platforms, e.g. Knights Landing, use a common PCI device ID * Find the PMU of a PCI device.
* for multiple instances of an uncore PMU device type. We should check * @pdev: The PCI device.
* PCI slot and func to indicate the uncore box. * @ids: The ID table of the available PCI devices with a PMU.
*/ */
if (id->driver_data & ~0xffff) { static struct intel_uncore_pmu *
struct pci_driver *pci_drv = pdev->driver; uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
const struct pci_device_id *ids = pci_drv->id_table; {
unsigned int devfn; struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_type *type;
while (ids && ids->vendor) { kernel_ulong_t data;
if ((ids->vendor == pdev->vendor) && unsigned int devfn;
(ids->device == pdev->device)) {
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), while (ids && ids->vendor) {
UNCORE_PCI_DEV_FUNC(ids->driver_data)); if ((ids->vendor == pdev->vendor) &&
if (devfn == pdev->devfn) { (ids->device == pdev->device)) {
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; data = ids->driver_data;
break; devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
} UNCORE_PCI_DEV_FUNC(data));
if (devfn == pdev->devfn) {
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
break;
} }
ids++;
} }
if (pmu == NULL) ids++;
return -ENODEV;
} else {
/*
* for performance monitoring unit with multiple boxes,
* each box has a different function id.
*/
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
} }
return pmu;
}
/*
* Register the PMU for a PCI device
* @pdev: The PCI device.
* @type: The corresponding PMU type of the device.
* @pmu: The corresponding PMU of the device.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to.
*/
static int uncore_pci_pmu_register(struct pci_dev *pdev,
struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu,
int phys_id, int die)
{
struct intel_uncore_box *box;
int ret;
if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL; return -EINVAL;
...@@ -1067,7 +1075,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1067,7 +1075,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
box->pci_dev = pdev; box->pci_dev = pdev;
box->pmu = pmu; box->pmu = pmu;
uncore_box_init(box); uncore_box_init(box);
pci_set_drvdata(pdev, box);
pmu->boxes[die] = box; pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1) if (atomic_inc_return(&pmu->activeboxes) > 1)
...@@ -1076,7 +1083,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1076,7 +1083,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
/* First active box registers the pmu */ /* First active box registers the pmu */
ret = uncore_pmu_register(pmu); ret = uncore_pmu_register(pmu);
if (ret) { if (ret) {
pci_set_drvdata(pdev, NULL);
pmu->boxes[die] = NULL; pmu->boxes[die] = NULL;
uncore_box_exit(box); uncore_box_exit(box);
kfree(box); kfree(box);
...@@ -1084,18 +1090,87 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1084,18 +1090,87 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
return ret; return ret;
} }
/*
 * add a pci uncore device
 *
 * Resolves the socket/die the device belongs to, looks up (or selects)
 * the matching PMU, and registers it. Extra PCI devices (which carry no
 * PMU of their own) are only recorded in uncore_extra_pci_dev[].
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu = NULL;
        int phys_id, die, ret;

        ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
        if (ret)
                return ret;

        if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
                int idx = UNCORE_PCI_DEV_IDX(id->driver_data);

                /* No PMU to register; just remember the device for this die. */
                uncore_extra_pci_dev[die].dev[idx] = pdev;
                pci_set_drvdata(pdev, NULL);
                return 0;
        }

        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];

        /*
         * Some platforms, e.g. Knights Landing, use a common PCI device ID
         * for multiple instances of an uncore PMU device type. We should check
         * PCI slot and func to indicate the uncore box.
         */
        if (id->driver_data & ~0xffff) {
                struct pci_driver *pci_drv = pdev->driver;

                pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
                if (pmu == NULL)
                        return -ENODEV;
        } else {
                /*
                 * for performance monitoring unit with multiple boxes,
                 * each box has a different function id.
                 */
                pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        }

        ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);

        /*
         * NOTE(review): if registration failed, pmu->boxes[die] is NULL and
         * the drvdata is cleared — uncore_pci_remove() relies on this.
         */
        pci_set_drvdata(pdev, pmu->boxes[die]);

        return ret;
}
/*
 * Unregister the PMU of a PCI device
 * @pmu: The corresponding PMU is unregistered.
 * @phys_id: The physical socket id which the device maps to.
 * @die: The die id which the device maps to.
 */
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
                                      int phys_id, int die)
{
        struct intel_uncore_box *box = pmu->boxes[die];

        /* The box must belong to the socket it was probed on. */
        if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
                return;

        pmu->boxes[die] = NULL;
        /* The last active box tears down the PMU itself. */
        if (atomic_dec_return(&pmu->activeboxes) == 0)
                uncore_pmu_unregister(pmu);
        uncore_box_exit(box);
        kfree(box);
}
static void uncore_pci_remove(struct pci_dev *pdev) static void uncore_pci_remove(struct pci_dev *pdev)
{ {
struct intel_uncore_box *box; struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu; struct intel_uncore_pmu *pmu;
int i, phys_id, die; int i, phys_id, die;
phys_id = uncore_pcibus_to_physid(pdev->bus); if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
return;
box = pci_get_drvdata(pdev); box = pci_get_drvdata(pdev);
if (!box) { if (!box) {
die = (topology_max_die_per_package() > 1) ? phys_id :
topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[die].dev[i] == pdev) { if (uncore_extra_pci_dev[die].dev[i] == pdev) {
uncore_extra_pci_dev[die].dev[i] = NULL; uncore_extra_pci_dev[die].dev[i] = NULL;
...@@ -1107,15 +1182,84 @@ static void uncore_pci_remove(struct pci_dev *pdev) ...@@ -1107,15 +1182,84 @@ static void uncore_pci_remove(struct pci_dev *pdev)
} }
pmu = box->pmu; pmu = box->pmu;
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
return;
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
pmu->boxes[box->dieid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pci_pmu_unregister(pmu, phys_id, die);
uncore_pmu_unregister(pmu); }
uncore_box_exit(box);
kfree(box); static int uncore_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
struct pci_dev *pdev = to_pci_dev(dev);
struct intel_uncore_pmu *pmu;
int phys_id, die;
/* Unregister the PMU when the device is going to be deleted. */
if (action != BUS_NOTIFY_DEL_DEVICE)
return NOTIFY_DONE;
pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
if (!pmu)
return NOTIFY_DONE;
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
return NOTIFY_DONE;
uncore_pci_pmu_unregister(pmu, phys_id, die);
return NOTIFY_OK;
}
/* Bus notifier used by the sub driver to unregister PMUs on device removal. */
static struct notifier_block uncore_notifier = {
        .notifier_call = uncore_bus_notify,
};
/*
 * Register PMUs for PCI devices whose resources are owned by another
 * driver. Scans the sub driver's ID table for present devices, registers
 * a PMU for each match, and installs a bus notifier so the PMUs are torn
 * down when the underlying device goes away. If nothing was registered
 * (or the notifier could not be installed), uncore_pci_sub_driver is
 * cleared so uncore_pci_exit() skips the notifier teardown.
 */
static void uncore_pci_sub_driver_init(void)
{
        const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct pci_dev *pci_sub_dev;
        bool notify = false;
        unsigned int devfn;
        int phys_id, die;

        while (ids && ids->vendor) {
                pci_sub_dev = NULL;
                type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
                /*
                 * Search the available device, and register the
                 * corresponding PMU.
                 */
                while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                                     ids->device, pci_sub_dev))) {
                        /* Match on slot/func encoded in the driver data. */
                        devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
                                          UNCORE_PCI_DEV_FUNC(ids->driver_data));
                        if (devfn != pci_sub_dev->devfn)
                                continue;

                        pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
                        if (!pmu)
                                continue;

                        if (uncore_pci_get_dev_die_info(pci_sub_dev,
                                                        &phys_id, &die))
                                continue;

                        if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
                                                     phys_id, die))
                                notify = true;
                }
                ids++;
        }

        /* Only keep the sub driver active if the notifier is in place. */
        if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
                notify = false;

        if (!notify)
                uncore_pci_sub_driver = NULL;
}
static int __init uncore_pci_init(void) static int __init uncore_pci_init(void)
...@@ -1141,6 +1285,9 @@ static int __init uncore_pci_init(void) ...@@ -1141,6 +1285,9 @@ static int __init uncore_pci_init(void)
if (ret) if (ret)
goto errtype; goto errtype;
if (uncore_pci_sub_driver)
uncore_pci_sub_driver_init();
pcidrv_registered = true; pcidrv_registered = true;
return 0; return 0;
...@@ -1158,6 +1305,8 @@ static void uncore_pci_exit(void) ...@@ -1158,6 +1305,8 @@ static void uncore_pci_exit(void)
{ {
if (pcidrv_registered) { if (pcidrv_registered) {
pcidrv_registered = false; pcidrv_registered = false;
if (uncore_pci_sub_driver)
bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
pci_unregister_driver(uncore_pci_driver); pci_unregister_driver(uncore_pci_driver);
uncore_types_exit(uncore_pci_uncores); uncore_types_exit(uncore_pci_uncores);
kfree(uncore_extra_pci_dev); kfree(uncore_extra_pci_dev);
...@@ -1478,12 +1627,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { ...@@ -1478,12 +1627,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
}; };
static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
.cpu_init = icl_uncore_cpu_init, .cpu_init = tgl_uncore_cpu_init,
.mmio_init = tgl_uncore_mmio_init, .mmio_init = tgl_uncore_mmio_init,
}; };
static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
.cpu_init = icl_uncore_cpu_init, .cpu_init = tgl_uncore_cpu_init,
.mmio_init = tgl_l_uncore_mmio_init, .mmio_init = tgl_l_uncore_mmio_init,
}; };
......
...@@ -552,6 +552,7 @@ extern struct intel_uncore_type **uncore_msr_uncores; ...@@ -552,6 +552,7 @@ extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores; extern struct intel_uncore_type **uncore_pci_uncores;
extern struct intel_uncore_type **uncore_mmio_uncores; extern struct intel_uncore_type **uncore_mmio_uncores;
extern struct pci_driver *uncore_pci_driver; extern struct pci_driver *uncore_pci_driver;
extern struct pci_driver *uncore_pci_sub_driver;
extern raw_spinlock_t pci2phy_map_lock; extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head; extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct pci_extra_dev *uncore_extra_pci_dev;
...@@ -567,6 +568,7 @@ void snb_uncore_cpu_init(void); ...@@ -567,6 +568,7 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void); void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void); void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void); void icl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void); void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid); int snb_pci2phy_map_init(int devid);
......
...@@ -126,6 +126,10 @@ ...@@ -126,6 +126,10 @@
#define ICL_UNC_CBO_0_PER_CTR0 0x702 #define ICL_UNC_CBO_0_PER_CTR0 0x702
#define ICL_UNC_CBO_MSR_OFFSET 0x8 #define ICL_UNC_CBO_MSR_OFFSET 0x8
/* ICL ARB register */
#define ICL_UNC_ARB_PER_CTR 0x3b1
#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
...@@ -313,15 +317,21 @@ void skl_uncore_cpu_init(void) ...@@ -313,15 +317,21 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops; snb_uncore_arb.ops = &skl_uncore_msr_ops;
} }
static struct intel_uncore_ops icl_uncore_msr_ops = {
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
};
static struct intel_uncore_type icl_uncore_cbox = { static struct intel_uncore_type icl_uncore_cbox = {
.name = "cbox", .name = "cbox",
.num_counters = 4, .num_counters = 2,
.perf_ctr_bits = 44, .perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_CBO_0_PER_CTR0, .perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK, .event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = ICL_UNC_CBO_MSR_OFFSET, .msr_offset = ICL_UNC_CBO_MSR_OFFSET,
.ops = &skl_uncore_msr_ops, .ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group, .format_group = &snb_uncore_format_group,
}; };
...@@ -350,13 +360,25 @@ static struct intel_uncore_type icl_uncore_clockbox = { ...@@ -350,13 +360,25 @@ static struct intel_uncore_type icl_uncore_clockbox = {
.single_fixed = 1, .single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK, .event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group, .format_group = &icl_uncore_clock_format_group,
.ops = &skl_uncore_msr_ops, .ops = &icl_uncore_msr_ops,
.event_descs = icl_uncore_events, .event_descs = icl_uncore_events,
}; };
static struct intel_uncore_type icl_uncore_arb = {
.name = "arb",
.num_counters = 1,
.num_boxes = 1,
.perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_ARB_PER_CTR,
.event_ctl = ICL_UNC_ARB_PERFEVTSEL,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
static struct intel_uncore_type *icl_msr_uncores[] = { static struct intel_uncore_type *icl_msr_uncores[] = {
&icl_uncore_cbox, &icl_uncore_cbox,
&snb_uncore_arb, &icl_uncore_arb,
&icl_uncore_clockbox, &icl_uncore_clockbox,
NULL, NULL,
}; };
...@@ -374,6 +396,21 @@ void icl_uncore_cpu_init(void) ...@@ -374,6 +396,21 @@ void icl_uncore_cpu_init(void)
{ {
uncore_msr_uncores = icl_msr_uncores; uncore_msr_uncores = icl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num(); icl_uncore_cbox.num_boxes = icl_get_cbox_num();
}
static struct intel_uncore_type *tgl_msr_uncores[] = {
&icl_uncore_cbox,
&snb_uncore_arb,
&icl_uncore_clockbox,
NULL,
};
void tgl_uncore_cpu_init(void)
{
uncore_msr_uncores = tgl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
icl_uncore_cbox.ops = &skl_uncore_msr_ops;
icl_uncore_clockbox.ops = &skl_uncore_msr_ops;
snb_uncore_arb.ops = &skl_uncore_msr_ops; snb_uncore_arb.ops = &skl_uncore_msr_ops;
} }
......
...@@ -393,6 +393,11 @@ ...@@ -393,6 +393,11 @@
#define SNR_M2M_PCI_PMON_BOX_CTL 0x438 #define SNR_M2M_PCI_PMON_BOX_CTL 0x438
#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
/* SNR PCIE3 */
#define SNR_PCIE3_PCI_PMON_CTL0 0x508
#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0
/* SNR IMC */ /* SNR IMC */
#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
...@@ -3749,7 +3754,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type) ...@@ -3749,7 +3754,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
ret = skx_iio_get_topology(type); ret = skx_iio_get_topology(type);
if (ret) if (ret)
return ret; goto clear_attr_update;
ret = -ENOMEM;
/* One more for NULL. */ /* One more for NULL. */
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL); attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
...@@ -3781,8 +3788,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type) ...@@ -3781,8 +3788,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
kfree(eas); kfree(eas);
kfree(attrs); kfree(attrs);
kfree(type->topology); kfree(type->topology);
clear_attr_update:
type->attr_update = NULL; type->attr_update = NULL;
return -ENOMEM; return ret;
} }
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
...@@ -4551,12 +4559,46 @@ static struct intel_uncore_type snr_uncore_m2m = { ...@@ -4551,12 +4559,46 @@ static struct intel_uncore_type snr_uncore_m2m = {
.format_group = &snr_m2m_uncore_format_group, .format_group = &snr_m2m_uncore_format_group,
}; };
static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN));
pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
}
static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = {
.init_box = snr_m2m_uncore_pci_init_box,
.disable_box = snbep_uncore_pci_disable_box,
.enable_box = snbep_uncore_pci_enable_box,
.disable_event = snbep_uncore_pci_disable_event,
.enable_event = snr_uncore_pci_enable_event,
.read_counter = snbep_uncore_pci_read_counter,
};
static struct intel_uncore_type snr_uncore_pcie3 = {
.name = "pcie3",
.num_counters = 4,
.num_boxes = 1,
.perf_ctr_bits = 48,
.perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
.event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
.event_mask = SKX_IIO_PMON_RAW_EVENT_MASK,
.event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
.box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
.ops = &snr_pcie3_uncore_pci_ops,
.format_group = &skx_uncore_iio_format_group,
};
enum { enum {
SNR_PCI_UNCORE_M2M, SNR_PCI_UNCORE_M2M,
SNR_PCI_UNCORE_PCIE3,
}; };
static struct intel_uncore_type *snr_pci_uncores[] = { static struct intel_uncore_type *snr_pci_uncores[] = {
[SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
[SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
NULL, NULL,
}; };
...@@ -4573,6 +4615,19 @@ static struct pci_driver snr_uncore_pci_driver = { ...@@ -4573,6 +4615,19 @@ static struct pci_driver snr_uncore_pci_driver = {
.id_table = snr_uncore_pci_ids, .id_table = snr_uncore_pci_ids,
}; };
static const struct pci_device_id snr_uncore_pci_sub_ids[] = {
{ /* PCIe3 RP */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
},
{ /* end: all zeroes */ }
};
static struct pci_driver snr_uncore_pci_sub_driver = {
.name = "snr_uncore_sub",
.id_table = snr_uncore_pci_sub_ids,
};
int snr_uncore_pci_init(void) int snr_uncore_pci_init(void)
{ {
/* SNR UBOX DID */ /* SNR UBOX DID */
...@@ -4584,6 +4639,7 @@ int snr_uncore_pci_init(void) ...@@ -4584,6 +4639,7 @@ int snr_uncore_pci_init(void)
uncore_pci_uncores = snr_pci_uncores; uncore_pci_uncores = snr_pci_uncores;
uncore_pci_driver = &snr_uncore_pci_driver; uncore_pci_driver = &snr_uncore_pci_driver;
uncore_pci_sub_driver = &snr_uncore_pci_sub_driver;
return 0; return 0;
} }
...@@ -4751,10 +4807,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { ...@@ -4751,10 +4807,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
{ /* end: all zeroes */ }, { /* end: all zeroes */ },
}; };
...@@ -5212,17 +5268,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = { ...@@ -5212,17 +5268,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"), INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"), INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"),
{ /* end: all zeroes */ }, { /* end: all zeroes */ },
}; };
......
...@@ -78,6 +78,7 @@ static bool test_intel(int idx, void *data) ...@@ -78,6 +78,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT_D: case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT: case INTEL_FAM6_ATOM_TREMONT:
case INTEL_FAM6_ATOM_TREMONT_L:
case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM: case INTEL_FAM6_XEON_PHI_KNM:
......
...@@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) ...@@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ #define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ #define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */ #define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
static inline bool is_topdown_count(struct perf_event *event)
{
return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
}
static inline bool is_metric_event(struct perf_event *event)
{
u64 config = event->attr.config;
return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
}
static inline bool is_slots_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
}
static inline bool is_topdown_event(struct perf_event *event)
{
return is_metric_event(event) || is_slots_event(event);
}
struct amd_nb { struct amd_nb {
int nb_id; /* NorthBridge id */ int nb_id; /* NorthBridge id */
...@@ -210,6 +235,8 @@ struct cpu_hw_events { ...@@ -210,6 +235,8 @@ struct cpu_hw_events {
they've never been enabled yet */ they've never been enabled yet */
int n_txn; /* the # last events in the below arrays; int n_txn; /* the # last events in the below arrays;
added in the current transaction */ added in the current transaction */
int n_txn_pair;
int n_txn_metric;
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX]; u64 tags[X86_PMC_IDX_MAX];
...@@ -284,6 +311,12 @@ struct cpu_hw_events { ...@@ -284,6 +311,12 @@ struct cpu_hw_events {
*/ */
u64 tfa_shadow; u64 tfa_shadow;
/*
* Perf Metrics
*/
/* number of accepted metrics events */
int n_metric;
/* /*
* AMD specific bits * AMD specific bits
*/ */
...@@ -375,6 +408,19 @@ struct cpu_hw_events { ...@@ -375,6 +408,19 @@ struct cpu_hw_events {
#define FIXED_EVENT_CONSTRAINT(c, n) \ #define FIXED_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
* The special metric counters do not actually exist. They are calculated from
* the combination of the FxCtr3 + MSR_PERF_METRICS.
*
* The special metric counters are mapped to a dummy offset for the scheduler.
* The sharing between multiple users of the same metric without multiplexing
* is not allowed, even though the hardware supports that in principle.
*/
#define METRIC_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \
INTEL_ARCH_EVENT_MASK)
/* /*
* Constraint on the Event code + UMask * Constraint on the Event code + UMask
*/ */
...@@ -537,7 +583,7 @@ union perf_capabilities { ...@@ -537,7 +583,7 @@ union perf_capabilities {
*/ */
u64 full_width_write:1; u64 full_width_write:1;
u64 pebs_baseline:1; u64 pebs_baseline:1;
u64 pebs_metrics_available:1; u64 perf_metrics:1;
u64 pebs_output_pt_available:1; u64 pebs_output_pt_available:1;
}; };
u64 capabilities; u64 capabilities;
...@@ -726,6 +772,12 @@ struct x86_pmu { ...@@ -726,6 +772,12 @@ struct x86_pmu {
*/ */
atomic_t lbr_exclusive[x86_lbr_exclusive_max]; atomic_t lbr_exclusive[x86_lbr_exclusive_max];
/*
* Intel perf metrics
*/
u64 (*update_topdown_event)(struct perf_event *event);
int (*set_topdown_event_period)(struct perf_event *event);
/* /*
* perf task context (i.e. struct perf_event_context::task_ctx_data) * perf task context (i.e. struct perf_event_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86. * switch helper to bridge calls from perf/core to perf/x86.
......
...@@ -815,6 +815,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { ...@@ -815,6 +815,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h), X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h),
{}, {},
}; };
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
......
...@@ -467,6 +467,7 @@ ...@@ -467,6 +467,7 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b #define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV 0xc0010131
...@@ -860,11 +861,14 @@ ...@@ -860,11 +861,14 @@
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
#define MSR_PERF_METRICS 0x00000329
/* PERF_GLOBAL_OVF_CTL bits */ /* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
......
...@@ -196,13 +196,29 @@ struct x86_pmu_capability { ...@@ -196,13 +196,29 @@ struct x86_pmu_capability {
* Fixed-purpose performance events: * Fixed-purpose performance events:
*/ */
/* RDPMC offset for Fixed PMCs */
#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30)
#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29)
/* /*
* All 3 fixed-mode PMCs are configured via this single MSR: * All the fixed-mode PMCs are configured via this single MSR:
*/ */
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/* /*
* The counts are available in three separate MSRs: * There is no event-code assigned to the fixed-mode PMCs.
*
* For a fixed-mode PMC, which has an equivalent event on a general-purpose
* PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
* e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
*
* For a fixed-mode PMC, which doesn't have an equivalent event, a
* pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
* The pseudo event-code for a fixed-mode PMC must be 0x00.
* The pseudo umask-code is 0xX. The X equals the index of the fixed
* counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
*
* The counts are available in separate MSRs:
*/ */
/* Instr_Retired.Any: */ /* Instr_Retired.Any: */
...@@ -213,29 +229,84 @@ struct x86_pmu_capability { ...@@ -213,29 +229,84 @@ struct x86_pmu_capability {
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */ /* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
/* /*
* We model BTS tracing as another fixed-mode PMC. * We model BTS tracing as another fixed-mode PMC.
* *
* We choose a value in the middle of the fixed event range, since lower * We choose the value 47 for the fixed index of BTS, since lower
* values are used by actual fixed events and higher values are used * values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/ */
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) #define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15)
/*
* The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
* each TopDown metric event.
*
* Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
*/
#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
INTEL_PMC_MSK_FIXED_SLOTS)
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) /*
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62) * There is no event-code assigned to the TopDown events.
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) *
#define GLOBAL_STATUS_ASIF BIT_ULL(60) * For the slots event, use the pseudo code of the fixed counter 3.
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) *
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 * For the metric events, the pseudo event-code is 0x00.
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) * The pseudo umask-code starts from the middle of the pseudo event
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) * space, 0x80.
*/
#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */
/* Level 1 metrics */
#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */
#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
#define INTEL_TD_METRIC_NUM 4
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
}
static inline bool is_topdown_idx(int idx)
{
return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
}
#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \
(~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
#define GLOBAL_STATUS_BUFFER_OVF_BIT 62
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
#define GLOBAL_CTRL_EN_PERF_METRICS 48
/* /*
* We model guest LBR event tracing as another fixed-mode PMC like BTS. * We model guest LBR event tracing as another fixed-mode PMC like BTS.
* *
...@@ -334,6 +405,7 @@ struct pebs_xmm { ...@@ -334,6 +405,7 @@ struct pebs_xmm {
#define IBS_OP_ENABLE (1ULL<<17) #define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
#define IBS_RIP_INVALID (1ULL<<38) #define IBS_RIP_INVALID (1ULL<<38)
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
......
...@@ -212,17 +212,26 @@ struct hw_perf_event { ...@@ -212,17 +212,26 @@ struct hw_perf_event {
*/ */
u64 sample_period; u64 sample_period;
/* union {
* The period we started this sample with. struct { /* Sampling */
*/ /*
u64 last_period; * The period we started this sample with.
*/
u64 last_period;
/* /*
* However much is left of the current period; note that this is * However much is left of the current period;
* a full 64bit value and allows for generation of periods longer * note that this is a full 64bit value and
* than hardware might allow. * allows for generation of periods longer
*/ * than hardware might allow.
local64_t period_left; */
local64_t period_left;
};
struct { /* Topdown events counting for context switch */
u64 saved_metric;
u64 saved_slots;
};
};
/* /*
* State for throttling the event, see __perf_event_overflow() and * State for throttling the event, see __perf_event_overflow() and
...@@ -576,9 +585,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, ...@@ -576,9 +585,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
* PERF_EV_CAP_SOFTWARE: Is a software event. * PERF_EV_CAP_SOFTWARE: Is a software event.
* PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
* from any CPU in the package where it is active. * from any CPU in the package where it is active.
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
*/ */
#define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
#define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
...@@ -859,7 +872,6 @@ struct perf_cpu_context { ...@@ -859,7 +872,6 @@ struct perf_cpu_context {
struct list_head cgrp_cpuctx_entry; struct list_head cgrp_cpuctx_entry;
#endif #endif
struct list_head sched_cb_entry;
int sched_cb_usage; int sched_cb_usage;
int online; int online;
......
...@@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex); ...@@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count; static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_mmap_events __read_mostly;
...@@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event) ...@@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active; return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
} }
/*
* Events that have PERF_EV_CAP_SIBLING require being part of a group and
* cannot exist on their own, schedule them out and move them into the ERROR
* state. Also see _perf_event_enable(), it will not be able to recover
* this ERROR state.
*/
static inline void perf_remove_sibling_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
event_sched_out(event, cpuctx, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
}
static void perf_group_detach(struct perf_event *event) static void perf_group_detach(struct perf_event *event)
{ {
struct perf_event *leader = event->group_leader;
struct perf_event *sibling, *tmp; struct perf_event *sibling, *tmp;
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
...@@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event) ...@@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
/* /*
* If this is a sibling, remove it from its group. * If this is a sibling, remove it from its group.
*/ */
if (event->group_leader != event) { if (leader != event) {
list_del_init(&event->sibling_list); list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--; event->group_leader->nr_siblings--;
goto out; goto out;
...@@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event) ...@@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
*/ */
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
if (sibling->event_caps & PERF_EV_CAP_SIBLING)
perf_remove_sibling_event(sibling);
sibling->group_leader = sibling; sibling->group_leader = sibling;
list_del_init(&sibling->sibling_list); list_del_init(&sibling->sibling_list);
...@@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event) ...@@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
} }
out: out:
perf_event__header_size(event->group_leader); for_each_sibling_event(tmp, leader)
for_each_sibling_event(tmp, event->group_leader)
perf_event__header_size(tmp); perf_event__header_size(tmp);
perf_event__header_size(leader);
} }
static bool is_orphaned_event(struct perf_event *event) static bool is_orphaned_event(struct perf_event *event)
...@@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event) ...@@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
raw_spin_lock_irq(&ctx->lock); raw_spin_lock_irq(&ctx->lock);
if (event->state >= PERF_EVENT_STATE_INACTIVE || if (event->state >= PERF_EVENT_STATE_INACTIVE ||
event->state < PERF_EVENT_STATE_ERROR) { event->state < PERF_EVENT_STATE_ERROR) {
out:
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
return; return;
} }
...@@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event) ...@@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
* has gone back into error state, as distinct from the task having * has gone back into error state, as distinct from the task having
* been scheduled away before the cross-call arrived. * been scheduled away before the cross-call arrived.
*/ */
if (event->state == PERF_EVENT_STATE_ERROR) if (event->state == PERF_EVENT_STATE_ERROR) {
/*
* Detached SIBLING events cannot leave ERROR state.
*/
if (event->event_caps & PERF_EV_CAP_SIBLING &&
event->group_leader == event)
goto out;
event->state = PERF_EVENT_STATE_OFF; event->state = PERF_EVENT_STATE_OFF;
}
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
event_function_call(event, __perf_event_enable, NULL); event_function_call(event, __perf_event_enable, NULL);
...@@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, ...@@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
struct perf_event_context *parent, *next_parent; struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx; struct perf_cpu_context *cpuctx;
int do_switch = 1; int do_switch = 1;
struct pmu *pmu;
if (likely(!ctx)) if (likely(!ctx))
return; return;
pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx); cpuctx = __get_cpu_context(ctx);
if (!cpuctx->task_ctx) if (!cpuctx->task_ctx)
return; return;
...@@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, ...@@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock); raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) { if (context_equiv(ctx, next_ctx)) {
struct pmu *pmu = ctx->pmu;
WRITE_ONCE(ctx->task, next); WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task); WRITE_ONCE(next_ctx->task, task);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
/* /*
* PMU specific parts of task perf context can require * PMU specific parts of task perf context can require
* additional synchronization. As an example of such * additional synchronization. As an example of such
...@@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, ...@@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
else else
swap(ctx->task_ctx_data, next_ctx->task_ctx_data); swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
perf_pmu_enable(pmu);
/* /*
* RCU_INIT_POINTER here is safe because we've not * RCU_INIT_POINTER here is safe because we've not
* modified the ctx and the above modification of * modified the ctx and the above modification of
...@@ -3428,21 +3463,22 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, ...@@ -3428,21 +3463,22 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
if (do_switch) { if (do_switch) {
raw_spin_lock(&ctx->lock); raw_spin_lock(&ctx->lock);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL); task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
perf_pmu_enable(pmu);
raw_spin_unlock(&ctx->lock); raw_spin_unlock(&ctx->lock);
} }
} }
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
void perf_sched_cb_dec(struct pmu *pmu) void perf_sched_cb_dec(struct pmu *pmu)
{ {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
this_cpu_dec(perf_sched_cb_usages); --cpuctx->sched_cb_usage;
if (!--cpuctx->sched_cb_usage)
list_del(&cpuctx->sched_cb_entry);
} }
...@@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu) ...@@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
{ {
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
if (!cpuctx->sched_cb_usage++) cpuctx->sched_cb_usage++;
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
this_cpu_inc(perf_sched_cb_usages);
} }
/* /*
...@@ -3464,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu) ...@@ -3464,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu)
* PEBS requires this to provide PID/TID information. This requires we flush * PEBS requires this to provide PID/TID information. This requires we flush
* all queued PEBS records before we context switch to a new task. * all queued PEBS records before we context switch to a new task.
*/ */
static void perf_pmu_sched_task(struct task_struct *prev, static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
struct task_struct *next,
bool sched_in)
{ {
struct perf_cpu_context *cpuctx;
struct pmu *pmu; struct pmu *pmu;
if (prev == next) pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
return;
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) { if (WARN_ON_ONCE(!pmu->sched_task))
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */ return;
if (WARN_ON_ONCE(!pmu->sched_task))
continue;
perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu); perf_pmu_disable(pmu);
pmu->sched_task(cpuctx->task_ctx, sched_in); pmu->sched_task(cpuctx->task_ctx, sched_in);
perf_pmu_enable(pmu); perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx); perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
} }
static void perf_event_switch(struct task_struct *task, static void perf_event_switch(struct task_struct *task,
...@@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task, ...@@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
{ {
int ctxn; int ctxn;
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);
if (atomic_read(&nr_switch_events)) if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false); perf_event_switch(task, next, false);
...@@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, ...@@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
struct task_struct *task) struct task_struct *task)
{ {
struct perf_cpu_context *cpuctx; struct perf_cpu_context *cpuctx;
struct pmu *pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx); cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx) if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
__perf_pmu_sched_task(cpuctx, true);
return; return;
}
perf_ctx_lock(cpuctx, ctx); perf_ctx_lock(cpuctx, ctx);
/* /*
...@@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, ...@@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!ctx->nr_events) if (!ctx->nr_events)
goto unlock; goto unlock;
perf_pmu_disable(ctx->pmu); perf_pmu_disable(pmu);
/* /*
* We want to keep the following priority order: * We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned, * cpu pinned (that don't need to move), task pinned,
...@@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, ...@@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task); perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(cpuctx->task_ctx, true);
perf_pmu_enable(pmu);
unlock: unlock:
perf_ctx_unlock(cpuctx, ctx); perf_ctx_unlock(cpuctx, ctx);
...@@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev, ...@@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(&nr_switch_events)) if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true); perf_event_switch(task, prev, true);
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
} }
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
...@@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event); ...@@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma) static void perf_mmap_close(struct vm_area_struct *vma)
{ {
struct perf_event *event = vma->vm_file->private_data; struct perf_event *event = vma->vm_file->private_data;
struct perf_buffer *rb = ring_buffer_get(event); struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user; struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked; int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb); unsigned long size = perf_data_size(rb);
bool detach_rest = false;
if (event->pmu->event_unmapped) if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm); event->pmu->event_unmapped(event, vma->vm_mm);
...@@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma) ...@@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex); mutex_unlock(&event->mmap_mutex);
} }
atomic_dec(&rb->mmap_count); if (atomic_dec_and_test(&rb->mmap_count))
detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put; goto out_put;
...@@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) ...@@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex); mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */ /* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count)) if (!detach_rest)
goto out_put; goto out_put;
/* /*
...@@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void) ...@@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF #ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif #endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
} }
} }
......
...@@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) ...@@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
struct module *mod) struct module *mod)
{ {
int len = strlen(mod->name); int len = strlen(module_name(mod));
const char *name = trace_kprobe_symbol(tk); const char *name = trace_kprobe_symbol(tk);
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
} }
static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
...@@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, ...@@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (ret) if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n", pr_warn("Failed to re-register probe %s on %s: %d\n",
trace_probe_name(&tk->tp), trace_probe_name(&tk->tp),
mod->name, ret); module_name(mod), ret);
} }
} }
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment