Commit 91e1c99e authored by Linus Torvalds

Merge tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Thomas Gleixner:
 "Core:

   - Allow ftrace to instrument parts of the perf core code

   - Add a new mem_hops field to perf_mem_data_src which allows
     representing intra-node/package or inter-node/off-package details,
     to prepare for next-generation systems which have more hierarchy
     within the node/package level.

  Tools:

   - Update for the new mem_hops field in perf_mem_data_src

  Arch:

   - A set of constraints fixes for the Intel uncore PMU

   - The usual set of small fixes and improvements for x86 and PPC"

* tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings
  powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses
  tools/perf: Add mem_hops field in perf_mem_data_src structure
  perf: Add mem_hops field in perf_mem_data_src structure
  perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line
  perf/core: Allow ftrace for functions in kernel/event/core.c
  perf/x86: Add new event for AUX output counter index
  perf/x86: Add compiler barrier after updating BTS
  perf/x86/intel/uncore: Fix Intel SPR M3UPI event constraints
  perf/x86/intel/uncore: Fix Intel SPR M2PCIE event constraints
  perf/x86/intel/uncore: Fix Intel SPR IIO event constraints
  perf/x86/intel/uncore: Fix Intel SPR CHA event constraints
  perf/x86/intel/uncore: Fix Intel ICX IIO event constraints
  perf/x86/intel/uncore: Fix invalid unit check
  perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
parents 5a47ebe9 2de71ee1
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -238,11 +238,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 			ret |= P(SNOOP, HIT);
 		break;
 	case 5:
-		ret = PH(LVL, REM_CCE1);
-		if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
-			ret |= P(SNOOP, HIT);
-		else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
-			ret |= P(SNOOP, HITM);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			ret = REM | P(HOPS, 0);
+
+			if (sub_idx == 0 || sub_idx == 4)
+				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 5)
+				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
+			else if (sub_idx == 2 || sub_idx == 6)
+				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+			else if (sub_idx == 3 || sub_idx == 7)
+				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+		} else {
+			if (sub_idx == 0)
+				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+			else if (sub_idx == 1)
+				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+			else if (sub_idx == 2 || sub_idx == 4)
+				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+			else if (sub_idx == 3 || sub_idx == 5)
+				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+		}
 		break;
 	case 6:
 		ret = PH(LVL, REM_CCE2);
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -273,6 +273,8 @@
 #define P(a, b)				PERF_MEM_S(a, b)
 #define PH(a, b)			(P(LVL, HIT) | P(a, b))
 #define PM(a, b)			(P(LVL, MISS) | P(a, b))
+#define LEVEL(x)			P(LVLNUM, x)
+#define REM				P(REMOTE, REMOTE)
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
 int isa207_compute_mmcr(u64 event[], int n_ev,
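
The two new helpers above keep the powerpc encodings compact. As a hedged illustration of what one such composite value expands to, here is a minimal user-space program (not part of this merge; it assumes a uapi perf_event.h that already carries the PERF_MEM_HOPS_* additions from this series):

#include <linux/perf_event.h>
#include <stdio.h>

#define P(a, b)		PERF_MEM_S(a, b)
#define PH(a, b)	(P(LVL, HIT) | P(a, b))
#define LEVEL(x)	P(LVLNUM, x)
#define REM		P(REMOTE, REMOTE)

int main(void)
{
	/* sub_idx == 0 below ISA 3.1: L2 hit on a remote core, zero hops */
	__u64 src = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);

	/* prints 0x82400200440: LVL hit+L2, SNOOP hit, LVLNUM L2, remote, hops 0 */
	printf("data_src = %#llx\n", (unsigned long long)src);
	return 0;
}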
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all,  *x86_pmu.enable_all);
 DEFINE_STATIC_CALL_NULL(x86_pmu_enable,  *x86_pmu.enable);
 DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
+
 DEFINE_STATIC_CALL_NULL(x86_pmu_add,  *x86_pmu.add);
 DEFINE_STATIC_CALL_NULL(x86_pmu_del,  *x86_pmu.del);
 DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	hwc->last_cpu = smp_processor_id();
 	hwc->last_tag = ++cpuc->tags[i];
 
+	static_call_cond(x86_pmu_assign)(event, idx);
+
 	switch (hwc->idx) {
 	case INTEL_PMC_IDX_FIXED_BTS:
 	case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
 	static_call_update(x86_pmu_enable, x86_pmu.enable);
 	static_call_update(x86_pmu_disable, x86_pmu.disable);
 
+	static_call_update(x86_pmu_assign, x86_pmu.assign);
+
 	static_call_update(x86_pmu_add, x86_pmu.add);
 	static_call_update(x86_pmu_del, x86_pmu.del);
 	static_call_update(x86_pmu_read, x86_pmu.read);
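
For readers unfamiliar with the static-call API used above: DEFINE_STATIC_CALL_NULL() declares a patchable call site whose initial target is NULL, static_call_update() patches in the real target once at init time, and static_call_cond() compiles the call so that a NULL target becomes a NOP instead of an indirect branch. A minimal sketch of the pattern (my_hook and maybe_hook are illustrative names, not kernel symbols):

#include <linux/static_call.h>

static void (*maybe_hook)(int);			/* optional callback, may stay NULL */

DEFINE_STATIC_CALL_NULL(my_hook, *maybe_hook);	/* call site starts out as a NOP */

void my_init(void)
{
	/* patch the call site once; a NULL target keeps it a NOP */
	static_call_update(my_hook, maybe_hook);
}

void my_fast_path(int idx)
{
	/* no pointer load, no retpoline: a direct call or nothing at all */
	static_call_cond(my_hook)(idx);
}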
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts)
 	} else {
 		local_set(&buf->data_size, head);
 	}
+
+	/*
+	 * Since BTS is coherent, just add compiler barrier to ensure
+	 * BTS updating is ordered against bts::handle::event.
+	 */
+	barrier();
 }
 
 static int
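
barrier() here is a compiler-only fence: the BTS buffer is cache coherent, so no fence instruction is required, only a guarantee that the compiler does not reorder the data_size/head stores past the later handle update. For GCC-compatible compilers, include/linux/compiler.h defines it as essentially:

#define barrier() __asm__ __volatile__("" : : : "memory")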
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
 
 static struct event_constraint intel_icl_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
-	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
+	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* old INST_RETIRED.PREC_DIST */
+	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
 	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
@@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
 
 static struct event_constraint intel_spr_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
-	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
+	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
 	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
@@ -2403,6 +2404,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+	if (is_pebs_pt(event))
+		perf_report_aux_output_id(event, idx);
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
 	if (needs_branch_stack(event))
@@ -4495,8 +4502,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
 	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
 }
 
+static void intel_aux_output_init(void)
+{
+	/* Refer also intel_pmu_aux_output_match() */
+	if (x86_pmu.intel_cap.pebs_output_pt_available)
+		x86_pmu.assign = intel_pmu_assign_event;
+}
+
 static int intel_pmu_aux_output_match(struct perf_event *event)
 {
+	/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
 	if (!x86_pmu.intel_cap.pebs_output_pt_available)
 		return 0;
 
@@ -6302,6 +6317,8 @@ __init int intel_pmu_init(void)
 	if (is_hybrid())
 		intel_pmu_check_hybrid_pmus((u64)fixed_mask);
 
+	intel_aux_output_init();
+
 	return 0;
 }
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -923,7 +923,8 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_icl_pebs_event_constraints[] = {
-	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),	/* old INST_RETIRED.PREC_DIST */
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),	/* SLOTS */
 
 	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -943,7 +944,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_spr_pebs_event_constraints[] = {
-	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -30,7 +30,7 @@
 #define uncore_discovery_invalid_unit(unit)			\
-	(!unit.table1 || !unit.ctl || !unit.table3 ||		\
+	(!unit.table1 || !unit.ctl ||				\
 	 unit.table1 == -1ULL || unit.ctl == -1ULL ||		\
 	 unit.table3 == -1ULL)
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -452,7 +452,7 @@
 #define ICX_M3UPI_PCI_PMON_BOX_CTL		0xa0
 
 /* ICX IMC */
-#define ICX_NUMBER_IMC_CHN			2
+#define ICX_NUMBER_IMC_CHN			3
 #define ICX_IMC_MEM_STRIDE			0x4
 
 /* SPR */
@@ -5076,8 +5076,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
 	UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
 	UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
 	UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
 	EVENT_CONSTRAINT_END
 };
@@ -5463,7 +5465,7 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = {
 static struct intel_uncore_type icx_uncore_imc = {
 	.name = "imc",
 	.num_counters = 4,
-	.num_boxes = 8,
+	.num_boxes = 12,
 	.perf_ctr_bits = 48,
 	.fixed_ctr_bits = 48,
 	.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
@@ -5647,6 +5649,7 @@ static struct intel_uncore_type spr_uncore_chabox = {
 	.event_mask = SPR_CHA_PMON_EVENT_MASK,
 	.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
 	.num_shared_regs = 1,
+	.constraints = skx_uncore_chabox_constraints,
 	.ops = &spr_uncore_chabox_ops,
 	.format_group = &spr_uncore_chabox_format_group,
 	.attr_update = uncore_alias_groups,
@@ -5658,6 +5661,7 @@ static struct intel_uncore_type spr_uncore_iio = {
 	.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
 	.format_group = &snr_uncore_iio_format_group,
 	.attr_update = uncore_alias_groups,
+	.constraints = icx_uncore_iio_constraints,
 };
 
 static struct attribute *spr_uncore_raw_formats_attr[] = {
@@ -5686,9 +5690,16 @@ static struct intel_uncore_type spr_uncore_irp = {
 };
 
+static struct event_constraint spr_uncore_m2pcie_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
 static struct intel_uncore_type spr_uncore_m2pcie = {
 	SPR_UNCORE_COMMON_FORMAT(),
 	.name = "m2pcie",
+	.constraints = spr_uncore_m2pcie_constraints,
 };
 
 static struct intel_uncore_type spr_uncore_pcu = {
@@ -5765,6 +5776,7 @@ static struct intel_uncore_type spr_uncore_upi = {
 static struct intel_uncore_type spr_uncore_m3upi = {
 	SPR_UNCORE_PCI_COMMON_FORMAT(),
 	.name = "m3upi",
+	.constraints = icx_uncore_m3upi_constraints,
 };
 
 static struct intel_uncore_type spr_uncore_mdf = {
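
For context on the constraint entries above: the second argument of UNCORE_EVENT_CONSTRAINT() is a bitmask of the counters the event may be scheduled on, so these fixes pin the affected events to the counters the hardware actually supports. A small stand-alone decode of that convention (the mask value is taken from the IIO fix above):

#include <stdio.h>

int main(void)
{
	unsigned int cntmask = 0xc;	/* e.g. UNCORE_EVENT_CONSTRAINT(0x88, 0xc) */
	unsigned int i;

	for (i = 0; i < 8; i++)
		if (cntmask & (1u << i))
			printf("counter %u allowed\n", i);	/* prints 2 and 3 */
	return 0;
}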
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -726,6 +726,7 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	void		(*assign)(struct perf_event *event, int idx);
 	void		(*add)(struct perf_event *);
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1400,6 +1400,7 @@ perf_event_addr_filters(struct perf_event *event)
 }
 
 extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
 
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_sample_data *data,
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_TEXT_POKE			= 20,
 
+	/*
+	 * Data written to the AUX area by hardware due to aux_output may need
+	 * to be matched to the event by an architecture-specific hardware ID.
+	 * This records the hardware ID, but requires sample_id to provide the
+	 * event ID, e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+	 * records from multiple events.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				hw_id;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -1210,14 +1225,16 @@ union perf_mem_data_src {
 			mem_remote:1,	/* remote */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_blk:3,	/* access blocked */
-			mem_rsvd:21;
+			mem_hops:3,	/* hop level */
+			mem_rsvd:18;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:21,
+		__u64	mem_rsvd:18,
+			mem_hops:3,	/* hop level */
 			mem_blk:3,	/* access blocked */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_remote:1,	/* remote */
@@ -1241,7 +1258,13 @@ union perf_mem_data_src {
 #define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
 #define PERF_MEM_OP_SHIFT	0
 
-/* memory hierarchy (memory level, hit or miss) */
+/*
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
+ * favour of the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
+ * It is still supported in order to not break defined ABIs.
+ *
+ * memory hierarchy (memory level, hit or miss)
+ */
 #define PERF_MEM_LVL_NA		0x01  /* not available */
 #define PERF_MEM_LVL_HIT	0x02  /* hit level */
 #define PERF_MEM_LVL_MISS	0x04  /* miss level */
@@ -1307,6 +1330,11 @@ union perf_mem_data_src {
 #define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
 #define PERF_MEM_BLK_SHIFT	40
 
+/* hop level */
+#define PERF_MEM_HOPS_0		0x01 /* remote core, same node */
+/* 2-7 available */
+#define PERF_MEM_HOPS_SHIFT	43
+
 #define PERF_MEM_S(a, s) \
 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
-endif
-
 obj-y := core.o ring_buffer.o callchain.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_UPROBES) += uprobes.o
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9099,6 +9099,36 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
+void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header	header;
+		u64				hw_id;
+	} rec;
+	int ret;
+
+	if (event->parent)
+		event = event->parent;
+
+	rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
+	rec.header.misc = 0;
+	rec.header.size = sizeof(rec);
+	rec.hw_id	= hw_id;
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, &sample, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+	perf_output_end(&handle);
+}
+
 static int
 __perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
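
On the consumer side, a tool reading the ring buffer sees the new record interleaved with samples. A hedged user-space sketch (the struct and handler names are illustrative; the trailing sample_id layout depends on attr.sample_type):

#include <linux/perf_event.h>
#include <stdio.h>

struct aux_output_hw_id_event {
	struct perf_event_header	header;	/* type == PERF_RECORD_AUX_OUTPUT_HW_ID */
	__u64				hw_id;
	/* followed by struct sample_id, per attr.sample_type */
};

static void handle_event(struct perf_event_header *hdr)
{
	if (hdr->type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
		struct aux_output_hw_id_event *rec = (void *)hdr;

		/* match rec->hw_id against IDs seen in the decoded AUX data */
		printf("aux output hw_id: %llu\n", (unsigned long long)rec->hw_id);
	}
}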
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1210,14 +1210,16 @@ union perf_mem_data_src {
 			mem_remote:1,	/* remote */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_blk:3,	/* access blocked */
-			mem_rsvd:21;
+			mem_hops:3,	/* hop level */
+			mem_rsvd:18;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:21,
+		__u64	mem_rsvd:18,
+			mem_hops:3,	/* hop level */
 			mem_blk:3,	/* access blocked */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_remote:1,	/* remote */
@@ -1241,7 +1243,13 @@ union perf_mem_data_src {
 #define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
 #define PERF_MEM_OP_SHIFT	0
 
-/* memory hierarchy (memory level, hit or miss) */
+/*
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
+ * favour of the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
+ * It is still supported in order to not break defined ABIs.
+ *
+ * memory hierarchy (memory level, hit or miss)
+ */
 #define PERF_MEM_LVL_NA		0x01  /* not available */
 #define PERF_MEM_LVL_HIT	0x02  /* hit level */
 #define PERF_MEM_LVL_MISS	0x04  /* miss level */
@@ -1307,6 +1315,11 @@ union perf_mem_data_src {
 #define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
 #define PERF_MEM_BLK_SHIFT	40
 
+/* hop level */
+#define PERF_MEM_HOPS_0		0x01 /* remote core, same node */
+/* 2-7 available */
+#define PERF_MEM_HOPS_SHIFT	43
+
 #define PERF_MEM_S(a, s) \
 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
 	[PERF_MEM_LVLNUM_NA] = "N/A",
 };
 
+static const char * const mem_hops[] = {
+	"N/A",
+	/*
+	 * While printing, 'Remote' will be added to represent
+	 * 'Remote core, same node' accesses, as the remote field
+	 * needs to be set along with the mem_hops field.
+	 */
+	"core, same node",
+};
+
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 {
 	size_t i, l = 0;
@@ -320,12 +330,14 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 	/* already taken care of */
 	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
 
 	if (mem_info && mem_info->data_src.mem_remote) {
 		strcat(out, "Remote ");
 		l += 7;
 	}
 
+	if (mem_info && mem_info->data_src.mem_hops)
+		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
+
 	printed = 0;
 	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
 		if (!(m & 0x1))
@@ -472,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 	/*
 	 * Skylake might report unknown remote level via this
 	 * bit, consider it when evaluating remote HITMs.
+	 *
+	 * In case of Power, the remote field can also denote cache
+	 * accesses from another core on the same node. Hence, set
+	 * mrem only when HOPS is zero, along with the remote field.
 	 */
-	bool mrem = data_src->mem_remote;
+	bool mrem = (data_src->mem_remote && !data_src->mem_hops);
 	int err = 0;
 
 #define HITM_INC(__f)	\
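
Taken together, for the new Power encoding (mem_remote set, mem_hops == 1, L2 level bits set), perf_mem__lvl_scnprintf() now emits roughly "Remote core, same node L2 hit"; the exact level and hit/miss suffix depends on the remaining PERF_MEM_LVL_* bits in the sample.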