Commit d310ec03 authored by Linus Torvalds

Merge tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance event updates from Ingo Molnar:

 - Add CPU-PMU support for Intel Sapphire Rapids CPUs

 - Extend the perf ABI with PERF_SAMPLE_WEIGHT_STRUCT, to offer
   two-parameter sampling event feedback. Not used yet, but is intended
   for Golden Cove CPU-PMU, which can provide both the instruction
   latency and the cache latency information for memory profiling
   events. (A brief userspace decoding sketch follows this list.)

 - Remove experimental, default-disabled perfmon-v4 counter_freezing
   support that could only be enabled via a boot option. The hardware is
   hopelessly broken, we'd like to make sure nobody starts relying on
   this, as it would only end in tears.

 - Fix energy/power events on Intel SPR platforms

 - Simplify the kprobes resume_execution() logic

 - Misc smaller fixes.
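
A minimal sketch of how a userspace profiler might request and decode the new
two-parameter weight. This is not part of the merge itself: the union mirrors
the little-endian layout added to include/uapi/linux/perf_event.h below, while
the event attr setup, the sample_weight/print_weight names and the Sapphire
Rapids latency interpretation are illustrative assumptions.

/*
 * Sketch only: decode PERF_SAMPLE_WEIGHT_STRUCT from a sample's u64 weight
 * field.  Everything besides the union layout is hypothetical.
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

#ifndef PERF_SAMPLE_WEIGHT_STRUCT
#define PERF_SAMPLE_WEIGHT_STRUCT	(1U << 24)	/* new in this merge */
#endif

union sample_weight {
	uint64_t full;
	struct {
		uint32_t var1_dw;	/* cache/memory latency on SPR (per the ds.c changes) */
		uint16_t var2_w;	/* instruction latency on SPR */
		uint16_t var3_w;	/* reserved */
	};
};

/* Decode the weight value carried in a PERF_RECORD_SAMPLE. */
static void print_weight(uint64_t raw)
{
	union sample_weight w = { .full = raw };

	printf("mem latency %u, instruction latency %u\n", w.var1_dw, w.var2_w);
}

int main(void)
{
	struct perf_event_attr attr = {
		.size		= sizeof(attr),
		.type		= PERF_TYPE_RAW,
		/* WEIGHT and WEIGHT_STRUCT are mutually exclusive. */
		.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_WEIGHT_STRUCT,
		.precise_ip	= 3,
	};

	(void)attr;	/* perf_event_open()/mmap ring-buffer reading omitted */
	print_weight((7ULL << 32) | 180);	/* instr latency 7, mem latency 180 */
	return 0;
}

Note that perf_copy_attr() now rejects attributes that set both
PERF_SAMPLE_WEIGHT and PERF_SAMPLE_WEIGHT_STRUCT, so a tool has to pick one of
the two layouts per event.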

* tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/rapl: Fix psys-energy event on Intel SPR platform
  perf/x86/rapl: Only check lower 32bits for RAPL energy counters
  perf/x86/rapl: Add msr mask support
  perf/x86/kvm: Add Cascade Lake Xeon steppings to isolation_ucodes[]
  perf/x86/intel: Support CPUID 10.ECX to disable fixed counters
  perf/x86/intel: Add perf core PMU support for Sapphire Rapids
  perf/x86/intel: Filter unsupported Topdown metrics event
  perf/x86/intel: Factor out intel_update_topdown_event()
  perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT
  perf/intel: Remove Perfmon-v4 counter_freezing support
  x86/perf: Use static_call for x86_pmu.guest_get_msrs
  perf/x86/intel/uncore: With > 8 nodes, get pci bus die id from NUMA info
  perf/x86/intel/uncore: Store the logical die id instead of the physical die id.
  x86/kprobes: Do not decode opcode in resume_execution()
parents 657bd90c 8bcfdd7c
@@ -945,12 +945,6 @@
 			causing system reset or hang due to sending
 			INIT from AP to BSP.
 
-	perf_v4_pmi=	[X86,INTEL]
-			Format: <bool>
-			Disable Intel PMU counter freezing feature.
-			The feature only exists starting from
-			Arch Perfmon v4 (Skylake and newer).
-
 	disable_ddw	[PPC/PSERIES]
 			Disable Dynamic DMA Window support. Use this
 			to workaround buggy firmware.
......
@@ -2195,7 +2195,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
 		    ppmu->get_mem_weight)
-			ppmu->get_mem_weight(&data.weight);
+			ppmu->get_mem_weight(&data.weight.full);
 
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
......
...@@ -81,6 +81,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); ...@@ -81,6 +81,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
u64 __read_mostly hw_cache_event_ids u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_OP_MAX]
...@@ -253,6 +255,8 @@ static bool check_hw_exists(void) ...@@ -253,6 +255,8 @@ static bool check_hw_exists(void)
if (ret) if (ret)
goto msr_fail; goto msr_fail;
for (i = 0; i < x86_pmu.num_counters_fixed; i++) { for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
if (fixed_counter_disabled(i))
continue;
if (val & (0x03 << i*4)) { if (val & (0x03 << i*4)) {
bios_fail = 1; bios_fail = 1;
val_fail = val; val_fail = val;
...@@ -665,6 +669,12 @@ void x86_pmu_disable_all(void) ...@@ -665,6 +669,12 @@ void x86_pmu_disable_all(void)
} }
} }
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
return static_call(x86_pmu_guest_get_msrs)(nr);
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
/* /*
* There may be PMI landing after enabled=0. The PMI hitting could be before or * There may be PMI landing after enabled=0. The PMI hitting could be before or
* after disable_all. * after disable_all.
...@@ -1523,6 +1533,8 @@ void perf_event_print_debug(void) ...@@ -1523,6 +1533,8 @@ void perf_event_print_debug(void)
cpu, idx, prev_left); cpu, idx, prev_left);
} }
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
if (fixed_counter_disabled(idx))
continue;
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
...@@ -1923,6 +1935,8 @@ static void x86_pmu_static_call_update(void) ...@@ -1923,6 +1935,8 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
} }
static void _x86_pmu_read(struct perf_event *event) static void _x86_pmu_read(struct perf_event *event)
...@@ -1930,6 +1944,13 @@ static void _x86_pmu_read(struct perf_event *event) ...@@ -1930,6 +1944,13 @@ static void _x86_pmu_read(struct perf_event *event)
x86_perf_event_update(event); x86_perf_event_update(event);
} }
static inline struct perf_guest_switch_msr *
perf_guest_get_msrs_nop(int *nr)
{
*nr = 0;
return NULL;
}
static int __init init_hw_perf_events(void) static int __init init_hw_perf_events(void)
{ {
struct x86_pmu_quirk *quirk; struct x86_pmu_quirk *quirk;
@@ -1995,12 +2016,17 @@ static int __init init_hw_perf_events(void)
 	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
 	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
+	pr_info("... fixed-purpose events: %lu\n",
+			hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1)
+					<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl));
 	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
 
 	if (!x86_pmu.read)
 		x86_pmu.read = _x86_pmu_read;
 
+	if (!x86_pmu.guest_get_msrs)
+		x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
+
 	x86_pmu_static_call_update();
 
 	/*
......
@@ -36,7 +36,9 @@ union intel_x86_pebs_dse {
 		unsigned int ld_dse:4;
 		unsigned int ld_stlb_miss:1;
 		unsigned int ld_locked:1;
-		unsigned int ld_reserved:26;
+		unsigned int ld_data_blk:1;
+		unsigned int ld_addr_blk:1;
+		unsigned int ld_reserved:24;
 	};
 	struct {
 		unsigned int st_l1d_hit:1;
@@ -45,6 +47,12 @@ union intel_x86_pebs_dse {
 		unsigned int st_locked:1;
 		unsigned int st_reserved2:26;
 	};
+	struct {
+		unsigned int st_lat_dse:4;
+		unsigned int st_lat_stlb_miss:1;
+		unsigned int st_lat_locked:1;
+		unsigned int ld_reserved3:26;
+	};
 };
...@@ -198,6 +206,63 @@ static u64 load_latency_data(u64 status) ...@@ -198,6 +206,63 @@ static u64 load_latency_data(u64 status)
if (dse.ld_locked) if (dse.ld_locked)
val |= P(LOCK, LOCKED); val |= P(LOCK, LOCKED);
/*
* Ice Lake and earlier models do not support block infos.
*/
if (!x86_pmu.pebs_block) {
val |= P(BLK, NA);
return val;
}
/*
* bit 6: load was blocked since its data could not be forwarded
* from a preceding store
*/
if (dse.ld_data_blk)
val |= P(BLK, DATA);
/*
* bit 7: load was blocked due to potential address conflict with
* a preceding store
*/
if (dse.ld_addr_blk)
val |= P(BLK, ADDR);
if (!dse.ld_data_blk && !dse.ld_addr_blk)
val |= P(BLK, NA);
return val;
}
static u64 store_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
dse.val = status;
/*
* use the mapping table for bit 0-3
*/
val = pebs_data_source[dse.st_lat_dse];
/*
* bit 4: TLB access
* 0 = did not miss 2nd level TLB
* 1 = missed 2nd level TLB
*/
if (dse.st_lat_stlb_miss)
val |= P(TLB, MISS) | P(TLB, L2);
else
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
/*
* bit 5: locked prefix
*/
if (dse.st_lat_locked)
val |= P(LOCK, LOCKED);
val |= P(BLK, NA);
return val; return val;
} }
...@@ -870,6 +935,28 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { ...@@ -870,6 +935,28 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END EVENT_CONSTRAINT_END
}; };
struct event_constraint intel_spr_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
/*
* Everything else is handled by PMU_FL_PEBS_ALL, because we
* need the full constraints from the main table.
*/
EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event) struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{ {
struct event_constraint *c; struct event_constraint *c;
@@ -960,7 +1047,8 @@ static void adaptive_pebs_record_size_update(void)
 }
 
 #define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
-				PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
+				PERF_SAMPLE_PHYS_ADDR |			     \
+				PERF_SAMPLE_WEIGHT_TYPE |		     \
 				PERF_SAMPLE_TRANSACTION |		     \
 				PERF_SAMPLE_DATA_PAGE_SIZE)
 
@@ -987,7 +1075,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
 	gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
 	       (attr->sample_regs_intr & PEBS_GP_REGS);
 
-	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
+	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
 		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
 		      x86_pmu.rtm_abort_event);
...@@ -1331,6 +1419,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux) ...@@ -1331,6 +1419,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
if (fl & PERF_X86_EVENT_PEBS_LDLAT) if (fl & PERF_X86_EVENT_PEBS_LDLAT)
val = load_latency_data(aux); val = load_latency_data(aux);
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
val = store_latency_data(aux);
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
val = precise_datala_hsw(event, aux); val = precise_datala_hsw(event, aux);
else if (fst) else if (fst)
@@ -1369,8 +1459,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
-	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-		data->weight = pebs->lat;
+	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
+		data->weight.full = pebs->lat;
 
 	/*
 	 * data.data_src encodes the data source
@@ -1462,8 +1552,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
-		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-			data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
+		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
+			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
 
 		if (sample_type & PERF_SAMPLE_TRANSACTION)
 			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
@@ -1507,6 +1597,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
 #endif
 }
 
+#define PEBS_LATENCY_MASK			0xffff
+#define PEBS_CACHE_LATENCY_OFFSET		32
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
@@ -1577,9 +1670,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	}
 
 	if (format_size & PEBS_DATACFG_MEMINFO) {
-		if (sample_type & PERF_SAMPLE_WEIGHT)
-			data->weight = meminfo->latency ?:
-				intel_get_tsx_weight(meminfo->tsx_tuning);
+		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+			u64 weight = meminfo->latency;
+
+			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
+				data->weight.var2_w = weight & PEBS_LATENCY_MASK;
+				weight >>= PEBS_CACHE_LATENCY_OFFSET;
+			}
+
+			/*
+			 * Although meminfo::latency is defined as a u64,
+			 * only the lower 32 bits include the valid data
+			 * in practice on Ice Lake and earlier platforms.
+			 */
+			if (sample_type & PERF_SAMPLE_WEIGHT) {
+				data->weight.full = weight ?:
+					intel_get_tsx_weight(meminfo->tsx_tuning);
+			} else {
+				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
+					intel_get_tsx_weight(meminfo->tsx_tuning);
+			}
+		}
 
 		if (sample_type & PERF_SAMPLE_DATA_SRC)
 			data->data_src.val = get_data_src(event, meminfo->aux);
......
@@ -31,21 +31,21 @@ struct event_constraint uncore_constraint_empty =
 MODULE_LICENSE("GPL");
 
-int uncore_pcibus_to_physid(struct pci_bus *bus)
+int uncore_pcibus_to_dieid(struct pci_bus *bus)
 {
 	struct pci2phy_map *map;
-	int phys_id = -1;
+	int die_id = -1;
 
 	raw_spin_lock(&pci2phy_map_lock);
 	list_for_each_entry(map, &pci2phy_map_head, list) {
 		if (map->segment == pci_domain_nr(bus)) {
-			phys_id = map->pbus_to_physid[bus->number];
+			die_id = map->pbus_to_dieid[bus->number];
 			break;
 		}
 	}
 	raw_spin_unlock(&pci2phy_map_lock);
 
-	return phys_id;
+	return die_id;
 }
 
 static void uncore_free_pcibus_map(void)
@@ -86,7 +86,7 @@ struct pci2phy_map *__find_pci2phy_map(int segment)
 	alloc = NULL;
 	map->segment = segment;
 	for (i = 0; i < 256; i++)
-		map->pbus_to_physid[i] = -1;
+		map->pbus_to_dieid[i] = -1;
 	list_add_tail(&map->list, &pci2phy_map_head);
 
 end:
@@ -332,7 +332,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 	uncore_pmu_init_hrtimer(box);
 	box->cpu = -1;
-	box->pci_phys_id = -1;
 	box->dieid = -1;
 
 	/* set default hrtimer timeout */
@@ -993,18 +992,11 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
 /*
  * Get the die information of a PCI device.
  * @pdev: The PCI device.
- * @phys_id: The physical socket id which the device maps to.
  * @die: The die id which the device maps to.
  */
-static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
-				       int *phys_id, int *die)
+static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
 {
-	*phys_id = uncore_pcibus_to_physid(pdev->bus);
-	if (*phys_id < 0)
-		return -ENODEV;
-
-	*die = (topology_max_die_per_package() > 1) ? *phys_id :
-				topology_phys_to_logical_pkg(*phys_id);
+	*die = uncore_pcibus_to_dieid(pdev->bus);
 	if (*die < 0)
 		return -EINVAL;
...@@ -1046,13 +1038,12 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) ...@@ -1046,13 +1038,12 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
* @pdev: The PCI device. * @pdev: The PCI device.
* @type: The corresponding PMU type of the device. * @type: The corresponding PMU type of the device.
* @pmu: The corresponding PMU of the device. * @pmu: The corresponding PMU of the device.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to. * @die: The die id which the device maps to.
*/ */
static int uncore_pci_pmu_register(struct pci_dev *pdev, static int uncore_pci_pmu_register(struct pci_dev *pdev,
struct intel_uncore_type *type, struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu, struct intel_uncore_pmu *pmu,
int phys_id, int die) int die)
{ {
struct intel_uncore_box *box; struct intel_uncore_box *box;
int ret; int ret;
...@@ -1070,7 +1061,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev, ...@@ -1070,7 +1061,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev,
WARN_ON_ONCE(pmu->func_id != pdev->devfn); WARN_ON_ONCE(pmu->func_id != pdev->devfn);
atomic_inc(&box->refcnt); atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
box->dieid = die; box->dieid = die;
box->pci_dev = pdev; box->pci_dev = pdev;
box->pmu = pmu; box->pmu = pmu;
...@@ -1097,9 +1087,9 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1097,9 +1087,9 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
{ {
struct intel_uncore_type *type; struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_pmu *pmu = NULL;
int phys_id, die, ret; int die, ret;
ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die); ret = uncore_pci_get_dev_die_info(pdev, &die);
if (ret) if (ret)
return ret; return ret;
...@@ -1132,7 +1122,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1132,7 +1122,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
} }
ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die); ret = uncore_pci_pmu_register(pdev, type, pmu, die);
pci_set_drvdata(pdev, pmu->boxes[die]); pci_set_drvdata(pdev, pmu->boxes[die]);
...@@ -1142,17 +1132,12 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ...@@ -1142,17 +1132,12 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
/* /*
* Unregister the PMU of a PCI device * Unregister the PMU of a PCI device
* @pmu: The corresponding PMU is unregistered. * @pmu: The corresponding PMU is unregistered.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to. * @die: The die id which the device maps to.
*/ */
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
int phys_id, int die)
{ {
struct intel_uncore_box *box = pmu->boxes[die]; struct intel_uncore_box *box = pmu->boxes[die];
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
return;
pmu->boxes[die] = NULL; pmu->boxes[die] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0) if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu); uncore_pmu_unregister(pmu);
...@@ -1164,9 +1149,9 @@ static void uncore_pci_remove(struct pci_dev *pdev) ...@@ -1164,9 +1149,9 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{ {
struct intel_uncore_box *box; struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu; struct intel_uncore_pmu *pmu;
int i, phys_id, die; int i, die;
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die)) if (uncore_pci_get_dev_die_info(pdev, &die))
return; return;
box = pci_get_drvdata(pdev); box = pci_get_drvdata(pdev);
...@@ -1185,7 +1170,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) ...@@ -1185,7 +1170,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
uncore_pci_pmu_unregister(pmu, phys_id, die); uncore_pci_pmu_unregister(pmu, die);
} }
static int uncore_bus_notify(struct notifier_block *nb, static int uncore_bus_notify(struct notifier_block *nb,
...@@ -1194,7 +1179,7 @@ static int uncore_bus_notify(struct notifier_block *nb, ...@@ -1194,7 +1179,7 @@ static int uncore_bus_notify(struct notifier_block *nb,
struct device *dev = data; struct device *dev = data;
struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *pdev = to_pci_dev(dev);
struct intel_uncore_pmu *pmu; struct intel_uncore_pmu *pmu;
int phys_id, die; int die;
/* Unregister the PMU when the device is going to be deleted. */ /* Unregister the PMU when the device is going to be deleted. */
if (action != BUS_NOTIFY_DEL_DEVICE) if (action != BUS_NOTIFY_DEL_DEVICE)
...@@ -1204,10 +1189,10 @@ static int uncore_bus_notify(struct notifier_block *nb, ...@@ -1204,10 +1189,10 @@ static int uncore_bus_notify(struct notifier_block *nb,
if (!pmu) if (!pmu)
return NOTIFY_DONE; return NOTIFY_DONE;
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die)) if (uncore_pci_get_dev_die_info(pdev, &die))
return NOTIFY_DONE; return NOTIFY_DONE;
uncore_pci_pmu_unregister(pmu, phys_id, die); uncore_pci_pmu_unregister(pmu, die);
return NOTIFY_OK; return NOTIFY_OK;
} }
...@@ -1224,7 +1209,7 @@ static void uncore_pci_sub_driver_init(void) ...@@ -1224,7 +1209,7 @@ static void uncore_pci_sub_driver_init(void)
struct pci_dev *pci_sub_dev; struct pci_dev *pci_sub_dev;
bool notify = false; bool notify = false;
unsigned int devfn; unsigned int devfn;
int phys_id, die; int die;
while (ids && ids->vendor) { while (ids && ids->vendor) {
pci_sub_dev = NULL; pci_sub_dev = NULL;
...@@ -1244,12 +1229,11 @@ static void uncore_pci_sub_driver_init(void) ...@@ -1244,12 +1229,11 @@ static void uncore_pci_sub_driver_init(void)
if (!pmu) if (!pmu)
continue; continue;
if (uncore_pci_get_dev_die_info(pci_sub_dev, if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
&phys_id, &die))
continue; continue;
if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
phys_id, die)) die))
notify = true; notify = true;
} }
ids++; ids++;
......
@@ -124,7 +124,6 @@ struct intel_uncore_extra_reg {
 };
 
 struct intel_uncore_box {
-	int pci_phys_id;
 	int dieid;	/* Logical die ID */
 	int n_active;	/* number of active events */
 	int n_events;
@@ -173,11 +172,11 @@ struct freerunning_counters {
 struct pci2phy_map {
 	struct list_head list;
 	int segment;
-	int pbus_to_physid[256];
+	int pbus_to_dieid[256];
 };
 
 struct pci2phy_map *__find_pci2phy_map(int segment);
-int uncore_pcibus_to_physid(struct pci_bus *bus);
+int uncore_pcibus_to_dieid(struct pci_bus *bus);
 
 ssize_t uncore_event_show(struct device *dev,
 			  struct device_attribute *attr, char *buf);
......
@@ -657,7 +657,7 @@ int snb_pci2phy_map_init(int devid)
 		pci_dev_put(dev);
 		return -ENOMEM;
 	}
-	map->pbus_to_physid[bus] = 0;
+	map->pbus_to_dieid[bus] = 0;
 	raw_spin_unlock(&pci2phy_map_lock);
 	pci_dev_put(dev);
......
@@ -1359,7 +1359,7 @@ static struct pci_driver snbep_uncore_pci_driver = {
 static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
 {
 	struct pci_dev *ubox_dev = NULL;
-	int i, bus, nodeid, segment;
+	int i, bus, nodeid, segment, die_id;
 	struct pci2phy_map *map;
 	int err = 0;
 	u32 config = 0;
@@ -1370,36 +1370,77 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
 		if (!ubox_dev)
 			break;
 		bus = ubox_dev->bus->number;
-		/* get the Node ID of the local register */
-		err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
-		if (err)
-			break;
-		nodeid = config & NODE_ID_MASK;
-		/* get the Node ID mapping */
-		err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
-		if (err)
-			break;
+		/*
+		 * The nodeid and idmap registers only contain enough
+		 * information to handle 8 nodes. On systems with more
+		 * than 8 nodes, we need to rely on NUMA information,
+		 * filled in from BIOS supplied information, to determine
+		 * the topology.
+		 */
+		if (nr_node_ids <= 8) {
+			/* get the Node ID of the local register */
+			err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
+			if (err)
+				break;
+			nodeid = config & NODE_ID_MASK;
+			/* get the Node ID mapping */
+			err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
+			if (err)
+				break;
 
-		segment = pci_domain_nr(ubox_dev->bus);
-		raw_spin_lock(&pci2phy_map_lock);
-		map = __find_pci2phy_map(segment);
-		if (!map) {
+			segment = pci_domain_nr(ubox_dev->bus);
+			raw_spin_lock(&pci2phy_map_lock);
+			map = __find_pci2phy_map(segment);
+			if (!map) {
+				raw_spin_unlock(&pci2phy_map_lock);
+				err = -ENOMEM;
+				break;
+			}
+
+			/*
+			 * every three bits in the Node ID mapping register maps
+			 * to a particular node.
+			 */
+			for (i = 0; i < 8; i++) {
+				if (nodeid == ((config >> (3 * i)) & 0x7)) {
+					if (topology_max_die_per_package() > 1)
+						die_id = i;
+					else
+						die_id = topology_phys_to_logical_pkg(i);
+					map->pbus_to_dieid[bus] = die_id;
+					break;
+				}
+			}
 			raw_spin_unlock(&pci2phy_map_lock);
-			err = -ENOMEM;
-			break;
-		}
+		} else {
+			int node = pcibus_to_node(ubox_dev->bus);
+			int cpu;
 
-		/*
-		 * every three bits in the Node ID mapping register maps
-		 * to a particular node.
-		 */
-		for (i = 0; i < 8; i++) {
-			if (nodeid == ((config >> (3 * i)) & 0x7)) {
-				map->pbus_to_physid[bus] = i;
+			segment = pci_domain_nr(ubox_dev->bus);
+			raw_spin_lock(&pci2phy_map_lock);
+			map = __find_pci2phy_map(segment);
+			if (!map) {
+				raw_spin_unlock(&pci2phy_map_lock);
+				err = -ENOMEM;
+				break;
+			}
+
+			die_id = -1;
+			for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
+				struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+				if (c->initialized && cpu_to_node(cpu) == node) {
+					map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
+					break;
+				}
+			}
+			raw_spin_unlock(&pci2phy_map_lock);
+
+			if (WARN_ON_ONCE(die_id == -1)) {
+				err = -EINVAL;
 				break;
 			}
 		}
-		raw_spin_unlock(&pci2phy_map_lock);
 	}
 
 	if (!err) {
@@ -1412,17 +1453,17 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
 		i = -1;
 		if (reverse) {
 			for (bus = 255; bus >= 0; bus--) {
-				if (map->pbus_to_physid[bus] >= 0)
-					i = map->pbus_to_physid[bus];
+				if (map->pbus_to_dieid[bus] >= 0)
+					i = map->pbus_to_dieid[bus];
 				else
-					map->pbus_to_physid[bus] = i;
+					map->pbus_to_dieid[bus] = i;
 			}
 		} else {
 			for (bus = 0; bus <= 255; bus++) {
-				if (map->pbus_to_physid[bus] >= 0)
-					i = map->pbus_to_physid[bus];
+				if (map->pbus_to_dieid[bus] >= 0)
+					i = map->pbus_to_dieid[bus];
 				else
-					map->pbus_to_physid[bus] = i;
+					map->pbus_to_dieid[bus] = i;
 			}
 		}
 	}
@@ -4646,19 +4687,14 @@ int snr_uncore_pci_init(void)
 static struct pci_dev *snr_uncore_get_mc_dev(int id)
 {
 	struct pci_dev *mc_dev = NULL;
-	int phys_id, pkg;
+	int pkg;
 
 	while (1) {
 		mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
 		if (!mc_dev)
 			break;
-		phys_id = uncore_pcibus_to_physid(mc_dev->bus);
-		if (phys_id < 0)
-			continue;
-		pkg = topology_phys_to_logical_pkg(phys_id);
-		if (pkg < 0)
-			continue;
-		else if (pkg == id)
+		pkg = uncore_pcibus_to_dieid(mc_dev->bus);
+		if (pkg == id)
 			break;
 	}
 	return mc_dev;
......
...@@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) ...@@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ #define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */ #define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */ #define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
static inline bool is_topdown_count(struct perf_event *event) static inline bool is_topdown_count(struct perf_event *event)
{ {
...@@ -443,6 +444,10 @@ struct cpu_hw_events { ...@@ -443,6 +444,10 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PSD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)
#define INTEL_PST_CONSTRAINT(c, n) \ #define INTEL_PST_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
@@ -682,8 +687,7 @@ struct x86_pmu {
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
-			enabled_ack		:1,
-			counter_freezing	:1;
+			enabled_ack		:1;
 	/*
 	 * sysfs attrs
 	 */
@@ -724,7 +728,8 @@ struct x86_pmu {
 			pebs_broken		:1,
 			pebs_prec_dist		:1,
 			pebs_no_tlb		:1,
-			pebs_no_isolation	:1;
+			pebs_no_isolation	:1,
+			pebs_block		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	int		max_pebs_events;
...@@ -776,6 +781,7 @@ struct x86_pmu { ...@@ -776,6 +781,7 @@ struct x86_pmu {
/* /*
* Intel perf metrics * Intel perf metrics
*/ */
int num_topdown_events;
u64 (*update_topdown_event)(struct perf_event *event); u64 (*update_topdown_event)(struct perf_event *event);
int (*set_topdown_event_period)(struct perf_event *event); int (*set_topdown_event_period)(struct perf_event *event);
...@@ -871,6 +877,8 @@ do { \ ...@@ -871,6 +877,8 @@ do { \
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
#define PMU_FL_TFA 0x20 /* deal with TSX force abort */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define EVENT_VAR(_id) event_attr_##_id #define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
...@@ -1060,6 +1068,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, ...@@ -1060,6 +1068,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page); char *page);
static inline bool fixed_counter_disabled(int i)
{
return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}
#ifdef CONFIG_CPU_SUP_AMD #ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void); int amd_pmu_init(void);
...@@ -1157,6 +1170,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; ...@@ -1157,6 +1170,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
extern struct event_constraint intel_icl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[];
extern struct event_constraint intel_spr_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event); struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event);
......
...@@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) ...@@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
for (bit = 0; bit < cnt; bit++) { for (bit = 0; bit < cnt; bit++) {
if (!msr[bit].no_check) { if (!msr[bit].no_check) {
struct attribute_group *grp = msr[bit].grp; struct attribute_group *grp = msr[bit].grp;
u64 mask;
/* skip entry with no group */ /* skip entry with no group */
if (!grp) if (!grp)
@@ -44,8 +45,12 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
 			/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
 			if (rdmsrl_safe(msr[bit].msr, &val))
 				continue;
 
+			mask = msr[bit].mask;
+			if (!mask)
+				mask = ~0ULL;
+
 			/* Disable zero counters if requested. */
-			if (!zero && !val)
+			if (!zero && !(val & mask))
 				continue;
 
 			grp->is_visible = NULL;
......
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
#include <linux/sysfs.h> #include <linux/sysfs.h>
struct perf_msr { struct perf_msr {
u64 msr; u64 msr;
struct attribute_group *grp; struct attribute_group *grp;
bool (*test)(int idx, void *data); bool (*test)(int idx, void *data);
bool no_check; bool no_check;
u64 mask;
}; };
unsigned long unsigned long
......
...@@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = { ...@@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = {
NULL, NULL,
}; };
static umode_t
rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return 0;
}
static struct attribute_group rapl_events_cores_group = { static struct attribute_group rapl_events_cores_group = {
.name = "events", .name = "events",
.attrs = rapl_events_cores, .attrs = rapl_events_cores,
.is_visible = rapl_not_visible,
}; };
static struct attribute *rapl_events_pkg[] = { static struct attribute *rapl_events_pkg[] = {
...@@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = { ...@@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = {
static struct attribute_group rapl_events_pkg_group = { static struct attribute_group rapl_events_pkg_group = {
.name = "events", .name = "events",
.attrs = rapl_events_pkg, .attrs = rapl_events_pkg,
.is_visible = rapl_not_visible,
}; };
static struct attribute *rapl_events_ram[] = { static struct attribute *rapl_events_ram[] = {
...@@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = { ...@@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = {
static struct attribute_group rapl_events_ram_group = { static struct attribute_group rapl_events_ram_group = {
.name = "events", .name = "events",
.attrs = rapl_events_ram, .attrs = rapl_events_ram,
.is_visible = rapl_not_visible,
}; };
static struct attribute *rapl_events_gpu[] = { static struct attribute *rapl_events_gpu[] = {
...@@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = { ...@@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = {
static struct attribute_group rapl_events_gpu_group = { static struct attribute_group rapl_events_gpu_group = {
.name = "events", .name = "events",
.attrs = rapl_events_gpu, .attrs = rapl_events_gpu,
.is_visible = rapl_not_visible,
}; };
static struct attribute *rapl_events_psys[] = { static struct attribute *rapl_events_psys[] = {
...@@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = { ...@@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = {
static struct attribute_group rapl_events_psys_group = { static struct attribute_group rapl_events_psys_group = {
.name = "events", .name = "events",
.attrs = rapl_events_psys, .attrs = rapl_events_psys,
.is_visible = rapl_not_visible,
}; };
static bool test_msr(int idx, void *data) static bool test_msr(int idx, void *data)
@@ -523,12 +512,23 @@ static bool test_msr(int idx, void *data)
 	return test_bit(idx, (unsigned long *) data);
 }
 
+/* Only lower 32bits of the MSR represents the energy counter */
+#define RAPL_MSR_MASK 0xFFFFFFFF
+
 static struct perf_msr intel_rapl_msrs[] = {
-	[PERF_RAPL_PP0]  = { MSR_PP0_ENERGY_STATUS,      &rapl_events_cores_group, test_msr },
-	[PERF_RAPL_PKG]  = { MSR_PKG_ENERGY_STATUS,      &rapl_events_pkg_group,   test_msr },
-	[PERF_RAPL_RAM]  = { MSR_DRAM_ENERGY_STATUS,     &rapl_events_ram_group,   test_msr },
-	[PERF_RAPL_PP1]  = { MSR_PP1_ENERGY_STATUS,      &rapl_events_gpu_group,   test_msr },
-	[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group,  test_msr },
+	[PERF_RAPL_PP0]  = { MSR_PP0_ENERGY_STATUS,      &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PKG]  = { MSR_PKG_ENERGY_STATUS,      &rapl_events_pkg_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_RAM]  = { MSR_DRAM_ENERGY_STATUS,     &rapl_events_ram_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PP1]  = { MSR_PP1_ENERGY_STATUS,      &rapl_events_gpu_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group,  test_msr, false, RAPL_MSR_MASK },
+};
+
+static struct perf_msr intel_rapl_spr_msrs[] = {
+	[PERF_RAPL_PP0]  = { MSR_PP0_ENERGY_STATUS,      &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PKG]  = { MSR_PKG_ENERGY_STATUS,      &rapl_events_pkg_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_RAM]  = { MSR_DRAM_ENERGY_STATUS,     &rapl_events_ram_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PP1]  = { MSR_PP1_ENERGY_STATUS,      &rapl_events_gpu_group,   test_msr, false, RAPL_MSR_MASK },
+	[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group,  test_msr, true,  RAPL_MSR_MASK },
 };
 
 /*
/* /*
...@@ -761,7 +761,7 @@ static struct rapl_model model_spr = { ...@@ -761,7 +761,7 @@ static struct rapl_model model_spr = {
BIT(PERF_RAPL_PSYS), BIT(PERF_RAPL_PSYS),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
.msr_power_unit = MSR_RAPL_POWER_UNIT, .msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs, .rapl_msrs = intel_rapl_spr_msrs,
}; };
static struct rapl_model model_amd_fam17h = { static struct rapl_model model_amd_fam17h = {
......
@@ -58,14 +58,17 @@ struct arch_specific_insn {
 	/* copy of the original instruction */
 	kprobe_opcode_t *insn;
 	/*
-	 * boostable = false: This instruction type is not boostable.
-	 * boostable = true: This instruction has been boosted: we have
+	 * boostable = 0: This instruction type is not boostable.
+	 * boostable = 1: This instruction has been boosted: we have
 	 * added a relative jump after the instruction copy in insn,
 	 * so no single-step and fixup are needed (unless there's
 	 * a post_handler).
 	 */
-	bool boostable;
-	bool if_modifier;
+	unsigned boostable: 1;
+	unsigned if_modifier: 1;
+	unsigned is_call: 1;
+	unsigned is_pushf: 1;
+	unsigned is_abs_ip: 1;
 	/* Number of bytes of text poked */
 	int tp_len;
 };
......
@@ -261,8 +261,12 @@ struct x86_pmu_capability {
 #define INTEL_PMC_IDX_TD_BAD_SPEC		(INTEL_PMC_IDX_METRIC_BASE + 1)
 #define INTEL_PMC_IDX_TD_FE_BOUND		(INTEL_PMC_IDX_METRIC_BASE + 2)
 #define INTEL_PMC_IDX_TD_BE_BOUND		(INTEL_PMC_IDX_METRIC_BASE + 3)
-#define INTEL_PMC_IDX_METRIC_END		INTEL_PMC_IDX_TD_BE_BOUND
-#define INTEL_PMC_MSK_TOPDOWN			((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
+#define INTEL_PMC_IDX_TD_HEAVY_OPS		(INTEL_PMC_IDX_METRIC_BASE + 4)
+#define INTEL_PMC_IDX_TD_BR_MISPREDICT		(INTEL_PMC_IDX_METRIC_BASE + 5)
+#define INTEL_PMC_IDX_TD_FETCH_LAT		(INTEL_PMC_IDX_METRIC_BASE + 6)
+#define INTEL_PMC_IDX_TD_MEM_BOUND		(INTEL_PMC_IDX_METRIC_BASE + 7)
+#define INTEL_PMC_IDX_METRIC_END		INTEL_PMC_IDX_TD_MEM_BOUND
+#define INTEL_PMC_MSK_TOPDOWN			((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \
 						INTEL_PMC_MSK_FIXED_SLOTS)
 
 /*
@@ -280,8 +284,14 @@ struct x86_pmu_capability {
 #define INTEL_TD_METRIC_BAD_SPEC		0x8100	/* Bad speculation metric */
 #define INTEL_TD_METRIC_FE_BOUND		0x8200	/* FE bound metric */
 #define INTEL_TD_METRIC_BE_BOUND		0x8300	/* BE bound metric */
-#define INTEL_TD_METRIC_MAX			INTEL_TD_METRIC_BE_BOUND
-#define INTEL_TD_METRIC_NUM			4
+/* Level 2 metrics */
+#define INTEL_TD_METRIC_HEAVY_OPS		0x8400	/* Heavy Operations metric */
+#define INTEL_TD_METRIC_BR_MISPREDICT		0x8500	/* Branch Mispredict metric */
+#define INTEL_TD_METRIC_FETCH_LAT		0x8600	/* Fetch Latency metric */
+#define INTEL_TD_METRIC_MEM_BOUND		0x8700	/* Memory bound metric */
+
+#define INTEL_TD_METRIC_MAX			INTEL_TD_METRIC_MEM_BOUND
+#define INTEL_TD_METRIC_NUM			8
 
 static inline bool is_metric_idx(int idx)
 {
@@ -483,11 +493,7 @@ static inline void perf_check_microcode(void) { }
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
 #else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
-	*nr = 0;
-	return NULL;
-}
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 {
 	return -1;
......
...@@ -132,26 +132,6 @@ void synthesize_relcall(void *dest, void *from, void *to) ...@@ -132,26 +132,6 @@ void synthesize_relcall(void *dest, void *from, void *to)
} }
NOKPROBE_SYMBOL(synthesize_relcall); NOKPROBE_SYMBOL(synthesize_relcall);
/*
* Skip the prefixes of the instruction.
*/
static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
{
insn_attr_t attr;
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
while (inat_is_legacy_prefix(attr)) {
insn++;
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
}
#ifdef CONFIG_X86_64
if (inat_is_rex_prefix(attr))
insn++;
#endif
return insn;
}
NOKPROBE_SYMBOL(skip_prefixes);
/* /*
* Returns non-zero if INSN is boostable. * Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode * RIP relative instructions are adjusted at copying time in 64 bits mode
...@@ -311,25 +291,6 @@ static int can_probe(unsigned long paddr) ...@@ -311,25 +291,6 @@ static int can_probe(unsigned long paddr)
return (addr == paddr); return (addr == paddr);
} }
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
static int is_IF_modifier(kprobe_opcode_t *insn)
{
/* Skip prefixes */
insn = skip_prefixes(insn);
switch (*insn) {
case 0xfa: /* cli */
case 0xfb: /* sti */
case 0xcf: /* iret/iretd */
case 0x9d: /* popf/popfd */
return 1;
}
return 0;
}
/* /*
* Copy an instruction with recovering modified instruction by kprobes * Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative * and adjust the displacement if the instruction uses the %rip-relative
@@ -411,9 +372,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
 		synthesize_reljump(buf + len, p->ainsn.insn + len,
 				   p->addr + insn->length);
 		len += JMP32_INSN_SIZE;
-		p->ainsn.boostable = true;
+		p->ainsn.boostable = 1;
 	} else {
-		p->ainsn.boostable = false;
+		p->ainsn.boostable = 0;
 	}
 
 	return len;
...@@ -450,6 +411,67 @@ void free_insn_page(void *page) ...@@ -450,6 +411,67 @@ void free_insn_page(void *page)
module_memfree(page); module_memfree(page);
} }
static void set_resume_flags(struct kprobe *p, struct insn *insn)
{
insn_byte_t opcode = insn->opcode.bytes[0];
switch (opcode) {
case 0xfa: /* cli */
case 0xfb: /* sti */
case 0x9d: /* popf/popfd */
/* Check whether the instruction modifies Interrupt Flag or not */
p->ainsn.if_modifier = 1;
break;
case 0x9c: /* pushfl */
p->ainsn.is_pushf = 1;
break;
case 0xcf: /* iret */
p->ainsn.if_modifier = 1;
fallthrough;
case 0xc2: /* ret/lret */
case 0xc3:
case 0xca:
case 0xcb:
case 0xea: /* jmp absolute -- ip is correct */
/* ip is already adjusted, no more changes required */
p->ainsn.is_abs_ip = 1;
/* Without resume jump, this is boostable */
p->ainsn.boostable = 1;
break;
case 0xe8: /* call relative - Fix return addr */
p->ainsn.is_call = 1;
break;
#ifdef CONFIG_X86_32
case 0x9a: /* call absolute -- same as call absolute, indirect */
p->ainsn.is_call = 1;
p->ainsn.is_abs_ip = 1;
break;
#endif
case 0xff:
opcode = insn->opcode.bytes[1];
if ((opcode & 0x30) == 0x10) {
/*
* call absolute, indirect
* Fix return addr; ip is correct.
* But this is not boostable
*/
p->ainsn.is_call = 1;
p->ainsn.is_abs_ip = 1;
break;
} else if (((opcode & 0x31) == 0x20) ||
((opcode & 0x31) == 0x21)) {
/*
* jmp near and far, absolute indirect
* ip is correct.
*/
p->ainsn.is_abs_ip = 1;
/* Without resume jump, this is boostable */
p->ainsn.boostable = 1;
}
break;
}
}
static int arch_copy_kprobe(struct kprobe *p) static int arch_copy_kprobe(struct kprobe *p)
{ {
struct insn insn; struct insn insn;
@@ -467,8 +489,8 @@ static int arch_copy_kprobe(struct kprobe *p)
 	 */
 	len = prepare_boost(buf, p, &insn);
 
-	/* Check whether the instruction modifies Interrupt Flag or not */
-	p->ainsn.if_modifier = is_IF_modifier(buf);
+	/* Analyze the opcode and set resume flags */
+	set_resume_flags(p, &insn);
 
 	/* Also, displacement change doesn't affect the first byte */
 	p->opcode = buf[0];
@@ -491,6 +513,9 @@ int arch_prepare_kprobe(struct kprobe *p)
 	if (!can_probe((unsigned long)p->addr))
 		return -EILSEQ;
 
+	memset(&p->ainsn, 0, sizeof(p->ainsn));
+
 	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)
...@@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler); ...@@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
* 2) If the single-stepped instruction was a call, the return address * 2) If the single-stepped instruction was a call, the return address
* that is atop the stack is the address following the copied instruction. * that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction. * We need to make it the address following the original instruction.
*
* If this is the first time we've single-stepped the instruction at
* this probepoint, and the instruction is boostable, boost it: add a
* jump instruction after the copied instruction, that jumps to the next
* instruction after the probepoint.
*/ */
static void resume_execution(struct kprobe *p, struct pt_regs *regs, static void resume_execution(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb) struct kprobe_ctlblk *kcb)
@@ -818,60 +838,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
 	unsigned long *tos = stack_addr(regs);
 	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
 	unsigned long orig_ip = (unsigned long)p->addr;
-	kprobe_opcode_t *insn = p->ainsn.insn;
-
-	/* Skip prefixes */
-	insn = skip_prefixes(insn);
 
 	regs->flags &= ~X86_EFLAGS_TF;
-	switch (*insn) {
-	case 0x9c:	/* pushfl */
+
+	/* Fixup the contents of top of stack */
+	if (p->ainsn.is_pushf) {
 		*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
 		*tos |= kcb->kprobe_old_flags;
-		break;
-	case 0xc2:	/* iret/ret/lret */
-	case 0xc3:
-	case 0xca:
-	case 0xcb:
-	case 0xcf:
-	case 0xea:	/* jmp absolute -- ip is correct */
-		/* ip is already adjusted, no more changes required */
-		p->ainsn.boostable = true;
-		goto no_change;
-	case 0xe8:	/* call relative - Fix return addr */
+	} else if (p->ainsn.is_call) {
 		*tos = orig_ip + (*tos - copy_ip);
-		break;
-#ifdef CONFIG_X86_32
-	case 0x9a:	/* call absolute -- same as call absolute, indirect */
-		*tos = orig_ip + (*tos - copy_ip);
-		goto no_change;
-#endif
-	case 0xff:
-		if ((insn[1] & 0x30) == 0x10) {
-			/*
-			 * call absolute, indirect
-			 * Fix return addr; ip is correct.
-			 * But this is not boostable
-			 */
-			*tos = orig_ip + (*tos - copy_ip);
-			goto no_change;
-		} else if (((insn[1] & 0x31) == 0x20) ||
-			   ((insn[1] & 0x31) == 0x21)) {
-			/*
-			 * jmp near and far, absolute indirect
-			 * ip is correct. And this is boostable
-			 */
-			p->ainsn.boostable = true;
-			goto no_change;
-		}
-		break;
-	default:
-		break;
 	}
 
-	regs->ip += orig_ip - copy_ip;
+	if (!p->ainsn.is_abs_ip)
+		regs->ip += orig_ip - copy_ip;
 
-no_change:
 	restore_btf();
 }
 NOKPROBE_SYMBOL(resume_execution);
......
@@ -998,7 +998,7 @@ struct perf_sample_data {
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
 	u64				period;
-	u64				weight;
+	union perf_sample_weight	weight;
 	u64				txn;
 	union  perf_mem_data_src	data_src;
 
@@ -1047,7 +1047,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->raw  = NULL;
 	data->br_stack = NULL;
 	data->period = period;
-	data->weight = 0;
+	data->weight.full = 0;
 	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
 }
......
@@ -145,12 +145,14 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_CGROUP			= 1U << 21,
 	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
 	PERF_SAMPLE_CODE_PAGE_SIZE		= 1U << 23,
+	PERF_SAMPLE_WEIGHT_STRUCT		= 1U << 24,
 
-	PERF_SAMPLE_MAX = 1U << 24,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 25,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
+#define PERF_SAMPLE_WEIGHT_TYPE	(PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
 /*
  * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
  *
@@ -912,7 +914,24 @@ enum perf_event_type {
  *	  char			data[size];
  *	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
  *
- *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
+ *	{ union perf_sample_weight
+ *	 {
+ *		u64		full; && PERF_SAMPLE_WEIGHT
+ *	#if defined(__LITTLE_ENDIAN_BITFIELD)
+ *		struct {
+ *			u32	var1_dw;
+ *			u16	var2_w;
+ *			u16	var3_w;
+ *		} && PERF_SAMPLE_WEIGHT_STRUCT
+ *	#elif defined(__BIG_ENDIAN_BITFIELD)
+ *		struct {
+ *			u16	var3_w;
+ *			u16	var2_w;
+ *			u32	var1_dw;
+ *		} && PERF_SAMPLE_WEIGHT_STRUCT
+ *	#endif
+ *	 }
+ *	}
  *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
  *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
  *	{ u64			abi; # enum perf_sample_regs_abi
@@ -1159,14 +1178,16 @@ union perf_mem_data_src {
 			mem_lvl_num:4,	/* memory hierarchy level number */
 			mem_remote:1,   /* remote */
 			mem_snoopx:2,	/* snoop mode, ext */
-			mem_rsvd:24;
+			mem_blk:3,	/* access blocked */
+			mem_rsvd:21;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:24,
+		__u64	mem_rsvd:21,
+			mem_blk:3,	/* access blocked */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_remote:1,   /* remote */
 			mem_lvl_num:4,	/* memory hierarchy level number */
...@@ -1249,6 +1270,12 @@ union perf_mem_data_src { ...@@ -1249,6 +1270,12 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT 26 #define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
#define PERF_MEM_BLK_NA 0x01 /* not available */
#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
#define PERF_MEM_BLK_SHIFT 40
#define PERF_MEM_S(a, s) \ #define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
...@@ -1280,4 +1307,23 @@ struct perf_branch_entry { ...@@ -1280,4 +1307,23 @@ struct perf_branch_entry {
reserved:40; reserved:40;
}; };
union perf_sample_weight {
__u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
__u32 var1_dw;
__u16 var2_w;
__u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
__u16 var3_w;
__u16 var2_w;
__u32 var1_dw;
};
#else
#error "Unknown endianness"
#endif
};
#endif /* _UAPI_LINUX_PERF_EVENT_H */ #endif /* _UAPI_LINUX_PERF_EVENT_H */
@@ -1866,8 +1866,8 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		size += sizeof(data->period);
 
-	if (sample_type & PERF_SAMPLE_WEIGHT)
-		size += sizeof(data->weight);
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+		size += sizeof(data->weight.full);
 
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
@@ -6896,8 +6896,8 @@ void perf_output_sample(struct perf_output_handle *handle,
 				     data->regs_user.regs);
 	}
 
-	if (sample_type & PERF_SAMPLE_WEIGHT)
-		perf_output_put(handle, data->weight);
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+		perf_output_put(handle, data->weight.full);
 
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
@@ -11573,6 +11573,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->sample_type & PERF_SAMPLE_CGROUP)
 		return -EINVAL;
 #endif
+	if ((attr->sample_type & PERF_SAMPLE_WEIGHT) &&
+	    (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
+		return -EINVAL;
 
 out:
 	return ret;
......