Commit a72594ca authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-4.16-20180117' of...

Merge tag 'perf-core-for-mingo-4.16-20180117' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Fix various per event 'max-stack' and 'call-graph=dwarf' issues,
  mostly in 'perf trace', allowing to use 'perf trace --call-graph' with
  'dwarf' and 'fp' to setup the callgraph details for the syscall events
  and make that apply to other events, whilhe allowing to override that on
  a per-event basis, using '-e sched:*switch/call-graph=dwarf/' for
  instance (Arnaldo Carvalho de Melo)

- Improve the --time percent support in record/report/script (Jin Yao)

- Fix copyfile_offset update of output offset (Jiri Olsa)

- Add python script to profile and resolve physical mem type (Kan Liang)

- Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips)

- Remove trailing semicolon in the evlist code (Luis de Bethencourt)

- Fix incorrect handling of type _TERM_DRV_CFG (Mathieu Poirier)

- Use asprintf when possible in libtraceevent (Federico Vaga)

- Fix bad force_token escape sequence in libtraceevent (Michael Sartain)

- Add UL suffix to MISSING_EVENTS in libtraceevent (Michael Sartain)

- value of unknown symbolic fields in libtraceevent (Jan Kiszka)

- libtraceevent updates: (Steven Rostedt)
  o Show value of flags that have not been parsed
  o Simplify pointer print logic and fix %pF
  o Handle new pointer processing of bprint strings
  o Show contents (in hex) of data of unrecognized type records
  o Fix get_field_str() for dynamic strings

- Add missing break in FALSE case of pevent_filter_clear_trivial() (Taeung Song)

- Fix failed memory allocation for get_cpuid_str (Thomas Richter)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 7a7368a5 81fccd6c
......@@ -1094,7 +1094,7 @@ static enum event_type __read_token(char **tok)
if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
free(*tok);
*tok = NULL;
return force_token("\"\%s\" ", tok);
return force_token("\"%s\" ", tok);
} else if (strcmp(*tok, "STA_PR_FMT") == 0) {
free(*tok);
*tok = NULL;
......@@ -3970,6 +3970,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
val &= ~fval;
}
}
if (val) {
if (print && arg->flags.delim)
trace_seq_puts(s, arg->flags.delim);
trace_seq_printf(s, "0x%llx", val);
}
break;
case PRINT_SYMBOL:
val = eval_num_arg(data, size, event, arg->symbol.field);
......@@ -3980,6 +3985,8 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
break;
}
}
if (!flag)
trace_seq_printf(s, "0x%llx", val);
break;
case PRINT_HEX:
case PRINT_HEX_STR:
......@@ -4293,6 +4300,26 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
goto process_again;
case 'p':
ls = 1;
if (isalnum(ptr[1])) {
ptr++;
/* Check for special pointers */
switch (*ptr) {
case 's':
case 'S':
case 'f':
case 'F':
break;
default:
/*
* Older kernels do not process
* dereferenced pointers.
* Only process if the pointer
* value is a printable.
*/
if (isprint(*(char *)bptr))
goto process_string;
}
}
/* fall through */
case 'd':
case 'u':
......@@ -4345,6 +4372,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
break;
case 's':
process_string:
arg = alloc_arg();
if (!arg) {
do_warning_event(event, "%s(%d): not enough memory!",
......@@ -4949,21 +4977,27 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
else
ls = 2;
if (*(ptr+1) == 'F' || *(ptr+1) == 'f' ||
*(ptr+1) == 'S' || *(ptr+1) == 's') {
if (isalnum(ptr[1]))
ptr++;
if (arg->type == PRINT_BSTRING) {
trace_seq_puts(s, arg->string.string);
break;
}
if (*ptr == 'F' || *ptr == 'f' ||
*ptr == 'S' || *ptr == 's') {
show_func = *ptr;
} else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') {
print_mac_arg(s, *(ptr+1), data, size, event, arg);
ptr++;
} else if (*ptr == 'M' || *ptr == 'm') {
print_mac_arg(s, *ptr, data, size, event, arg);
arg = arg->next;
break;
} else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') {
} else if (*ptr == 'I' || *ptr == 'i') {
int n;
n = print_ip_arg(s, ptr+1, data, size, event, arg);
n = print_ip_arg(s, ptr, data, size, event, arg);
if (n > 0) {
ptr += n;
ptr += n - 1;
arg = arg->next;
break;
}
......@@ -5532,8 +5566,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
event = pevent_find_event_by_record(pevent, record);
if (!event) {
do_warning("ug! no event found for type %d",
trace_parse_common_type(pevent, record->data));
int i;
int type = trace_parse_common_type(pevent, record->data);
do_warning("ug! no event found for type %d", type);
trace_seq_printf(s, "[UNKNOWN TYPE %d]", type);
for (i = 0; i < record->size; i++)
trace_seq_printf(s, " %02x",
((unsigned char *)record->data)[i]);
return;
}
......
......@@ -120,12 +120,12 @@ char **traceevent_plugin_list_options(void)
for (op = reg->options; op->name; op++) {
char *alias = op->plugin_alias ? op->plugin_alias : op->file;
char **temp = list;
int ret;
name = malloc(strlen(op->name) + strlen(alias) + 2);
if (!name)
ret = asprintf(&name, "%s:%s", alias, op->name);
if (ret < 0)
goto err;
sprintf(name, "%s:%s", alias, op->name);
list = realloc(list, count + 2);
if (!list) {
list = temp;
......@@ -290,17 +290,14 @@ load_plugin(struct pevent *pevent, const char *path,
const char *alias;
char *plugin;
void *handle;
int ret;
plugin = malloc(strlen(path) + strlen(file) + 2);
if (!plugin) {
ret = asprintf(&plugin, "%s/%s", path, file);
if (ret < 0) {
warning("could not allocate plugin memory\n");
return;
}
strcpy(plugin, path);
strcat(plugin, "/");
strcat(plugin, file);
handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
if (!handle) {
warning("could not load plugin '%s'\n%s\n",
......@@ -391,6 +388,7 @@ load_plugins(struct pevent *pevent, const char *suffix,
char *home;
char *path;
char *envdir;
int ret;
if (pevent->flags & PEVENT_DISABLE_PLUGINS)
return;
......@@ -421,16 +419,12 @@ load_plugins(struct pevent *pevent, const char *suffix,
if (!home)
return;
path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2);
if (!path) {
ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
if (ret < 0) {
warning("could not allocate plugin memory\n");
return;
}
strcpy(path, home);
strcat(path, "/");
strcat(path, LOCAL_PLUGIN_DIR);
load_plugins_dir(pevent, suffix, path, load_plugin, data);
free(path);
......
......@@ -24,8 +24,8 @@
#include "kbuffer.h"
#define MISSING_EVENTS (1 << 31)
#define MISSING_STORED (1 << 30)
#define MISSING_EVENTS (1UL << 31)
#define MISSING_STORED (1UL << 30)
#define COMMIT_MASK ((1 << 27) - 1)
......
......@@ -287,12 +287,10 @@ find_event(struct pevent *pevent, struct event_list **events,
sys_name = NULL;
}
reg = malloc(strlen(event_name) + 3);
if (reg == NULL)
ret = asprintf(&reg, "^%s$", event_name);
if (ret < 0)
return PEVENT_ERRNO__MEM_ALLOC_FAILED;
sprintf(reg, "^%s$", event_name);
ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
free(reg);
......@@ -300,13 +298,12 @@ find_event(struct pevent *pevent, struct event_list **events,
return PEVENT_ERRNO__INVALID_EVENT_NAME;
if (sys_name) {
reg = malloc(strlen(sys_name) + 3);
if (reg == NULL) {
ret = asprintf(&reg, "^%s$", sys_name);
if (ret < 0) {
regfree(&ereg);
return PEVENT_ERRNO__MEM_ALLOC_FAILED;
}
sprintf(reg, "^%s$", sys_name);
ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
free(reg);
if (ret) {
......@@ -1634,6 +1631,7 @@ int pevent_filter_clear_trivial(struct event_filter *filter,
case FILTER_TRIVIAL_FALSE:
if (filter_type->filter->boolean.value)
continue;
break;
case FILTER_TRIVIAL_TRUE:
if (!filter_type->filter->boolean.value)
continue;
......@@ -1879,17 +1877,25 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r
struct pevent *pevent;
unsigned long long addr;
const char *val = NULL;
unsigned int size;
char hex[64];
/* If the field is not a string convert it */
if (arg->str.field->flags & FIELD_IS_STRING) {
val = record->data + arg->str.field->offset;
size = arg->str.field->size;
if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
addr = *(unsigned int *)val;
val = record->data + (addr & 0xffff);
size = addr >> 16;
}
/*
* We need to copy the data since we can't be sure the field
* is null terminated.
*/
if (*(val + arg->str.field->size - 1)) {
if (*(val + size - 1)) {
/* copy it */
memcpy(arg->str.buffer, val, arg->str.field->size);
/* the buffer is already NULL terminated */
......
......@@ -403,7 +403,7 @@ OPTIONS
to end of file.
Also support time percent with multiple time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10.
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example:
Select the second 10% time slice:
......
......@@ -351,19 +351,19 @@ include::itrace.txt[]
to end of file.
Also support time percent with multipe time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10.
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example:
Select the second 10% time slice
Select the second 10% time slice:
perf script --time 10%/2
Select from 0% to 10% time slice
Select from 0% to 10% time slice:
perf script --time 0%-10%
Select the first and second 10% time slices
Select the first and second 10% time slices:
perf script --time 10%/1,10%/2
Select from 0% to 10% and 30% to 40% slices
Select from 0% to 10% and 30% to 40% slices:
perf script --time 0%-10%,30%-40%
--max-blocks::
......
......@@ -22,6 +22,42 @@
#include "../../util/evlist.h"
#include "../../util/pmu.h"
#include "cs-etm.h"
#include "arm-spe.h"
static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
{
struct perf_pmu **arm_spe_pmus = NULL;
int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
/* arm_spe_xxxxxxxxx\0 */
char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
if (!arm_spe_pmus) {
pr_err("spes alloc failed\n");
*err = -ENOMEM;
return NULL;
}
for (i = 0; i < nr_cpus; i++) {
ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i);
if (ret < 0) {
pr_err("sprintf failed\n");
*err = -ENOMEM;
return NULL;
}
arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
if (arm_spe_pmus[*nr_spes]) {
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
__func__, __LINE__, *nr_spes,
arm_spe_pmus[*nr_spes]->type,
arm_spe_pmus[*nr_spes]->name);
(*nr_spes)++;
}
}
return arm_spe_pmus;
}
struct auxtrace_record
*auxtrace_record__init(struct perf_evlist *evlist, int *err)
......@@ -29,22 +65,51 @@ struct auxtrace_record
struct perf_pmu *cs_etm_pmu;
struct perf_evsel *evsel;
bool found_etm = false;
bool found_spe = false;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
int i;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
if (evlist) {
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
evsel->attr.type == cs_etm_pmu->type)
found_etm = true;
if (!arm_spe_pmus)
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
evsel->attr.type == cs_etm_pmu->type)
found_etm = true;
if (!nr_spes)
continue;
for (i = 0; i < nr_spes; i++) {
if (evsel->attr.type == arm_spe_pmus[i]->type) {
found_spe = true;
break;
}
}
}
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
*err = -EOPNOTSUPP;
return NULL;
}
if (found_etm)
return cs_etm_record_init(err);
#if defined(__aarch64__)
if (found_spe)
return arm_spe_recording_init(err, arm_spe_pmus[i]);
#endif
/*
* Clear 'err' even if we haven't found a cs_etm event - that way perf
* Clear 'err' even if we haven't found an event - that way perf
* record can still be used even if tracers aren't present. The NULL
* return value will take care of telling the infrastructure HW tracing
* isn't available.
......
......@@ -20,6 +20,7 @@
#include <linux/perf_event.h>
#include "cs-etm.h"
#include "arm-spe.h"
#include "../../util/pmu.h"
struct perf_event_attr
......@@ -30,7 +31,12 @@ struct perf_event_attr
/* add ETM default config here */
pmu->selectable = true;
pmu->set_drv_config = cs_etm_set_drv_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
return arm_spe_pmu_default_config(pmu);
#endif
}
#endif
return NULL;
}
......@@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o
../../arm/util/cs-etm.o \
arm-spe.o
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <time.h>
#include "../../util/cpumap.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
#include "../../util/session.h"
#include "../../util/util.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
struct arm_spe_recording {
struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu;
struct perf_evlist *evlist;
};
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
return ARM_SPE_AUXTRACE_PRIV_SIZE;
}
static int arm_spe_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *auxtrace_info,
size_t priv_size)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
return -EINVAL;
if (!session->evlist->nr_mmaps)
return -EINVAL;
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
return 0;
}
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct perf_evsel *evsel, *arm_spe_evsel = NULL;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
struct perf_evsel *tracking_evsel;
int err;
sper->evlist = evlist;
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == arm_spe_pmu->type) {
if (arm_spe_evsel) {
pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
return -EINVAL;
}
evsel->attr.freq = 0;
evsel->attr.sample_period = 1;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
}
if (!opts->full_auxtrace)
return 0;
/* We are in full trace mode but '-m,xyz' wasn't specified */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
}
/* Validate auxtrace_mmap_pages */
if (opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz = KiB(8);
if (sz < min_sz || !is_power_of_2(sz)) {
pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
min_sz / 1024);
return -EINVAL;
}
}
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first.
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
perf_evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1;
perf_evsel__set_sample_bit(tracking_evsel, TIME);
perf_evsel__set_sample_bit(tracking_evsel, CPU);
perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return ts.tv_sec ^ ts.tv_nsec;
}
static void arm_spe_recording_free(struct auxtrace_record *itr)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
free(sper);
}
static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(sper->evlist, evsel) {
if (evsel->attr.type == sper->arm_spe_pmu->type)
return perf_evlist__enable_event_idx(sper->evlist,
evsel, idx);
}
return -EINVAL;
}
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
struct arm_spe_recording *sper;
if (!arm_spe_pmu) {
*err = -ENODEV;
return NULL;
}
sper = zalloc(sizeof(struct arm_spe_recording));
if (!sper) {
*err = -ENOMEM;
return NULL;
}
sper->arm_spe_pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
sper->itr.read_finish = arm_spe_read_finish;
sper->itr.alignment = 0;
return &sper->itr;
}
struct perf_event_attr
*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
{
struct perf_event_attr *attr;
attr = zalloc(sizeof(struct perf_event_attr));
if (!attr) {
pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
return NULL;
}
/*
* If kernel driver doesn't advertise a minimum,
* use max allowable by PMSIDR_EL1.INTERVAL
*/
if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
&attr->sample_period) != 1) {
pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
attr->sample_period = 4096;
}
arm_spe_pmu->selectable = true;
arm_spe_pmu->is_uncore = false;
return attr;
}
......@@ -70,7 +70,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *buf = malloc(128);
if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
free(buf);
return NULL;
}
......
......@@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER))
(sample_type & PERF_SAMPLE_STACK_USER)) {
mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
mode = CALLCHAIN_LBR;
else if (sample_type & PERF_SAMPLE_CALLCHAIN)
mode = CALLCHAIN_FP;
......
......@@ -54,8 +54,6 @@
#include <unistd.h>
#include <linux/mman.h>
#define PTIME_RANGE_MAX 10
struct report {
struct perf_tool tool;
struct perf_session *session;
......@@ -76,7 +74,8 @@ struct report {
const char *cpu_list;
const char *symbol_filter_str;
const char *time_str;
struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
struct perf_time_interval *ptime_range;
int range_size;
int range_num;
float min_percent;
u64 nr_entries;
......@@ -338,9 +337,10 @@ static int report__setup_sample_type(struct report *rep)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER))
(sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
......@@ -403,6 +403,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname);
if (rep->time_str)
ret += fprintf(fp, " (time slices: %s)", rep->time_str);
if (symbol_conf.show_ref_callgraph &&
strstr(evname, "call-graph=no")) {
ret += fprintf(fp, ", show reference callgraph");
......@@ -1296,22 +1299,33 @@ int cmd_report(int argc, const char **argv)
if (symbol__init(&session->header.env) < 0)
goto error;
report.ptime_range = perf_time__range_alloc(report.time_str,
&report.range_size);
if (!report.ptime_range) {
ret = -ENOMEM;
goto error;
}
if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) {
if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) {
pr_err("No first/last sample time in perf data\n");
return -EINVAL;
pr_err("HINT: no first/last sample time found in perf data.\n"
"Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
ret = -EINVAL;
goto error;
}
report.range_num = perf_time__percent_parse_str(
report.ptime_range, PTIME_RANGE_MAX,
report.ptime_range, report.range_size,
report.time_str,
session->evlist->first_sample_time,
session->evlist->last_sample_time);
if (report.range_num < 0) {
pr_err("Invalid time string\n");
return -EINVAL;
ret = -EINVAL;
goto error;
}
} else {
report.range_num = 1;
......@@ -1327,6 +1341,8 @@ int cmd_report(int argc, const char **argv)
ret = 0;
error:
zfree(&report.ptime_range);
perf_session__delete(session);
return ret;
}
......@@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return 0;
}
#define PTIME_RANGE_MAX 10
struct perf_script {
struct perf_tool tool;
struct perf_session *session;
......@@ -1496,7 +1494,8 @@ struct perf_script {
struct thread_map *threads;
int name_width;
const char *time_str;
struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
struct perf_time_interval *ptime_range;
int range_size;
int range_num;
};
......@@ -2919,9 +2918,10 @@ static void script__setup_sample_type(struct perf_script *script)
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER))
(sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
......@@ -3444,17 +3444,26 @@ int cmd_script(int argc, const char **argv)
if (err < 0)
goto out_delete;
script.ptime_range = perf_time__range_alloc(script.time_str,
&script.range_size);
if (!script.ptime_range) {
err = -ENOMEM;
goto out_delete;
}
/* needs to be parsed after looking up reference time */
if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) {
pr_err("No first/last sample time in perf data\n");
pr_err("HINT: no first/last sample time found in perf data.\n"
"Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
err = -EINVAL;
goto out_delete;
}
script.range_num = perf_time__percent_parse_str(
script.ptime_range, PTIME_RANGE_MAX,
script.ptime_range, script.range_size,
script.time_str,
session->evlist->first_sample_time,
session->evlist->last_sample_time);
......@@ -3473,6 +3482,8 @@ int cmd_script(int argc, const char **argv)
flush_scripting();
out_delete:
zfree(&script.ptime_range);
perf_evlist__free_stats(session->evlist);
perf_session__delete(session);
......
......@@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
struct addr_location al;
if (machine__resolve(trace->host, &al, sample) < 0 ||
thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
return -1;
return 0;
......@@ -2222,6 +2222,9 @@ static int trace__add_syscall_newtp(struct trace *trace)
if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
goto out_delete_sys_exit;
perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
perf_evlist__add(evlist, sys_enter);
perf_evlist__add(evlist, sys_exit);
......@@ -2318,6 +2321,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL)
goto out_error_mem;
perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
perf_evlist__add(evlist, pgfault_maj);
}
......@@ -2325,6 +2329,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL)
goto out_error_mem;
perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
perf_evlist__add(evlist, pgfault_min);
}
......@@ -2345,45 +2350,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist;
}
perf_evlist__config(evlist, &trace->opts, NULL);
if (callchain_param.enabled) {
bool use_identifier = false;
if (trace->syscalls.events.sys_exit) {
perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
&trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_maj) {
perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_min) {
perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
use_identifier = true;
}
if (use_identifier) {
/*
* Now we have evsels with different sample_ids, use
* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
* from a fixed position in each ring buffer record.
*
* As of this the changeset introducing this comment, this
* isn't strictly needed, as the fields that can come before
* PERF_SAMPLE_ID are all used, but we'll probably disable
* some of those for things like copying the payload of
* pointer syscall arguments, and for vfs_getname we don't
* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
*/
perf_evlist__set_sample_bit(evlist, IDENTIFIER);
perf_evlist__reset_sample_bit(evlist, ID);
}
}
perf_evlist__config(evlist, &trace->opts, &callchain_param);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
......@@ -2456,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit;
/*
* Now that we already used evsel->attr to ask the kernel to setup the
* events, lets reuse evsel->attr.sample_max_stack as the limit in
* trace__resolve_callchain(), allowing per-event max-stack settings
* to override an explicitely set --max-stack global setting.
*/
evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
evsel->attr.sample_max_stack == 0)
evsel->attr.sample_max_stack = trace->max_stack;
}
again:
before = trace->nr_events;
......@@ -3098,8 +3077,9 @@ int cmd_trace(int argc, const char **argv)
}
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
}
#endif
if (callchain_param.enabled) {
......
#!/bin/bash
#
# Profiling physical memory by all retired load instructions/uops event
# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS
#
load=`perf list | grep mem_inst_retired.all_loads`
if [ -z "$load" ]; then
load=`perf list | grep mem_uops_retired.all_loads`
fi
if [ -z "$load" ]; then
echo "There is no event to count all retired load instructions/uops."
exit 1
fi
arg=$(echo $load | tr -d ' ')
arg="$arg:P"
perf record --phys-data -e $arg $@
#!/bin/bash
# description: resolve physical address samples
perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py
# mem-phys-addr.py: Resolve physical address samples
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2018, Intel Corporation.
from __future__ import division
import os
import sys
import struct
import re
import bisect
import collections
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
#physical address ranges for System RAM
system_ram = []
#physical address ranges for Persistent Memory
pmem = []
#file object for proc iomem
f = None
#Count for each type of memory
load_mem_type_cnt = collections.Counter()
#perf event name
event_name = None
def parse_iomem():
global f
f = open('/proc/iomem', 'r')
for i, j in enumerate(f):
m = re.split('-|:',j,2)
if m[2].strip() == 'System RAM':
system_ram.append(long(m[0], 16))
system_ram.append(long(m[1], 16))
if m[2].strip() == 'Persistent Memory':
pmem.append(long(m[0], 16))
pmem.append(long(m[1], 16))
def print_memory_type():
print "Event: %s" % (event_name)
print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"),
print "%-40s %10s %10s\n" % ("----------------------------------------", \
"-----------", "-----------"),
total = sum(load_mem_type_cnt.values())
for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
key = lambda(k, v): (v, k), reverse = True):
print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total),
def trace_begin():
parse_iomem()
def trace_end():
print_memory_type()
f.close()
def is_system_ram(phys_addr):
#/proc/iomem is sorted
position = bisect.bisect(system_ram, phys_addr)
if position % 2 == 0:
return False
return True
def is_persistent_mem(phys_addr):
position = bisect.bisect(pmem, phys_addr)
if position % 2 == 0:
return False
return True
def find_memory_type(phys_addr):
if phys_addr == 0:
return "N/A"
if is_system_ram(phys_addr):
return "System RAM"
if is_persistent_mem(phys_addr):
return "Persistent Memory"
#slow path, search all
f.seek(0, 0)
for j in f:
m = re.split('-|:',j,2)
if long(m[0], 16) <= phys_addr <= long(m[1], 16):
return m[2]
return "N/A"
def process_event(param_dict):
name = param_dict["ev_name"]
sample = param_dict["sample"]
phys_addr = sample["phys_addr"]
global event_name
if event_name == None:
event_name = name
load_mem_type_cnt[find_memory_type(phys_addr)] += 1
......@@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
}
callchain_param.record_mode = CALLCHAIN_DWARF;
dwarf_callchain_users = true;
if (init_live_machine(machine)) {
pr_err("Could not init machine\n");
......
......@@ -86,6 +86,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-$(CONFIG_AUXTRACE) += arm-spe.o
libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
libperf-y += parse-branch-options.o
libperf-y += dump-insn.o
libperf-y += parse-regs-options.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <stdio.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>
#include "arm-spe-pkt-decoder.h"
#define BIT(n) (1ULL << (n))
#define NS_FLAG BIT(63)
#define EL_FLAG (BIT(62) | BIT(61))
#define SPE_HEADER0_PAD 0x0
#define SPE_HEADER0_END 0x1
#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */
#define SPE_HEADER0_ADDRESS_MASK 0x38
#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */
#define SPE_HEADER0_COUNTER_MASK 0x38
#define SPE_HEADER0_TIMESTAMP 0x71
#define SPE_HEADER0_TIMESTAMP 0x71
#define SPE_HEADER0_EVENTS 0x2
#define SPE_HEADER0_EVENTS_MASK 0xf
#define SPE_HEADER0_SOURCE 0x3
#define SPE_HEADER0_SOURCE_MASK 0xf
#define SPE_HEADER0_CONTEXT 0x24
#define SPE_HEADER0_CONTEXT_MASK 0x3c
#define SPE_HEADER0_OP_TYPE 0x8
#define SPE_HEADER0_OP_TYPE_MASK 0x3c
#define SPE_HEADER1_ALIGNMENT 0x0
#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */
#define SPE_HEADER1_ADDRESS_MASK 0xf8
#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */
#define SPE_HEADER1_COUNTER_MASK 0xf8
#if __BYTE_ORDER == __BIG_ENDIAN
#define le16_to_cpu bswap_16
#define le32_to_cpu bswap_32
#define le64_to_cpu bswap_64
#define memcpy_le64(d, s, n) do { \
memcpy((d), (s), (n)); \
*(d) = le64_to_cpu(*(d)); \
} while (0)
#else
#define le16_to_cpu
#define le32_to_cpu
#define le64_to_cpu
#define memcpy_le64 memcpy
#endif
static const char * const arm_spe_packet_name[] = {
[ARM_SPE_PAD] = "PAD",
[ARM_SPE_END] = "END",
[ARM_SPE_TIMESTAMP] = "TS",
[ARM_SPE_ADDRESS] = "ADDR",
[ARM_SPE_COUNTER] = "LAT",
[ARM_SPE_CONTEXT] = "CONTEXT",
[ARM_SPE_OP_TYPE] = "OP-TYPE",
[ARM_SPE_EVENTS] = "EVENTS",
[ARM_SPE_DATA_SOURCE] = "DATA-SOURCE",
};
const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
{
return arm_spe_packet_name[type];
}
/* return ARM SPE payload size from its encoding,
* which is in bits 5:4 of the byte.
* 00 : byte
* 01 : halfword (2)
* 10 : word (4)
* 11 : doubleword (8)
*/
static int payloadlen(unsigned char byte)
{
return 1 << ((byte & 0x30) >> 4);
}
static int arm_spe_get_payload(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
size_t payload_len = payloadlen(buf[0]);
if (len < 1 + payload_len)
return ARM_SPE_NEED_MORE_BYTES;
buf++;
switch (payload_len) {
case 1: packet->payload = *(uint8_t *)buf; break;
case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break;
case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break;
case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break;
default: return ARM_SPE_BAD_PACKET;
}
return 1 + payload_len;
}
static int arm_spe_get_pad(struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_PAD;
return 1;
}
static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
unsigned int alignment = 1 << ((buf[0] & 0xf) + 1);
if (len < alignment)
return ARM_SPE_NEED_MORE_BYTES;
packet->type = ARM_SPE_PAD;
return alignment - (((uintptr_t)buf) & (alignment - 1));
}
static int arm_spe_get_end(struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_END;
return 1;
}
static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_TIMESTAMP;
return arm_spe_get_payload(buf, len, packet);
}
static int arm_spe_get_events(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
int ret = arm_spe_get_payload(buf, len, packet);
packet->type = ARM_SPE_EVENTS;
/* we use index to identify Events with a less number of
* comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS,
* LLC-REFILL, and REMOTE-ACCESS events are identified iff
* index > 1.
*/
packet->index = ret - 1;
return ret;
}
static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_DATA_SOURCE;
return arm_spe_get_payload(buf, len, packet);
}
static int arm_spe_get_context(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_CONTEXT;
packet->index = buf[0] & 0x3;
return arm_spe_get_payload(buf, len, packet);
}
static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
packet->type = ARM_SPE_OP_TYPE;
packet->index = buf[0] & 0x3;
return arm_spe_get_payload(buf, len, packet);
}
static int arm_spe_get_counter(const unsigned char *buf, size_t len,
const unsigned char ext_hdr, struct arm_spe_pkt *packet)
{
if (len < 2)
return ARM_SPE_NEED_MORE_BYTES;
packet->type = ARM_SPE_COUNTER;
if (ext_hdr)
packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
else
packet->index = buf[0] & 0x7;
packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
return 1 + ext_hdr + 2;
}
static int arm_spe_get_addr(const unsigned char *buf, size_t len,
const unsigned char ext_hdr, struct arm_spe_pkt *packet)
{
if (len < 8)
return ARM_SPE_NEED_MORE_BYTES;
packet->type = ARM_SPE_ADDRESS;
if (ext_hdr)
packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
else
packet->index = buf[0] & 0x7;
memcpy_le64(&packet->payload, buf + 1, 8);
return 1 + ext_hdr + 8;
}
static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
unsigned int byte;
memset(packet, 0, sizeof(struct arm_spe_pkt));
if (!len)
return ARM_SPE_NEED_MORE_BYTES;
byte = buf[0];
if (byte == SPE_HEADER0_PAD)
return arm_spe_get_pad(packet);
else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
return arm_spe_get_end(packet);
else if (byte & 0xc0 /* 0y11xxxxxx */) {
if (byte & 0x80) {
if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
return arm_spe_get_addr(buf, len, 0, packet);
if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
return arm_spe_get_counter(buf, len, 0, packet);
} else
if (byte == SPE_HEADER0_TIMESTAMP)
return arm_spe_get_timestamp(buf, len, packet);
else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
return arm_spe_get_events(buf, len, packet);
else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
return arm_spe_get_data_source(buf, len, packet);
else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
return arm_spe_get_context(buf, len, packet);
else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
return arm_spe_get_op_type(buf, len, packet);
} else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
/* 16-bit header */
byte = buf[1];
if (byte == SPE_HEADER1_ALIGNMENT)
return arm_spe_get_alignment(buf, len, packet);
else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
return arm_spe_get_addr(buf, len, 1, packet);
else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
return arm_spe_get_counter(buf, len, 1, packet);
}
return ARM_SPE_BAD_PACKET;
}
int arm_spe_get_packet(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet)
{
int ret;
ret = arm_spe_do_get_packet(buf, len, packet);
/* put multiple consecutive PADs on the same line, up to
* the fixed-width output format of 16 bytes per line.
*/
if (ret > 0 && packet->type == ARM_SPE_PAD) {
while (ret < 16 && len > (size_t)ret && !buf[ret])
ret += 1;
}
return ret;
}
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
size_t buf_len)
{
int ret, ns, el, idx = packet->index;
unsigned long long payload = packet->payload;
const char *name = arm_spe_pkt_name(packet->type);
switch (packet->type) {
case ARM_SPE_BAD:
case ARM_SPE_PAD:
case ARM_SPE_END:
return snprintf(buf, buf_len, "%s", name);
case ARM_SPE_EVENTS: {
size_t blen = buf_len;
ret = 0;
ret = snprintf(buf, buf_len, "EV");
buf += ret;
blen -= ret;
if (payload & 0x1) {
ret = snprintf(buf, buf_len, " EXCEPTION-GEN");
buf += ret;
blen -= ret;
}
if (payload & 0x2) {
ret = snprintf(buf, buf_len, " RETIRED");
buf += ret;
blen -= ret;
}
if (payload & 0x4) {
ret = snprintf(buf, buf_len, " L1D-ACCESS");
buf += ret;
blen -= ret;
}
if (payload & 0x8) {
ret = snprintf(buf, buf_len, " L1D-REFILL");
buf += ret;
blen -= ret;
}
if (payload & 0x10) {
ret = snprintf(buf, buf_len, " TLB-ACCESS");
buf += ret;
blen -= ret;
}
if (payload & 0x20) {
ret = snprintf(buf, buf_len, " TLB-REFILL");
buf += ret;
blen -= ret;
}
if (payload & 0x40) {
ret = snprintf(buf, buf_len, " NOT-TAKEN");
buf += ret;
blen -= ret;
}
if (payload & 0x80) {
ret = snprintf(buf, buf_len, " MISPRED");
buf += ret;
blen -= ret;
}
if (idx > 1) {
if (payload & 0x100) {
ret = snprintf(buf, buf_len, " LLC-ACCESS");
buf += ret;
blen -= ret;
}
if (payload & 0x200) {
ret = snprintf(buf, buf_len, " LLC-REFILL");
buf += ret;
blen -= ret;
}
if (payload & 0x400) {
ret = snprintf(buf, buf_len, " REMOTE-ACCESS");
buf += ret;
blen -= ret;
}
}
if (ret < 0)
return ret;
blen -= ret;
return buf_len - blen;
}
case ARM_SPE_OP_TYPE:
switch (idx) {
case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ?
"COND-SELECT" : "INSN-OTHER");
case 1: {
size_t blen = buf_len;
if (payload & 0x1)
ret = snprintf(buf, buf_len, "ST");
else
ret = snprintf(buf, buf_len, "LD");
buf += ret;
blen -= ret;
if (payload & 0x2) {
if (payload & 0x4) {
ret = snprintf(buf, buf_len, " AT");
buf += ret;
blen -= ret;
}
if (payload & 0x8) {
ret = snprintf(buf, buf_len, " EXCL");
buf += ret;
blen -= ret;
}
if (payload & 0x10) {
ret = snprintf(buf, buf_len, " AR");
buf += ret;
blen -= ret;
}
} else if (payload & 0x4) {
ret = snprintf(buf, buf_len, " SIMD-FP");
buf += ret;
blen -= ret;
}
if (ret < 0)
return ret;
blen -= ret;
return buf_len - blen;
}
case 2: {
size_t blen = buf_len;
ret = snprintf(buf, buf_len, "B");
buf += ret;
blen -= ret;
if (payload & 0x1) {
ret = snprintf(buf, buf_len, " COND");
buf += ret;
blen -= ret;
}
if (payload & 0x2) {
ret = snprintf(buf, buf_len, " IND");
buf += ret;
blen -= ret;
}
if (ret < 0)
return ret;
blen -= ret;
return buf_len - blen;
}
default: return 0;
}
case ARM_SPE_DATA_SOURCE:
case ARM_SPE_TIMESTAMP:
return snprintf(buf, buf_len, "%s %lld", name, payload);
case ARM_SPE_ADDRESS:
switch (idx) {
case 0:
case 1: ns = !!(packet->payload & NS_FLAG);
el = (packet->payload & EL_FLAG) >> 61;
payload &= ~(0xffULL << 56);
return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
(idx == 1) ? "TGT" : "PC", payload, el, ns);
case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload);
case 3: ns = !!(packet->payload & NS_FLAG);
payload &= ~(0xffULL << 56);
return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
payload, ns);
default: return 0;
}
case ARM_SPE_CONTEXT:
return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
(unsigned long)payload, idx + 1);
case ARM_SPE_COUNTER: {
size_t blen = buf_len;
ret = snprintf(buf, buf_len, "%s %d ", name,
(unsigned short)payload);
buf += ret;
blen -= ret;
switch (idx) {
case 0: ret = snprintf(buf, buf_len, "TOT"); break;
case 1: ret = snprintf(buf, buf_len, "ISSUE"); break;
case 2: ret = snprintf(buf, buf_len, "XLAT"); break;
default: ret = 0;
}
if (ret < 0)
return ret;
blen -= ret;
return buf_len - blen;
}
default:
break;
}
return snprintf(buf, buf_len, "%s 0x%llx (%d)",
name, payload, packet->index);
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
#define INCLUDE__ARM_SPE_PKT_DECODER_H__
#include <stddef.h>
#include <stdint.h>
#define ARM_SPE_PKT_DESC_MAX 256
#define ARM_SPE_NEED_MORE_BYTES -1
#define ARM_SPE_BAD_PACKET -2
enum arm_spe_pkt_type {
ARM_SPE_BAD,
ARM_SPE_PAD,
ARM_SPE_END,
ARM_SPE_TIMESTAMP,
ARM_SPE_ADDRESS,
ARM_SPE_COUNTER,
ARM_SPE_CONTEXT,
ARM_SPE_OP_TYPE,
ARM_SPE_EVENTS,
ARM_SPE_DATA_SOURCE,
};
struct arm_spe_pkt {
enum arm_spe_pkt_type type;
unsigned char index;
uint64_t payload;
};
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
int arm_spe_get_packet(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet);
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
#endif
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "debug.h"
#include "auxtrace.h"
#include "arm-spe.h"
#include "arm-spe-pkt-decoder.h"
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
struct auxtrace_heap heap;
u32 auxtrace_type;
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
};
struct arm_spe_queue {
struct arm_spe *spe;
unsigned int queue_nr;
struct auxtrace_buffer *buffer;
bool on_heap;
bool done;
pid_t pid;
pid_t tid;
int cpu;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
unsigned char *buf, size_t len)
{
struct arm_spe_pkt packet;
size_t pos = 0;
int ret, pkt_len, i;
char desc[ARM_SPE_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
color_fprintf(stdout, color,
". ... ARM SPE data: size %zu bytes\n",
len);
while (len) {
ret = arm_spe_get_packet(buf, len, &packet);
if (ret > 0)
pkt_len = ret;
else
pkt_len = 1;
printf(".");
color_fprintf(stdout, color, " %08x: ", pos);
for (i = 0; i < pkt_len; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < 16; i++)
color_fprintf(stdout, color, " ");
if (ret > 0) {
ret = arm_spe_pkt_desc(&packet, desc,
ARM_SPE_PKT_DESC_MAX);
if (ret > 0)
color_fprintf(stdout, color, " %s\n", desc);
} else {
color_fprintf(stdout, color, " Bad packet!\n");
}
pos += pkt_len;
buf += pkt_len;
len -= pkt_len;
}
}
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
size_t len)
{
printf(".\n");
arm_spe_dump(spe, buf, len);
}
static int arm_spe_process_event(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct perf_tool *tool __maybe_unused)
{
return 0;
}
static int arm_spe_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
struct auxtrace_buffer *buffer;
off_t data_offset;
int fd = perf_data__fd(session->data);
int err;
if (perf_data__is_pipe(session->data)) {
data_offset = 0;
} else {
data_offset = lseek(fd, 0, SEEK_CUR);
if (data_offset == -1)
return -errno;
}
err = auxtrace_queues__add_event(&spe->queues, session, event,
data_offset, &buffer);
if (err)
return err;
/* Dump here now we have copied a piped trace out of the pipe */
if (dump_trace) {
if (auxtrace_buffer__get_data(buffer, fd)) {
arm_spe_dump_event(spe, buffer->data,
buffer->size);
auxtrace_buffer__put_data(buffer);
}
}
return 0;
}
static int arm_spe_flush(struct perf_session *session __maybe_unused,
struct perf_tool *tool __maybe_unused)
{
return 0;
}
static void arm_spe_free_queue(void *priv)
{
struct arm_spe_queue *speq = priv;
if (!speq)
return;
free(speq);
}
static void arm_spe_free_events(struct perf_session *session)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
struct auxtrace_queues *queues = &spe->queues;
unsigned int i;
for (i = 0; i < queues->nr_queues; i++) {
arm_spe_free_queue(queues->queue_array[i].priv);
queues->queue_array[i].priv = NULL;
}
auxtrace_queues__free(queues);
}
static void arm_spe_free(struct perf_session *session)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
auxtrace_heap__free(&spe->heap);
arm_spe_free_events(session);
session->auxtrace = NULL;
free(spe);
}
static const char * const arm_spe_info_fmts[] = {
[ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
};
static void arm_spe_print_info(u64 *arr)
{
if (!dump_trace)
return;
fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
struct arm_spe *spe;
int err;
if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
min_sz)
return -EINVAL;
spe = zalloc(sizeof(struct arm_spe));
if (!spe)
return -ENOMEM;
err = auxtrace_queues__init(&spe->queues);
if (err)
goto err_free;
spe->session = session;
spe->machine = &session->machines.host; /* No kvm support */
spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;
spe->auxtrace.free_events = arm_spe_free_events;
spe->auxtrace.free = arm_spe_free;
session->auxtrace = &spe->auxtrace;
arm_spe_print_info(&auxtrace_info->priv[0]);
return 0;
err_free:
free(spe);
return err;
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#ifndef INCLUDE__PERF_ARM_SPE_H__
#define INCLUDE__PERF_ARM_SPE_H__
#define ARM_SPE_PMU_NAME "arm_spe_"
enum {
ARM_SPE_PMU_TYPE,
ARM_SPE_PER_CPU_MMAPS,
ARM_SPE_AUXTRACE_PRIV_MAX,
};
#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
union perf_event;
struct perf_session;
struct perf_pmu;
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu);
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session);
struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
#endif
......@@ -54,6 +54,7 @@
#include "intel-pt.h"
#include "intel-bts.h"
#include "arm-spe.h"
#include "sane_ctype.h"
#include "symbol/kallsyms.h"
......@@ -910,6 +911,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
return intel_pt_process_auxtrace_info(event, session);
case PERF_AUXTRACE_INTEL_BTS:
return intel_bts_process_auxtrace_info(event, session);
case PERF_AUXTRACE_ARM_SPE:
return arm_spe_process_auxtrace_info(event, session);
case PERF_AUXTRACE_CS_ETM:
case PERF_AUXTRACE_UNKNOWN:
default:
......
......@@ -43,6 +43,7 @@ enum auxtrace_type {
PERF_AUXTRACE_INTEL_PT,
PERF_AUXTRACE_INTEL_BTS,
PERF_AUXTRACE_CS_ETM,
PERF_AUXTRACE_ARM_SPE,
};
enum itrace_period_type {
......
......@@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
CALLCHAIN_PARAM_DEFAULT
};
/*
* Are there any events usind DWARF callchains?
*
* I.e.
*
* -e cycles/call-graph=dwarf/
*/
bool dwarf_callchain_users;
struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT
};
......@@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
ret = 0;
param->record_mode = CALLCHAIN_DWARF;
param->dump_size = default_stack_dump_size;
dwarf_callchain_users = true;
tok = strtok_r(NULL, ",", &saveptr);
if (tok) {
......
......@@ -89,6 +89,8 @@ enum chain_value {
CCVAL_COUNT,
};
extern bool dwarf_callchain_users;
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
......
......@@ -1760,7 +1760,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
switch (old_state) {
case BKW_MMAP_NOTREADY: {
if (state != BKW_MMAP_RUNNING)
goto state_err;;
goto state_err;
break;
}
case BKW_MMAP_RUNNING: {
......
......@@ -651,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
return ret;
}
void perf_evsel__config_callchain(struct perf_evsel *evsel,
struct record_opts *opts,
struct callchain_param *param)
static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
struct record_opts *opts,
struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
......@@ -699,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
}
}
void perf_evsel__config_callchain(struct perf_evsel *evsel,
struct record_opts *opts,
struct callchain_param *param)
{
if (param->enabled)
return __perf_evsel__config_callchain(evsel, opts, param);
}
static void
perf_evsel__reset_callgraph(struct perf_evsel *evsel,
struct callchain_param *param)
......@@ -718,19 +726,19 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel,
}
static void apply_config_terms(struct perf_evsel *evsel,
struct record_opts *opts)
struct record_opts *opts, bool track)
{
struct perf_evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->attr;
struct callchain_param param;
/* callgraph default */
struct callchain_param param = {
.record_mode = callchain_param.record_mode,
};
u32 dump_size = 0;
int max_stack = 0;
const char *callgraph_buf = NULL;
/* callgraph default */
param.record_mode = callchain_param.record_mode;
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
case PERF_EVSEL__CONFIG_TERM_PERIOD:
......@@ -781,7 +789,7 @@ static void apply_config_terms(struct perf_evsel *evsel,
attr->write_backward = term->val.overwrite ? 1 : 0;
break;
case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
BUG_ON(1);
break;
default:
break;
}
......@@ -789,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
/* User explicitly set per-event callgraph, clear the old setting and reset. */
if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
bool sample_address = false;
if (max_stack) {
param.max_stack = max_stack;
if (callgraph_buf == NULL)
......@@ -808,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
evsel->name);
return;
}
if (param.record_mode == CALLCHAIN_DWARF)
sample_address = true;
}
}
if (dump_size > 0) {
......@@ -820,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
perf_evsel__reset_callgraph(evsel, &callchain_param);
/* set perf-event callgraph */
if (param.enabled)
if (param.enabled) {
if (sample_address) {
perf_evsel__set_sample_bit(evsel, ADDR);
perf_evsel__set_sample_bit(evsel, DATA_SRC);
evsel->attr.mmap_data = track;
}
perf_evsel__config_callchain(evsel, opts, &param);
}
}
}
......@@ -1052,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
* Apply event specific term settings,
* it overloads any global configuration.
*/
apply_config_terms(evsel, opts);
apply_config_terms(evsel, opts, track);
evsel->ignore_missing_thread = opts->ignore_missing_thread;
}
......
......@@ -499,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
PyLong_FromUnsignedLongLong(sample->time));
pydict_set_item_string_decref(dict_sample, "period",
PyLong_FromUnsignedLongLong(sample->period));
pydict_set_item_string_decref(dict_sample, "phys_addr",
PyLong_FromUnsignedLongLong(sample->phys_addr));
set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict, "sample", dict_sample);
......
......@@ -116,7 +116,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
static int parse_percent(double *pcnt, char *str)
{
char *c;
char *c, *endptr;
double d;
c = strchr(str, '%');
if (c)
......@@ -124,8 +125,11 @@ static int parse_percent(double *pcnt, char *str)
else
return -1;
*pcnt = atof(str) / 100.0;
d = strtod(str, &endptr);
if (endptr != str + strlen(str))
return -1;
*pcnt = d / 100.0;
return 0;
}
......@@ -257,6 +261,37 @@ static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
return i;
}
static int one_percent_convert(struct perf_time_interval *ptime_buf,
const char *ostr, u64 start, u64 end, char *c)
{
char *str;
int len = strlen(ostr), ret;
/*
* c points to '%'.
* '%' should be the last character
*/
if (ostr + len - 1 != c)
return -1;
/*
* Construct a string like "xx%/1"
*/
str = malloc(len + 3);
if (str == NULL)
return -ENOMEM;
memcpy(str, ostr, len);
strcpy(str + len, "/1");
ret = percent_slash_split(str, ptime_buf, start, end);
if (ret == 0)
ret = 1;
free(str);
return ret;
}
int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
const char *ostr, u64 start, u64 end)
{
......@@ -266,6 +301,7 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
* ostr example:
* 10%/2,10%/3: select the second 10% slice and the third 10% slice
* 0%-10%,30%-40%: multiple time range
* 50%: just one percent
*/
memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
......@@ -282,9 +318,41 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
end, percent_dash_split);
}
c = strchr(ostr, '%');
if (c)
return one_percent_convert(ptime_buf, ostr, start, end, c);
return -1;
}
struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
{
const char *p1, *p2;
int i = 1;
struct perf_time_interval *ptime;
/*
* At least allocate one time range.
*/
if (!ostr)
goto alloc;
p1 = ostr;
while (p1 < ostr + strlen(ostr)) {
p2 = strchr(p1, ',');
if (!p2)
break;
p1 = p2 + 1;
i++;
}
alloc:
*size = i;
ptime = calloc(i, sizeof(*ptime));
return ptime;
}
bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
{
/* if time is not set don't drop sample */
......
......@@ -16,6 +16,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
const char *ostr, u64 start, u64 end);
struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
......
......@@ -631,9 +631,8 @@ static unw_accessors_t accessors = {
static int _unwind__prepare_access(struct thread *thread)
{
if (callchain_param.record_mode != CALLCHAIN_DWARF)
if (!dwarf_callchain_users)
return 0;
thread->addr_space = unw_create_addr_space(&accessors, 0);
if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
......@@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread)
static void _unwind__flush_access(struct thread *thread)
{
if (callchain_param.record_mode != CALLCHAIN_DWARF)
if (!dwarf_callchain_users)
return;
unw_flush_cache(thread->addr_space, 0, 0);
}
static void _unwind__finish_access(struct thread *thread)
{
if (callchain_param.record_mode != CALLCHAIN_DWARF)
if (!dwarf_callchain_users)
return;
unw_destroy_addr_space(thread->addr_space);
}
......
......@@ -210,7 +210,7 @@ static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64
size -= ret;
off_in += ret;
off_out -= ret;
off_out += ret;
}
munmap(ptr, off_in + size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment