Commit 91a79e5f authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-20161028' of...

Merge tag 'perf-core-for-mingo-20161028' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Support matching by topic in 'perf list' (Andi Kleen)

User visible:

- Apply cpu color only when there was activity in 'perf sched map' (Namhyung Kim)

- Always show the task's COMM in 'perf sched map -v' (Namhyung Kim)

- Fix hierarchy column counts in the perf hist browser (top, report), avoiding
  showing nothing after pressing the RIGHT key a number of times (Namhyung Kim)

Infrastructure:

- Support cascading options in libsubcmd and use it to share common options in
  'perf sched' subcommands (Namhyung Kim)

- Avoid worker cacheline bouncing in 'perf bench futex' (Davidlohr Bueso)

- Sanitize numeric parameters in 'perf bench futex' (Davidlohr Bueso)

- Update copies of kernel files (Arnaldo Carvalho de Melo)

- Fix scripting (perl, python) setup to avoid leaks (Arnaldo Carvalho de Melo)

- Add missing object file to the python binding linkage list (Arnaldo Carvalho de Melo)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 76e2d261 46cb25b1
......@@ -72,4 +72,9 @@
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_MASK 0x3f
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
PKEY_DISABLE_WRITE)
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
......@@ -314,12 +314,19 @@ static int get_value(struct parse_opt_ctx_t *p,
static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
{
retry:
for (; options->type != OPTION_END; options++) {
if (options->short_name == *p->opt) {
p->opt = p->opt[1] ? p->opt + 1 : NULL;
return get_value(p, options, OPT_SHORT);
}
}
if (options->parent) {
options = options->parent;
goto retry;
}
return -2;
}
......@@ -333,6 +340,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
if (!arg_end)
arg_end = arg + strlen(arg);
retry:
for (; options->type != OPTION_END; options++) {
const char *rest;
int flags = 0;
......@@ -426,6 +434,12 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
}
if (abbrev_option)
return get_value(p, abbrev_option, abbrev_flags);
if (options->parent) {
options = options->parent;
goto retry;
}
return -2;
}
......
......@@ -109,11 +109,13 @@ struct option {
intptr_t defval;
bool *set;
void *data;
const struct option *parent;
};
#define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v )
#define OPT_END() { .type = OPTION_END }
#define OPT_PARENT(p) { .type = OPTION_END, .parent = (p) }
#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) }
#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) }
#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) }
......
......@@ -381,10 +381,10 @@ $(PERF_IN): prepare FORCE
(diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/lib/memcpy_64.S && ( \
(diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
(diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
|| echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/lib/memset_64.S && ( \
(diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
(diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
|| echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true
@(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \
(diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \
......
......@@ -335,6 +335,9 @@
326 common copy_file_range sys_copy_file_range
327 64 preadv2 sys_preadv2
328 64 pwritev2 sys_pwritev2
329 common pkey_mprotect sys_pkey_mprotect
330 common pkey_alloc sys_pkey_alloc
331 common pkey_free sys_pkey_free
#
# x32-specific system call numbers start at 512 to avoid cache impact
......
......@@ -39,15 +39,12 @@ static unsigned int threads_starting;
static struct stats throughput_stats;
static pthread_cond_t thread_parent, thread_worker;
#define SMP_CACHE_BYTES 256
#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES)))
struct worker {
int tid;
u_int32_t *futex;
pthread_t thread;
unsigned long ops;
} __cacheline_aligned;
};
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
......@@ -66,8 +63,9 @@ static const char * const bench_futex_hash_usage[] = {
static void *workerfn(void *arg)
{
int ret;
unsigned int i;
struct worker *w = (struct worker *) arg;
unsigned int i;
unsigned long ops = w->ops; /* avoid cacheline bouncing */
pthread_mutex_lock(&thread_lock);
threads_starting--;
......@@ -77,7 +75,7 @@ static void *workerfn(void *arg)
pthread_mutex_unlock(&thread_lock);
do {
for (i = 0; i < nfutexes; i++, w->ops++) {
for (i = 0; i < nfutexes; i++, ops++) {
/*
* We want the futex calls to fail in order to stress
* the hashing of uaddr and not measure other steps,
......@@ -91,6 +89,7 @@ static void *workerfn(void *arg)
}
} while (!done);
w->ops = ops;
return NULL;
}
......@@ -131,6 +130,8 @@ int bench_futex_hash(int argc, const char **argv,
}
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
nsecs = futexbench_sanitize_numeric(nsecs);
nfutexes = futexbench_sanitize_numeric(nfutexes);
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
......@@ -138,6 +139,8 @@ int bench_futex_hash(int argc, const char **argv,
if (!nthreads) /* default to the number of CPUs */
nthreads = ncpus;
else
nthreads = futexbench_sanitize_numeric(nthreads);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
......
......@@ -75,6 +75,7 @@ static void toggle_done(int sig __maybe_unused,
static void *workerfn(void *arg)
{
struct worker *w = (struct worker *) arg;
unsigned long ops = w->ops;
pthread_mutex_lock(&thread_lock);
threads_starting--;
......@@ -103,9 +104,10 @@ static void *workerfn(void *arg)
if (ret && !silent)
warn("thread %d: Could not unlock pi-lock for %p (%d)",
w->tid, w->futex, ret);
w->ops++; /* account for thread's share of work */
ops++; /* account for thread's share of work */
} while (!done);
w->ops = ops;
return NULL;
}
......@@ -150,6 +152,7 @@ int bench_futex_lock_pi(int argc, const char **argv,
goto err;
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
nsecs = futexbench_sanitize_numeric(nsecs);
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
......@@ -157,6 +160,8 @@ int bench_futex_lock_pi(int argc, const char **argv,
if (!nthreads)
nthreads = ncpus;
else
nthreads = futexbench_sanitize_numeric(nthreads);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
......
......@@ -128,6 +128,8 @@ int bench_futex_requeue(int argc, const char **argv,
if (!nthreads)
nthreads = ncpus;
else
nthreads = futexbench_sanitize_numeric(nthreads);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
......
......@@ -217,8 +217,12 @@ int bench_futex_wake_parallel(int argc, const char **argv,
sigaction(SIGINT, &act, NULL);
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
nwaking_threads = futexbench_sanitize_numeric(nwaking_threads);
if (!nblocked_threads)
nblocked_threads = ncpus;
else
nblocked_threads = futexbench_sanitize_numeric(nblocked_threads);
/* some sanity checks */
if (nwaking_threads > nblocked_threads || !nwaking_threads)
......
......@@ -129,6 +129,7 @@ int bench_futex_wake(int argc, const char **argv,
}
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
nwakes = futexbench_sanitize_numeric(nwakes);
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
......@@ -136,6 +137,8 @@ int bench_futex_wake(int argc, const char **argv,
if (!nthreads)
nthreads = ncpus;
else
nthreads = futexbench_sanitize_numeric(nthreads);
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
......
......@@ -7,6 +7,7 @@
#ifndef _FUTEX_H
#define _FUTEX_H
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
......@@ -99,4 +100,7 @@ static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr,
}
#endif
/* User input sanitation */
#define futexbench_sanitize_numeric(__n) abs((__n))
#endif /* _FUTEX_H */
......@@ -1191,6 +1191,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
int i;
int ret;
u64 avg;
char max_lat_at[32];
if (!work_list->nb_atoms)
return;
......@@ -1212,12 +1213,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
printf(" ");
avg = work_list->total_lat / work_list->nb_atoms;
timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
(double)work_list->total_runtime / NSEC_PER_MSEC,
work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
(double)work_list->max_lat / NSEC_PER_MSEC,
(double)work_list->max_lat_at / NSEC_PER_SEC);
max_lat_at);
}
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
......@@ -1402,6 +1404,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
int cpus_nr;
bool new_cpu = false;
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
......@@ -1479,7 +1482,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
cpu_color = COLOR_CPUS;
if (cpu != this_cpu)
color_fprintf(stdout, cpu_color, " ");
color_fprintf(stdout, color, " ");
else
color_fprintf(stdout, cpu_color, "*");
......@@ -1492,8 +1495,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
goto out;
color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC);
if (new_shortname) {
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
color_fprintf(stdout, color, " %12s secs ", stimestamp);
if (new_shortname || (verbose && sched_in->tid)) {
const char *pid_color = color;
if (thread__has_color(sched_in))
......@@ -1954,6 +1958,15 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.next_shortname2 = '0',
.skip_merge = 0,
};
const struct option sched_options[] = {
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_END()
};
const struct option latency_options[] = {
OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
"sort by key(s): runtime, switch, avg, max"),
......@@ -1965,7 +1978,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"dump raw trace in ASCII"),
OPT_BOOLEAN('p', "pids", &sched.skip_merge,
"latency stats per pid instead of per comm"),
OPT_END()
OPT_PARENT(sched_options)
};
const struct option replay_options[] = {
OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
......@@ -1975,16 +1988,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
OPT_END()
};
const struct option sched_options[] = {
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_END()
OPT_PARENT(sched_options)
};
const struct option map_options[] = {
OPT_BOOLEAN(0, "compact", &sched.map.comp,
......@@ -1995,7 +1999,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"highlight given CPUs in map"),
OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
"display given CPUs in map"),
OPT_END()
OPT_PARENT(sched_options)
};
const char * const latency_usage[] = {
"perf sched latency [<options>]",
......
......@@ -441,7 +441,6 @@ static void print_sample_start(struct perf_sample *sample,
{
struct perf_event_attr *attr = &evsel->attr;
unsigned long secs;
unsigned long usecs;
unsigned long long nsecs;
if (PRINT_FIELD(COMM)) {
......@@ -471,11 +470,14 @@ static void print_sample_start(struct perf_sample *sample,
nsecs = sample->time;
secs = nsecs / NSEC_PER_SEC;
nsecs -= secs * NSEC_PER_SEC;
usecs = nsecs / NSEC_PER_USEC;
if (nanosecs)
printf("%5lu.%09llu: ", secs, nsecs);
else
printf("%5lu.%06lu: ", secs, usecs);
else {
char sample_time[32];
timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time));
printf("%12s: ", sample_time);
}
}
}
......
......@@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser,
browser->b.use_navkeypressed = true;
browser->show_headers = symbol_conf.show_hist_headers;
hists__for_each_format(hists, fmt)
if (symbol_conf.report_hierarchy) {
struct perf_hpp_list_node *fmt_node;
/* count overhead columns (in the first node) */
fmt_node = list_first_entry(&hists->hpp_formats,
struct perf_hpp_list_node, list);
perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
++browser->b.columns;
/* add a single column for whole hierarchy sort keys*/
++browser->b.columns;
} else {
hists__for_each_format(hists, fmt)
++browser->b.columns;
}
hists__reset_column_width(hists);
}
......
......@@ -64,7 +64,7 @@ int parse_branch_str(const char *str, __u64 *mode)
}
if (!br->name) {
ret = -1;
ui__warning("unknown branch filter %s,"
pr_warning("unknown branch filter %s,"
" check man page\n", s);
goto error;
}
......
......@@ -1141,7 +1141,9 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
if (event_glob != NULL &&
!(strglobmatch_nocase(name, event_glob) ||
(!is_cpu && strglobmatch_nocase(alias->name,
event_glob))))
event_glob)) ||
(alias->topic &&
strglobmatch_nocase(alias->topic, event_glob))))
continue;
if (is_cpu && !name_only && !alias->desc)
......
......@@ -18,6 +18,7 @@ util/thread_map.c
util/util.c
util/xyarray.c
util/cgroup.c
util/parse-branch-options.c
util/rblist.c
util/counts.c
util/strlist.c
......
......@@ -25,6 +25,7 @@
#include <errno.h>
#include "../perf.h"
#include "debug.h"
#include "util.h"
#include "trace-event.h"
......@@ -86,16 +87,15 @@ struct scripting_ops python_scripting_unsupported_ops = {
static void register_python_scripting(struct scripting_ops *scripting_ops)
{
int err;
err = script_spec_register("Python", scripting_ops);
if (err)
die("error registering Python script extension");
err = script_spec_register("py", scripting_ops);
if (err)
die("error registering py script extension");
scripting_context = malloc(sizeof(struct scripting_context));
if (scripting_context == NULL)
scripting_context = malloc(sizeof(*scripting_context));
if (scripting_context == NULL ||
script_spec_register("Python", scripting_ops) ||
script_spec_register("py", scripting_ops)) {
pr_err("Error registering Python script extension: disabling it\n");
zfree(&scripting_context);
}
}
#ifdef NO_LIBPYTHON
......@@ -150,16 +150,15 @@ struct scripting_ops perl_scripting_unsupported_ops = {
static void register_perl_scripting(struct scripting_ops *scripting_ops)
{
int err;
err = script_spec_register("Perl", scripting_ops);
if (err)
die("error registering Perl script extension");
err = script_spec_register("pl", scripting_ops);
if (err)
die("error registering pl script extension");
scripting_context = malloc(sizeof(struct scripting_context));
if (scripting_context == NULL)
scripting_context = malloc(sizeof(*scripting_context));
if (scripting_context == NULL ||
script_spec_register("Perl", scripting_ops) ||
script_spec_register("pl", scripting_ops)) {
pr_err("Error registering Perl script extension: disabling it\n");
zfree(&scripting_context);
}
}
#ifdef NO_LIBPERL
......
......@@ -433,6 +433,14 @@ int parse_nsec_time(const char *str, u64 *ptime)
return 0;
}
int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
{
u64 sec = timestamp / NSEC_PER_SEC;
u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
}
unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
{
struct parse_tag *i = tags;
......
......@@ -362,4 +362,7 @@ extern int sched_getcpu(void);
#endif
int is_printable_array(char *p, unsigned int len);
int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
#endif /* GIT_COMPAT_UTIL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment