Commit 00e4cb1c authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 * Don't install scripting files when perl/python support is disabled.

 * Support ! in -e expressions in 'perf trace', to filter a list of syscalls.

 * Add --verbose and -o/--output options to 'perf trace'.

 * Introduce better formatting of syscall arguments in 'perf trace',
   including, so far, beautifiers for mmap, madvise and syscall return
   values.

 * Fixup jobserver setup in libtraceevent makefile.

 * Debug improvements from Adrian Hunter.

 * Try to increase the file descriptor limits on EMFILE, from Andi Kleen.

 * Remove unused force option in 'perf kvm', from David Ahern.

 * Make 'perf trace' command line arguments consistent with 'perf record',
   from David Ahern.

 * Fix correlation of samples coming after PERF_RECORD_EXIT event, from
   David Ahern.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents aee2bce3 456da532
......@@ -60,7 +60,7 @@ ifeq ($(BUILD_SRC),)
ifneq ($(BUILD_OUTPUT),)
define build_output
$(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \
$(if $(VERBOSE:1=),@)+$(MAKE) -C $(BUILD_OUTPUT) \
BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1
endef
......
......@@ -23,29 +23,45 @@ analysis phases.
OPTIONS
-------
-a::
--all-cpus::
System-wide collection from all CPUs.
-e::
--expr::
List of events to show, currently only syscall names.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape the ! so that your shell does not interpret it.
-o::
--output=::
Output file name.
-p::
--pid=::
Record events on existing process ID (comma separated list).
-t::
--tid=::
Record events on existing thread ID (comma separated list).
-u::
--uid=::
Record events in threads owned by uid. Name or number.
-v::
--verbose=::
Verbosity level.
-i::
--no-inherit::
Child tasks do not inherit counters.
-m::
--mmap-pages=::
Number of mmap data pages. Must be a power of two.
-C::
--cpu::
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
......
......@@ -766,17 +766,21 @@ check: $(OUTPUT)common-cmds.h
install-bin: all
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifndef NO_LIBPERL
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
endif
ifndef NO_LIBPYTHON
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
endif
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
$(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
......
......@@ -1305,7 +1305,6 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
const char * const record_args[] = {
"record",
"-R",
"-f",
"-m", "1024",
"-c", "1",
};
......
This diff is collapsed.
......@@ -14,6 +14,7 @@
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>
#include "parse-events.h"
......@@ -486,6 +487,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
int nr_cpus = cpu_map__nr(evlist->cpus);
int nr_threads = thread_map__nr(evlist->threads);
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
......@@ -524,6 +526,7 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
int thread;
int nr_threads = thread_map__nr(evlist->threads);
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
......
......@@ -13,6 +13,7 @@
#include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <sys/resource.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
......@@ -21,6 +22,7 @@
#include "thread_map.h"
#include "target.h"
#include "perf_regs.h"
#include "debug.h"
static struct {
bool sample_id_all;
......@@ -861,12 +863,72 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
return fd;
}
/*
 * Helpers for dumping struct perf_event_attr fields.
 *
 * Every macro below expands to a single fprintf() call, so it evaluates to
 * fprintf()'s return value. They are not hygienic: the expansion refers to
 * a FILE pointer named 'fp' and a struct pointer named 'attr', both of which
 * must exist in the scope where the macro is used.
 */

/*
 * Print one field as "<name> <value>": the field name is stringified and
 * left-justified in a 19-character column, the value is formatted with 'fmt'.
 * 'cast' is pasted directly in front of attr->field and may be empty.
 */
#define __PRINT_ATTR(fmt, cast, field) \
fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field)
/* Unsigned decimal / hex variants with no cast applied to the field. */
#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field)
#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field)
/* 64-bit variants: the field is cast to uint64_t to match the PRI*64 format. */
#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field)
#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field)
/*
 * Print two fields on one line, each formatted with %u, using caller-supplied
 * label strings instead of the stringified field names.
 */
#define PRINT_ATTR2N(name1, field1, name2, field2) \
fprintf(fp, " %-19s %u %-19s %u\n", \
name1, attr->field1, name2, attr->field2)
/* Same as PRINT_ATTR2N, with the field names themselves used as labels. */
#define PRINT_ATTR2(field1, field2) \
PRINT_ATTR2N(#field1, field1, #field2, field2)
/*
 * Write a human-readable dump of *attr to fp.
 *
 * The output is framed by a dotted line (at most 60 characters of
 * graph_dotted_line) above and below, with a "perf_event_attr:" heading and
 * then one line per field, or per pair of fields for the flag members.
 *
 * Returns the sum of the values returned by every fprintf() call made.
 * NOTE(review): fprintf() returns a negative int on error, which would wrap
 * when added to the size_t accumulator; errors are not checked here.
 */
static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
{
size_t ret = 0;
ret += fprintf(fp, "%.60s\n", graph_dotted_line);
ret += fprintf(fp, "perf_event_attr:\n");
ret += PRINT_ATTR_U32(type);
ret += PRINT_ATTR_U32(size);
ret += PRINT_ATTR_X64(config);
/* sample_period and sample_freq are both printed, each under its own name. */
ret += PRINT_ATTR_U64(sample_period);
ret += PRINT_ATTR_U64(sample_freq);
ret += PRINT_ATTR_X64(sample_type);
ret += PRINT_ATTR_X64(read_format);
/* Flag members, two per line, printed with %u. */
ret += PRINT_ATTR2(disabled, inherit);
ret += PRINT_ATTR2(pinned, exclusive);
ret += PRINT_ATTR2(exclude_user, exclude_kernel);
ret += PRINT_ATTR2(exclude_hv, exclude_idle);
ret += PRINT_ATTR2(mmap, comm);
ret += PRINT_ATTR2(freq, inherit_stat);
ret += PRINT_ATTR2(enable_on_exec, task);
ret += PRINT_ATTR2(watermark, precise_ip);
ret += PRINT_ATTR2(mmap_data, sample_id_all);
ret += PRINT_ATTR2(exclude_host, exclude_guest);
/* Shortened labels are passed explicitly instead of the full field names. */
ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel,
"excl.callchain_user", exclude_callchain_user);
ret += PRINT_ATTR_U32(wakeup_events);
ret += PRINT_ATTR_U32(wakeup_watermark);
ret += PRINT_ATTR_X32(bp_type);
ret += PRINT_ATTR_X64(bp_addr);
ret += PRINT_ATTR_X64(config1);
ret += PRINT_ATTR_U64(bp_len);
ret += PRINT_ATTR_X64(config2);
ret += PRINT_ATTR_X64(branch_sample_type);
ret += PRINT_ATTR_X64(sample_regs_user);
ret += PRINT_ATTR_U32(sample_stack_user);
ret += fprintf(fp, "%.60s\n", graph_dotted_line);
return ret;
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
int cpu, thread;
unsigned long flags = 0;
int pid = -1, err;
enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
if (evsel->fd == NULL &&
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
......@@ -884,6 +946,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
if (verbose >= 2)
perf_event_attr__fprintf(&evsel->attr, stderr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
for (thread = 0; thread < threads->nr; thread++) {
......@@ -893,6 +958,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
pid = threads->map[thread];
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
pr_debug2("perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n",
pid, cpus->map[cpu], group_fd, flags);
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
pid,
......@@ -902,12 +970,37 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
err = -errno;
goto try_fallback;
}
set_rlimit = NO_CHANGE;
}
}
return 0;
try_fallback:
/*
* perf stat needs between 5 and 22 fds per CPU. When we run out
* of them try to increase the limits.
*/
if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
struct rlimit l;
int old_errno = errno;
if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
if (set_rlimit == NO_CHANGE)
l.rlim_cur = l.rlim_max;
else {
l.rlim_cur = l.rlim_max + 1000;
l.rlim_max = l.rlim_cur;
}
if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
set_rlimit++;
errno = old_errno;
goto retry_open;
}
}
errno = old_errno;
}
if (err != -EINVAL || cpu > 0 || thread > 0)
goto out_close;
......
......@@ -253,7 +253,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
return;
}
static struct thread *__machine__findnew_thread(struct machine *machine, pid_t tid,
static struct thread *__machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
{
struct rb_node **p = &machine->threads.rb_node;
......@@ -265,8 +266,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
* so most of the time we dont have to look up
* the full rbtree:
*/
if (machine->last_match && machine->last_match->tid == tid)
if (machine->last_match && machine->last_match->tid == tid) {
if (pid && pid != machine->last_match->pid_)
machine->last_match->pid_ = pid;
return machine->last_match;
}
while (*p != NULL) {
parent = *p;
......@@ -274,6 +278,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
if (th->tid == tid) {
machine->last_match = th;
if (pid && pid != th->pid_)
th->pid_ = pid;
return th;
}
......@@ -286,7 +292,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
if (!create)
return NULL;
th = thread__new(tid);
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, &machine->threads);
......@@ -298,12 +304,12 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
struct thread *machine__findnew_thread(struct machine *machine, pid_t tid)
{
return __machine__findnew_thread(machine, tid, true);
return __machine__findnew_thread(machine, 0, tid, true);
}
struct thread *machine__find_thread(struct machine *machine, pid_t tid)
{
return __machine__findnew_thread(machine, tid, false);
return __machine__findnew_thread(machine, 0, tid, false);
}
int machine__process_comm_event(struct machine *machine, union perf_event *event)
......@@ -1031,11 +1037,27 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
return 0;
}
/*
 * Take 'th' out of the machine's thread rbtree and append it to the
 * machine's dead_threads list. The thread is not freed here.
 */
static void machine__remove_thread(struct machine *machine, struct thread *th)
{
/* Cleared unconditionally, whether or not it currently points at 'th'. */
machine->last_match = NULL;
rb_erase(&th->rb_node, &machine->threads);
/*
 * We may have references to this thread, for instance in some hist_entry
 * instances, so just move them to a separate list.
 */
list_add_tail(&th->node, &machine->dead_threads);
}
int machine__process_fork_event(struct machine *machine, union perf_event *event)
{
struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
struct thread *thread = machine__find_thread(machine, event->fork.tid);
struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
/* if a thread currently exists for the thread id remove it */
if (thread != NULL)
machine__remove_thread(machine, thread);
thread = machine__findnew_thread(machine, event->fork.tid);
if (dump_trace)
perf_event__fprintf_task(event, stdout);
......@@ -1048,18 +1070,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
return 0;
}
static void machine__remove_thread(struct machine *machine, struct thread *th)
{
machine->last_match = NULL;
rb_erase(&th->rb_node, &machine->threads);
/*
* We may have references to this thread, for instance in some hist_entry
* instances, so just move them to a separate list.
*/
list_add_tail(&th->node, &machine->dead_threads);
}
int machine__process_exit_event(struct machine *machine, union perf_event *event)
int machine__process_exit_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
struct thread *thread = machine__find_thread(machine, event->fork.tid);
......@@ -1067,7 +1079,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
perf_event__fprintf_task(event, stdout);
if (thread != NULL)
machine__remove_thread(machine, thread);
thread__exited(thread);
return 0;
}
......
......@@ -8,6 +8,26 @@
#include "cpumap.h"
#include "thread_map.h"
/*
 * util/debug.c is not linked into this object, so the debug-printing
 * primitives it would otherwise supply ('verbose' and 'eprintf') are
 * provided here instead.
 */
int verbose;

/*
 * Emit a printf-style message on stderr, but only when the global 'verbose'
 * setting is at least 'level'.
 *
 * Returns whatever vfprintf() returned, or 0 if the message was suppressed.
 */
int eprintf(int level, const char *fmt, ...)
{
	va_list ap;
	int printed;

	/* Message is more detailed than the current verbosity allows. */
	if (level > verbose)
		return 0;

	va_start(ap, fmt);
	printed = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return printed;
}
/* Define PyVarObject_HEAD_INIT for python 2.5 */
#ifndef PyVarObject_HEAD_INIT
# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
......
......@@ -7,12 +7,13 @@
#include "util.h"
#include "debug.h"
struct thread *thread__new(pid_t tid)
struct thread *thread__new(pid_t pid, pid_t tid)
{
struct thread *self = zalloc(sizeof(*self));
if (self != NULL) {
map_groups__init(&self->mg);
self->pid_ = pid;
self->tid = tid;
self->ppid = -1;
self->comm = malloc(32);
......
......@@ -12,10 +12,12 @@ struct thread {
struct list_head node;
};
struct map_groups mg;
pid_t pid_; /* Not all tools update this */
pid_t tid;
pid_t ppid;
char shortname[3];
bool comm_set;
bool dead; /* if set thread has exited */
char *comm;
int comm_len;
......@@ -24,8 +26,12 @@ struct thread {
struct machine;
struct thread *thread__new(pid_t tid);
struct thread *thread__new(pid_t pid, pid_t tid);
void thread__delete(struct thread *self);
/*
 * Flag 'thread' as having exited by setting its 'dead' member. Nothing else
 * in the struct is modified. 'thread' is dereferenced unconditionally, so
 * callers must pass a non-NULL pointer.
 */
static inline void thread__exited(struct thread *thread)
{
thread->dead = true;
}
int thread__set_comm(struct thread *self, const char *comm);
int thread__comm_len(struct thread *self);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment