Commit ffa86c2f authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.12-20170314' of...

Merge tag 'perf-core-for-mingo-4.12-20170314' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add PERF_RECORD_NAMESPACES so that the kernel can record information
  required to associate samples to namespaces, helping in container
  problem characterization.

  Now 'perf record' has a '--namespaces' option to ask for such info,
  and when present, it can be used, initially, via a new sort order,
  'cgroup_id', allowing histogram entry bucketization by a (device, inode)
  based cgroup identifier (Hari Bathini)

- Add --next option to 'perf sched timehist', showing what is the next
  thread to run (Brendan Gregg)

Fixes:

- Fix segfault with basic block 'cycles' sort dimension (Changbin Du)

- Add c2c to command-list.txt, making it appear in the 'perf help'
  output (Changbin Du)

- Fix zeroing of 'abs_path' variable in the perf hists browser switch
  file code (Changbin Du)

- Hide tips messages when -q/--quiet is given to 'perf report' (Namhyung Kim)

Infrastructure changes:

- Use ref_reloc_sym + offset to setup kretprobes (Naveen Rao)

- Ignore generated files pmu-events/{jevents,pmu-events.c} for git (Changbin Du)

Documentation changes:

- Document +field style argument support for --field option (Changbin Du)

- Clarify 'perf c2c --stats' help message (Namhyung Kim)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 84e5b549 5f6bee34
......@@ -8,8 +8,9 @@ Overview
--------
These events are similar to tracepoint based events. Instead of Tracepoint,
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
kprobes can probe (this means, all functions body except for __kprobes
functions). Unlike the Tracepoint based event, this can be added and removed
kprobes can probe (this means, all functions except those with
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
Unlike the Tracepoint based event, this can be added and removed
dynamically, on the fly.
To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
......
......@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
......@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
......
......@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
/*
 * Identifies one namespace by a (device, inode) pair; one entry per
 * namespace is carried in a PERF_RECORD_NAMESPACES record.
 */
struct perf_ns_link_info {
/* device number */
__u64 dev;
/* inode number */
__u64 ino;
};
/*
 * Slot of each namespace type within the { dev, inode } array of a
 * PERF_RECORD_NAMESPACES record.
 */
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
......@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
......
......@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include "internal.h"
......@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
......@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
......@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
/*
 * Fork notification for @task: issue the task event first, then the
 * namespaces event for the same task.
 */
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
/* Report the namespaces of the new task as well. */
perf_event_namespaces(task);
}
/*
......@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}
/*
* namespaces tracking
*/
/*
 * Record template built by perf_event_namespaces() and handed, via the
 * opaque data pointer, to perf_event_namespaces_output().
 */
struct perf_namespaces_event {
/* task whose namespaces are being reported */
struct task_struct *task;
/* the part that is copied into the ring buffer with perf_output_put() */
struct {
struct perf_event_header header;
/* pid/tid are filled in per receiving event by the output callback */
u32 pid;
u32 tid;
/* number of entries in link_info[] */
u64 nr_namespaces;
/* one (dev, ino) pair per namespace, indexed by the *_NS_INDEX values */
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};
/* Non-zero when @event was opened with attr.namespaces set. */
static int perf_event_namespaces_match(struct perf_event *event)
{
	return event->attr.namespaces != 0;
}
static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_namespaces_match(event))
return;
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;
namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);
perf_output_put(&handle, namespaces_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
/*
 * Fill @ns_link_info with the (device, inode) pair identifying the namespace
 * of @task selected by @ns_ops.
 *
 * On failure of ns_get_path() the entry is left untouched (the caller hands
 * in zero-initialized storage).
 */
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
				   struct task_struct *task,
				   const struct proc_ns_operations *ns_ops)
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;

	error = ns_get_path(&ns_path, task, ns_ops);
	if (!error) {
		ns_inode = ns_path.dentry->d_inode;
		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
		ns_link_info->ino = ns_inode->i_ino;
		/*
		 * ns_get_path() returned a referenced path; drop it now that
		 * the identifiers have been copied, or the reference leaks on
		 * every namespaces event.
		 */
		path_put(&ns_path);
	}
}
/*
 * Emit a PERF_RECORD_NAMESPACES record describing @task.
 *
 * Returns immediately when no namespaces-enabled event is accounted.
 * Otherwise a zero-initialized record template is built, one link_info slot
 * is filled per namespace type that is compiled in, and the template is
 * offered to perf_event_namespaces_output() through perf_iterate_sb().
 * Slots of namespace types that are configured out remain zero.
 */
void perf_event_namespaces(struct task_struct *task)
{
	struct perf_namespaces_event namespaces_event;
	struct perf_ns_link_info *links;

	if (atomic_read(&nr_namespaces_events) == 0)
		return;

	/* pid, tid and link_info[] are implicitly zeroed here. */
	namespaces_event = (struct perf_namespaces_event){
		.task = task,
		.event_id = {
			.header = {
				.type = PERF_RECORD_NAMESPACES,
				.misc = 0,
				.size = sizeof(namespaces_event.event_id),
			},
			.nr_namespaces = NR_NAMESPACES,
		},
	};

	links = namespaces_event.event_id.link_info;

	/* Slots are filled in *_NS_INDEX order; each fill is independent. */
#ifdef CONFIG_NET_NS
	perf_fill_ns_link_info(&links[NET_NS_INDEX], task, &netns_operations);
#endif

#ifdef CONFIG_UTS_NS
	perf_fill_ns_link_info(&links[UTS_NS_INDEX], task, &utsns_operations);
#endif

#ifdef CONFIG_IPC_NS
	perf_fill_ns_link_info(&links[IPC_NS_INDEX], task, &ipcns_operations);
#endif

#ifdef CONFIG_PID_NS
	perf_fill_ns_link_info(&links[PID_NS_INDEX], task, &pidns_operations);
#endif

#ifdef CONFIG_USER_NS
	perf_fill_ns_link_info(&links[USER_NS_INDEX], task, &userns_operations);
#endif

	/* The mount namespace slot is filled unconditionally. */
	perf_fill_ns_link_info(&links[MNT_NS_INDEX], task, &mntns_operations);

#ifdef CONFIG_CGROUPS
	perf_fill_ns_link_info(&links[CGROUP_NS_INDEX], task,
			       &cgroupns_operations);
#endif

	perf_iterate_sb(perf_event_namespaces_output, &namespaces_event, NULL);
}
/*
* mmap tracking
*/
......@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
......@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
......
......@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
perf_event_namespaces(current);
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
......
......@@ -1740,11 +1740,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
}
EXPORT_SYMBOL_GPL(unregister_kprobes);
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
int __weak kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
......
......@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
......@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
perf_event_namespaces(tsk);
out:
fput(file);
return err;
......
......@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
/*
 * Identifies one namespace by a (device, inode) pair; one entry per
 * namespace is carried in a PERF_RECORD_NAMESPACES record.
 */
struct perf_ns_link_info {
/* device number */
__u64 dev;
/* inode number */
__u64 ino;
};
/*
 * Slot of each namespace type within the { dev, inode } array of a
 * PERF_RECORD_NAMESPACES record.
 */
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
......@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
......
......@@ -31,3 +31,5 @@ config.mak.autogen
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
pmu-events/pmu-events.c
pmu-events/jevents
......@@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
Record events of type PERF_RECORD_NAMESPACES.
--transaction::
Record transaction flags for transaction related events.
......
......@@ -72,7 +72,8 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
local_weight, cgroup_id.
Each key has following meaning:
......@@ -92,6 +93,7 @@ OPTIONS
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
......@@ -173,6 +175,9 @@ OPTIONS
By default, every sort key not specified in -F will be appended
automatically.
If the keys start with a prefix '+', then the specified field(s) will be
appended to the default field order. For example: perf report -F +period,sample.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
......
......@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
--migrations::
Show migration events.
-n::
--next::
Show next task.
-I::
--idle-hist::
Show idle-related events only.
......
......@@ -248,6 +248,9 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
--show-namespace-events
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
......
......@@ -10,6 +10,7 @@
#include "symbol.h"
#include "map.h"
#include "probe-event.h"
#include "probe-file.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
......@@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
* However, if the user specifies an offset, we fall back to using the
* GEP since all userspace applications (objdump/readelf) show function
* disassembly with offsets from the GEP.
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
!map || !sym)
if (pev->point.offset || !map || !sym)
return;
/* For kretprobes, add an offset only if the kernel supports it */
if (!pev->uprobes && pev->point.retprobe) {
#ifdef HAVE_LIBELF_SUPPORT
if (!kretprobe_offset_is_supported())
#endif
return;
}
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
......
......@@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......
......@@ -2334,7 +2334,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
static void perf_c2c_display(struct perf_session *session)
{
if (c2c.use_stdio)
if (use_browser == 0)
perf_c2c__hists_fprintf(stdout, session);
else
perf_c2c__hists_browse(&c2c.hists.hists);
......@@ -2536,7 +2536,7 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
"Display only statistic tables (implies --stdio)"),
OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
"Display full length of symbols"),
OPT_BOOLEAN(0, "no-source", &no_source,
......
......@@ -364,6 +364,7 @@ static struct perf_tool tool = {
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
};
......
......@@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool,
return err;
}
/*
 * Process a NAMESPACES event and then forward it unchanged to the output.
 * The event is repiped regardless of the processing result; that result is
 * what gets returned.
 */
static int perf_event__repipe_namespaces(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int ret;

	ret = perf_event__process_namespaces(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return ret;
}
static int perf_event__repipe_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject)
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
inject->tool.comm = perf_event__repipe_comm;
inject->tool.namespaces = perf_event__repipe_namespaces;
inject->tool.exit = perf_event__repipe_exit;
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
......
......@@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = {
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
......
......@@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
......@@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->tool.exit = perf_event__process_exit;
kvm->tool.fork = perf_event__process_fork;
kvm->tool.lost = process_lost_event;
kvm->tool.namespaces = perf_event__process_namespaces;
kvm->tool.ordered_events = true;
perf_tool__fill_defaults(&kvm->tool);
......
......@@ -858,6 +858,7 @@ static int __cmd_report(bool display_info)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
......
......@@ -342,6 +342,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
.lost = perf_event__process_lost,
.fork = perf_event__process_fork,
.build_id = perf_event__process_build_id,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
},
.input_name = "perf.data",
......
......@@ -876,6 +876,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGTERM, sig_handler);
signal(SIGSEGV, sigsegv_handler);
if (rec->opts.record_namespaces)
tool->namespace_events = true;
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler);
if (rec->opts.auxtrace_snapshot_mode)
......@@ -983,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
*/
if (forks) {
union perf_event *event;
pid_t tgid;
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
if (event == NULL) {
......@@ -996,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* cannot see a correct process name for those events.
* Synthesize COMM event to prevent it.
*/
perf_event__synthesize_comm(tool, event,
rec->evlist->workload.pid,
process_synthesized_event,
machine);
tgid = perf_event__synthesize_comm(tool, event,
rec->evlist->workload.pid,
process_synthesized_event,
machine);
free(event);
if (tgid == -1)
goto out_child;
event = malloc(sizeof(event->namespaces) +
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
machine->id_hdr_size);
if (event == NULL) {
err = -ENOMEM;
goto out_child;
}
/*
* Synthesize NAMESPACES event for the command specified.
*/
perf_event__synthesize_namespaces(tool, event,
rec->evlist->workload.pid,
tgid, process_synthesized_event,
machine);
free(event);
perf_evlist__start_workload(rec->evlist);
......@@ -1497,6 +1521,7 @@ static struct record record = {
.fork = perf_event__process_fork,
.exit = perf_event__process_exit,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.ordered_events = true,
......@@ -1611,6 +1636,8 @@ static struct option __record_options[] = {
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
"Record namespaces events"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
......
......@@ -394,8 +394,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
fprintf(stdout, "\n\n");
}
if (sort_order == NULL &&
parent_pattern == default_parent_pattern)
if (!quiet)
fprintf(stdout, "#\n# (%s)\n#\n", help);
if (rep->show_threads) {
......@@ -701,6 +700,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
......
......@@ -221,6 +221,7 @@ struct perf_sched {
unsigned int max_stack;
bool show_cpu_visual;
bool show_wakeups;
bool show_next;
bool show_migrations;
bool show_state;
u64 skipped_samples;
......@@ -1897,14 +1898,18 @@ static char task_state_char(struct thread *thread, int state)
}
static void timehist_print_sample(struct perf_sched *sched,
struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
struct thread *thread,
u64 t, int state)
{
struct thread_runtime *tr = thread__priv(thread);
const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
char tstr[64];
char nstr[30];
u64 wait_time;
timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
......@@ -1937,7 +1942,12 @@ static void timehist_print_sample(struct perf_sched *sched,
if (sched->show_state)
printf(" %5c ", task_state_char(thread, state));
if (sched->show_wakeups)
if (sched->show_next) {
snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
printf(" %-*s", comm_width, nstr);
}
if (sched->show_wakeups && !sched->show_next)
printf(" %-*s", comm_width, "");
if (thread->tid == 0)
......@@ -2531,7 +2541,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
}
if (!sched->summary_only)
timehist_print_sample(sched, sample, &al, thread, t, state);
timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
out:
if (sched->hist_time.start == 0 && t >= ptime->start)
......@@ -3272,6 +3282,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.lost = perf_event__process_lost,
.fork = perf_sched__process_fork_event,
.ordered_events = true,
......@@ -3340,6 +3351,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('S', "with-summary", &sched.summary,
"Show all syscalls and summary with statistics"),
OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"),
OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
......@@ -3437,10 +3449,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(timehist_usage, timehist_options);
}
if (sched.show_wakeups && sched.summary_only) {
pr_err(" Error: -s and -w are mutually exclusive.\n");
if ((sched.show_wakeups || sched.show_next) &&
sched.summary_only) {
pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
parse_options_usage(timehist_usage, timehist_options, "s", true);
parse_options_usage(NULL, timehist_options, "w", true);
if (sched.show_wakeups)
parse_options_usage(NULL, timehist_options, "w", true);
if (sched.show_next)
parse_options_usage(NULL, timehist_options, "n", true);
return -EINVAL;
}
......
......@@ -830,6 +830,7 @@ struct perf_script {
bool show_task_events;
bool show_mmap_events;
bool show_switch_events;
bool show_namespace_events;
bool allocated;
struct cpu_map *cpus;
struct thread_map *threads;
......@@ -1118,6 +1119,41 @@ static int process_comm_event(struct perf_tool *tool,
return ret;
}
/*
 * 'perf script --show-namespace-events' handler: feed the NAMESPACES event
 * to the generic processing code and, if that succeeds, print it.
 *
 * Returns 0 on success, -1 if the thread cannot be found/created or the
 * generic processing fails.
 */
static int process_namespaces_event(struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct perf_script *script = container_of(tool, struct perf_script, tool);
	struct perf_session *session = script->session;
	struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
	struct thread *thread;
	int err;

	thread = machine__findnew_thread(machine, event->namespaces.pid,
					 event->namespaces.tid);
	if (!thread) {
		pr_debug("problem processing NAMESPACES event, skipping it.\n");
		return -1;
	}

	err = perf_event__process_namespaces(tool, event, sample, machine) < 0 ? -1 : 0;
	if (!err) {
		/*
		 * Without sample_id_all the sample carries no identity of its
		 * own, so take pid/tid from the event and zero cpu/time.
		 */
		if (!evsel->attr.sample_id_all) {
			sample->cpu = 0;
			sample->time = 0;
			sample->tid = event->namespaces.tid;
			sample->pid = event->namespaces.pid;
		}

		print_sample_start(sample, thread, evsel);
		perf_event__fprintf(event, stdout);
	}

	/* Drop the thread reference on both the success and failure paths. */
	thread__put(thread);
	return err;
}
static int process_fork_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -1293,6 +1329,8 @@ static int __cmd_script(struct perf_script *script)
}
if (script->show_switch_events)
script->tool.context_switch = process_switch_event;
if (script->show_namespace_events)
script->tool.namespaces = process_namespaces_event;
ret = perf_session__process_events(script->session);
......@@ -2097,6 +2135,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
......@@ -2180,6 +2219,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the mmap events"),
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
"Show namespace events (if recorded)"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN(0, "ns", &nanosecs,
"Use 9 decimal places when displaying time"),
......
......@@ -2415,8 +2415,9 @@ static int trace__replay(struct trace *trace)
trace->tool.exit = perf_event__process_exit;
trace->tool.fork = perf_event__process_fork;
trace->tool.attr = perf_event__process_attr;
trace->tool.tracing_data = perf_event__process_tracing_data;
trace->tool.tracing_data = perf_event__process_tracing_data;
trace->tool.build_id = perf_event__process_build_id;
trace->tool.namespaces = perf_event__process_namespaces;
trace->tool.ordered_events = true;
trace->tool.ordering_requires_timestamps = true;
......
......@@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common
perf-buildid-list mainporcelain common
perf-data mainporcelain common
perf-diff mainporcelain common
perf-c2c mainporcelain common
perf-config mainporcelain common
perf-evlist mainporcelain common
perf-ftrace mainporcelain common
......
......@@ -50,6 +50,7 @@ struct record_opts {
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
bool record_namespaces;
bool record_switch_events;
bool all_kernel;
bool all_user;
......
......@@ -2308,7 +2308,7 @@ static int switch_data_file(void)
return ret;
memset(options, 0, sizeof(options));
memset(options, 0, sizeof(abs_path));
memset(abs_path, 0, sizeof(abs_path));
while ((dent = readdir(pwd_dir))) {
char path[PATH_MAX];
......
......@@ -42,6 +42,7 @@ libperf-y += pstack.o
libperf-y += session.o
libperf-$(CONFIG_AUDIT) += syscalltbl.o
libperf-y += ordered-events.o
libperf-y += namespaces.o
libperf-y += comm.o
libperf-y += thread.o
libperf-y += thread_map.o
......
......@@ -1468,6 +1468,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
.lost = perf_event__process_lost,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......
......@@ -31,6 +31,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
......@@ -49,6 +50,16 @@ static const char *perf_event__names[] = {
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
};
/*
 * Namespace names indexed by the *_NS_INDEX values; looked up through
 * perf_ns__name() to build the /proc/<pid>/ns/<name> path when synthesizing
 * NAMESPACES events.
 */
static const char *perf_ns__names[] = {
[NET_NS_INDEX] = "net",
[UTS_NS_INDEX] = "uts",
[IPC_NS_INDEX] = "ipc",
[PID_NS_INDEX] = "pid",
[USER_NS_INDEX] = "user",
[MNT_NS_INDEX] = "mnt",
[CGROUP_NS_INDEX] = "cgroup",
};
const char *perf_event__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_event__names))
......@@ -58,6 +69,13 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}
/* Map a namespace index to its name; out-of-range ids yield "UNKNOWN". */
static const char *perf_ns__name(unsigned int id)
{
	return id < ARRAY_SIZE(perf_ns__names) ? perf_ns__names[id] : "UNKNOWN";
}
static int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
......@@ -203,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
return tgid;
}
/*
 * Look up the (device, inode) pair of /proc/<pid>/ns/<ns> and store it in
 * @ns_link_info.
 *
 * @pid:          thread whose namespace link is inspected.
 * @ns:           namespace name ("net", "mnt", ...).
 * @ns_link_info: destination; left untouched when the path cannot be
 *                built or stat64() fails.
 */
static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
					 struct perf_ns_link_info *ns_link_info)
{
	struct stat64 st;
	char proc_ns[128];
	int len;

	/*
	 * Bounded formatting: never write past proc_ns, and treat a
	 * truncated path like a failed lookup rather than stat'ing the
	 * wrong file. pid_t is signed, hence %d.
	 */
	len = snprintf(proc_ns, sizeof(proc_ns), "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || (size_t)len >= sizeof(proc_ns))
		return;

	if (stat64(proc_ns, &st) == 0) {
		ns_link_info->dev = st.st_dev;
		ns_link_info->ino = st.st_ino;
	}
}
/*
 * Synthesize a PERF_RECORD_NAMESPACES event for thread @pid of process
 * @tgid and pass it to @process.
 *
 * @event must point at storage large enough for the namespaces record,
 * NR_NAMESPACES link entries and machine->id_hdr_size trailing bytes.
 *
 * Returns 0 when nothing had to be done (no tool, or namespace events not
 * requested) or on success, -1 when delivering the event fails.
 */
int perf_event__synthesize_namespaces(struct perf_tool *tool,
				      union perf_event *event,
				      pid_t pid, pid_t tgid,
				      perf_event__handler_t process,
				      struct machine *machine)
{
	struct perf_ns_link_info *slots;
	size_t record_size;
	u32 i;

	if (!tool || !tool->namespace_events)
		return 0;

	/* Fixed part + one link entry per namespace + sample-id trailer. */
	record_size = sizeof(event->namespaces) +
		      (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
		      machine->id_hdr_size;

	memset(&event->namespaces, 0, record_size);

	event->namespaces.header.type = PERF_RECORD_NAMESPACES;
	event->namespaces.header.size = record_size;

	event->namespaces.pid = tgid;
	event->namespaces.tid = pid;
	event->namespaces.nr_namespaces = NR_NAMESPACES;

	/* Entries whose /proc link cannot be read stay zeroed. */
	slots = event->namespaces.link_info;
	for (i = 0; i < event->namespaces.nr_namespaces; i++)
		perf_event__get_ns_link_info(pid, perf_ns__name(i), &slots[i]);

	return perf_tool__process_synth_event(tool, event, machine, process) != 0 ? -1 : 0;
}
static int perf_event__synthesize_fork(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid, pid_t ppid,
......@@ -434,8 +504,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event,
union perf_event *fork_event,
union perf_event *namespaces_event,
pid_t pid, int full,
perf_event__handler_t process,
perf_event__handler_t process,
struct perf_tool *tool,
struct machine *machine,
bool mmap_data,
......@@ -455,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (tgid == -1)
return -1;
if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
tgid, process, machine) < 0)
return -1;
return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data,
proc_map_timeout);
......@@ -488,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
ppid, process, machine) < 0)
break;
if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
tgid, process, machine) < 0)
break;
/*
* Send the prepared comm event
*/
......@@ -516,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
unsigned int proc_map_timeout)
{
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
int err = -1, thread, j;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
......@@ -530,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
machine->id_hdr_size);
if (namespaces_event == NULL)
goto out_free_fork;
err = 0;
for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event,
fork_event,
fork_event, namespaces_event,
thread_map__pid(threads, thread), 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
......@@ -559,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
/* if not, generate events for it */
if (need_leader &&
__event__synthesize_thread(comm_event, mmap_event,
fork_event,
fork_event, namespaces_event,
comm_event->comm.pid, 0,
process, tool, machine,
mmap_data, proc_map_timeout)) {
......@@ -568,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
}
}
}
free(namespaces_event);
out_free_fork:
free(fork_event);
out_free_mmap:
free(mmap_event);
......@@ -587,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
char proc_path[PATH_MAX];
struct dirent *dirent;
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
int err = -1;
if (machine__is_default_guest(machine))
......@@ -604,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (fork_event == NULL)
goto out_free_mmap;
namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
machine->id_hdr_size);
if (namespaces_event == NULL)
goto out_free_fork;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
proc = opendir(proc_path);
if (proc == NULL)
goto out_free_fork;
goto out_free_namespaces;
while ((dirent = readdir(proc)) != NULL) {
char *end;
......@@ -620,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
* We may race with exiting thread, so don't stop just because
* one thread couldn't be synthesized.
*/
__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
1, process, tool, machine, mmap_data,
__event__synthesize_thread(comm_event, mmap_event, fork_event,
namespaces_event, pid, 1, process,
tool, machine, mmap_data,
proc_map_timeout);
}
err = 0;
closedir(proc);
out_free_namespaces:
free(namespaces_event);
out_free_fork:
free(fork_event);
out_free_mmap:
......@@ -1008,6 +1108,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
}
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
{
size_t ret = 0;
struct perf_ns_link_info *ns_link_info;
u32 nr_namespaces, idx;
ns_link_info = event->namespaces.link_info;
nr_namespaces = event->namespaces.nr_namespaces;
ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
event->namespaces.pid,
event->namespaces.tid,
nr_namespaces);
for (idx = 0; idx < nr_namespaces; idx++) {
if (idx && (idx % 4 == 0))
ret += fprintf(fp, "\n\t\t ");
ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
perf_ns__name(idx), (u64)ns_link_info[idx].dev,
(u64)ns_link_info[idx].ino,
((idx + 1) != nr_namespaces) ? ", " : "]\n");
}
return ret;
}
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
......@@ -1016,6 +1143,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
return machine__process_comm_event(machine, event, sample);
}
/*
 * Tool callback for PERF_RECORD_NAMESPACES: hands the event straight to
 * machine__process_namespaces_event() and returns its result.  @tool itself
 * is not consulted.
 */
int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int rc = machine__process_namespaces_event(machine, event, sample);

	return rc;
}
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
......@@ -1196,6 +1331,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP:
ret += perf_event__fprintf_mmap(event, fp);
break;
case PERF_RECORD_NAMESPACES:
ret += perf_event__fprintf_namespaces(event, fp);
break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
......
......@@ -39,6 +39,13 @@ struct comm_event {
char comm[16];
};
/*
 * Payload of a PERF_RECORD_NAMESPACES record: the namespaces a thread
 * belongs to, one link_info entry per namespace.
 */
struct namespaces_event {
struct perf_event_header header;
/* the synthesizer stores the thread group id in pid and the thread id in tid */
u32 pid, tid;
/* number of valid entries in link_info[] */
u64 nr_namespaces;
/* per-namespace (dev, ino) identifiers; variable length, must stay last */
struct perf_ns_link_info link_info[];
};
struct fork_event {
struct perf_event_header header;
u32 pid, ppid;
......@@ -485,6 +492,7 @@ union perf_event {
struct mmap_event mmap;
struct mmap2_event mmap2;
struct comm_event comm;
struct namespaces_event namespaces;
struct fork_event fork;
struct lost_event lost;
struct lost_samples_event lost_samples;
......@@ -587,6 +595,10 @@ int perf_event__process_switch(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
/*
 * Tool callback for PERF_RECORD_NAMESPACES events; forwards the event to
 * machine__process_namespaces_event() and returns its result.
 */
int perf_event__process_namespaces(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -636,6 +648,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
int perf_event__synthesize_namespaces(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
struct machine *machine);
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
......@@ -653,6 +671,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
/* Print a PERF_RECORD_NAMESPACES event (pid/tid and each dev/inode pair) to fp. */
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
......
......@@ -932,6 +932,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
if (opts->record_namespaces)
attr->namespaces = track;
if (opts->record_switch_events)
attr->context_switch = track;
......
......@@ -3,6 +3,7 @@
#include "hist.h"
#include "map.h"
#include "session.h"
#include "namespaces.h"
#include "sort.h"
#include "evlist.h"
#include "evsel.h"
......@@ -169,6 +170,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
......@@ -574,9 +576,14 @@ __hists__add_entry(struct hists *hists,
bool sample_self,
struct hist_entry_ops *ops)
{
struct namespaces *ns = thread__namespaces(al->thread);
struct hist_entry entry = {
.thread = al->thread,
.comm = thread__comm(al->thread),
.cgroup_id = {
.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
},
.ms = {
.map = al->map,
.sym = al->sym,
......
......@@ -30,6 +30,7 @@ enum hist_column {
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
HISTC_CGROUP_ID,
HISTC_PARENT,
HISTC_CPU,
HISTC_SOCKET,
......
......@@ -13,6 +13,7 @@
#include <symbol/kallsyms.h>
#include "unwind.h"
#include "linux/hash.h"
#include "asm/bug.h"
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
......@@ -501,6 +502,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
return err;
}
/*
 * Handle a PERF_RECORD_NAMESPACES event: look up (or create) the thread named
 * by the event and record the event's namespaces on it, stamped with the
 * sample time.
 *
 * A one-time warning is issued when the number of namespaces in the event
 * differs from the NR_NAMESPACES this tool was built with.
 *
 * Returns 0 on success, -1 if the thread could not be obtained or the
 * namespaces could not be attached to it.
 */
int machine__process_namespaces_event(struct machine *machine __maybe_unused,
				      union perf_event *event,
				      struct perf_sample *sample __maybe_unused)
{
	struct namespaces_event *ns_event = &event->namespaces;
	struct thread *thread;
	int err = -1;

	thread = machine__findnew_thread(machine, ns_event->pid, ns_event->tid);

	WARN_ONCE(ns_event->nr_namespaces > NR_NAMESPACES,
		  "\nWARNING: kernel seems to support more namespaces than perf"
		  " tool.\nTry updating the perf tool..\n\n");

	WARN_ONCE(ns_event->nr_namespaces < NR_NAMESPACES,
		  "\nWARNING: perf tool seems to support more namespaces than"
		  " the kernel.\nTry updating the kernel..\n\n");

	if (dump_trace)
		perf_event__fprintf_namespaces(event, stdout);

	if (thread != NULL &&
	    thread__set_namespaces(thread, sample->time, ns_event) == 0)
		err = 0;
	else
		dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");

	/* drop the reference obtained from machine__findnew_thread() */
	thread__put(thread);

	return err;
}
int machine__process_lost_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample __maybe_unused)
{
......@@ -1538,6 +1570,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_comm_event(machine, event, sample); break;
case PERF_RECORD_MMAP:
ret = machine__process_mmap_event(machine, event, sample); break;
case PERF_RECORD_NAMESPACES:
ret = machine__process_namespaces_event(machine, event, sample); break;
case PERF_RECORD_MMAP2:
ret = machine__process_mmap2_event(machine, event, sample); break;
case PERF_RECORD_FORK:
......
......@@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
int machine__process_switch_event(struct machine *machine,
union perf_event *event);
/*
 * Attach the namespaces carried by a PERF_RECORD_NAMESPACES event to the
 * thread it names.  Returns 0 on success, -1 on failure.
 */
int machine__process_namespaces_event(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
......
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* Copyright (C) 2017 Hari Bathini, IBM Corporation
*/
#include "namespaces.h"
#include "util.h"
#include "event.h"
#include <stdlib.h>
#include <stdio.h>
/*
 * Allocate a namespaces object, optionally initialised from @event.
 *
 * The link_info[] array always has room for at least NR_NAMESPACES entries,
 * even when @event carries fewer (e.g. a record produced by a kernel that
 * knows fewer namespaces than this tool).  Consumers index the array with
 * fixed *_NS_INDEX constants, so a shorter array would be read out of bounds;
 * entries the event does not provide are left zeroed.  When the event carries
 * more than NR_NAMESPACES entries, all of them are kept.
 *
 * With a NULL @event the result holds NR_NAMESPACES zeroed entries.
 *
 * end_time is set to -1, i.e. "still in effect".
 *
 * Returns the new object (release with namespaces__free()) or NULL when the
 * allocation fails.
 */
struct namespaces *namespaces__new(struct namespaces_event *event)
{
	struct namespaces *namespaces;
	u64 nr_event = event ? event->nr_namespaces : 0;
	u64 nr_alloc = nr_event > NR_NAMESPACES ? nr_event : NR_NAMESPACES;

	namespaces = zalloc(sizeof(struct namespaces) +
			    nr_alloc * sizeof(struct perf_ns_link_info));
	if (!namespaces)
		return NULL;

	namespaces->end_time = -1;

	/* copy only what the event really contains; the rest stays zero */
	if (event)
		memcpy(namespaces->link_info, event->link_info,
		       nr_event * sizeof(struct perf_ns_link_info));

	return namespaces;
}
/*
 * Release a namespaces object.  The object is a single allocation (header
 * plus trailing link_info[] array), so one free() is all that is needed.
 */
void namespaces__free(struct namespaces *namespaces)
{
	free(namespaces);
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* Copyright (C) 2017 Hari Bathini, IBM Corporation
*/
#ifndef __PERF_NAMESPACES_H
#define __PERF_NAMESPACES_H
#include "../perf.h"
#include <linux/list.h>
struct namespaces_event;
/*
 * One set of namespaces a thread has lived in.  Threads keep these on a
 * list with the most recently recorded set first.
 */
struct namespaces {
/* node on the owning thread's namespaces_list */
struct list_head list;
/* -1 on creation; set to a timestamp once a newer set is recorded */
u64 end_time;
/* per-namespace (dev, ino) identifiers, copied from the event */
struct perf_ns_link_info link_info[];
};
/*
 * Allocate a zeroed namespaces object, set end_time to -1 and, when event is
 * non-NULL, copy its link_info entries.  Returns NULL on allocation failure.
 */
struct namespaces *namespaces__new(struct namespaces_event *event);
/* Release an object obtained from namespaces__new(). */
void namespaces__free(struct namespaces *namespaces);
#endif /* __PERF_NAMESPACES_H */
......@@ -757,7 +757,9 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
}
for (i = 0; i < ntevs; i++) {
if (!tevs[i].point.address || tevs[i].point.retprobe)
if (!tevs[i].point.address)
continue;
if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
continue;
/* If we found a wrong one, mark it by NULL symbol */
if (kprobe_warn_out_range(tevs[i].point.symbol,
......@@ -1528,11 +1530,6 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
return -EINVAL;
}
if (pp->retprobe && !pp->function) {
semantic_error("Return probe requires an entry function.\n");
return -EINVAL;
}
if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
semantic_error("Offset/Line/Lazy pattern can't be used with "
"return probe.\n");
......@@ -2841,7 +2838,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
}
/* Note that the symbols in the kmodule are not relocated */
if (!pev->uprobes && !pp->retprobe && !pev->target) {
if (!pev->uprobes && !pev->target &&
(!pp->retprobe || kretprobe_offset_is_supported())) {
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
......
......@@ -877,35 +877,33 @@ int probe_cache__show_all_caches(struct strfilter *filter)
return 0;
}
enum ftrace_readme {
FTRACE_README_PROBE_TYPE_X = 0,
FTRACE_README_KRETPROBE_OFFSET,
FTRACE_README_END,
};
static struct {
const char *pattern;
bool avail;
bool checked;
} probe_type_table[] = {
#define DEFINE_TYPE(idx, pat, def_avail) \
[idx] = {.pattern = pat, .avail = (def_avail)}
DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true),
DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true),
DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false),
DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true),
DEFINE_TYPE(PROBE_TYPE_BITFIELD,
"* b<bit-width>@<bit-offset>/<container-size>", true),
bool avail;
} ftrace_readme_table[] = {
#define DEFINE_TYPE(idx, pat) \
[idx] = {.pattern = pat, .avail = false}
DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
};
bool probe_type_is_available(enum probe_type type)
static bool scan_ftrace_readme(enum ftrace_readme type)
{
int fd;
FILE *fp;
char *buf = NULL;
size_t len = 0;
bool target_line = false;
bool ret = probe_type_table[type].avail;
int fd;
bool ret = false;
static bool scanned = false;
if (type >= PROBE_TYPE_END)
return false;
/* We don't have to check the type which supported by default */
if (ret || probe_type_table[type].checked)
return ret;
if (scanned)
goto result;
fd = open_trace_file("README", false);
if (fd < 0)
......@@ -917,21 +915,34 @@ bool probe_type_is_available(enum probe_type type)
return ret;
}
while (getline(&buf, &len, fp) > 0 && !ret) {
if (!target_line) {
target_line = !!strstr(buf, " type: ");
if (!target_line)
continue;
} else if (strstr(buf, "\t ") != buf)
break;
ret = strglobmatch(buf, probe_type_table[type].pattern);
}
/* Cache the result */
probe_type_table[type].checked = true;
probe_type_table[type].avail = ret;
while (getline(&buf, &len, fp) > 0)
for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++)
if (!ftrace_readme_table[i].avail)
ftrace_readme_table[i].avail =
strglobmatch(buf, ftrace_readme_table[i].pattern);
scanned = true;
fclose(fp);
free(buf);
return ret;
result:
if (type >= FTRACE_README_END)
return false;
return ftrace_readme_table[type].avail;
}
/*
 * Tell whether probe argument type @type can be used.
 *
 * Out-of-range values are never available.  PROBE_TYPE_X depends on what the
 * ftrace README advertises; every other known type is reported as available
 * without consulting the README.
 */
bool probe_type_is_available(enum probe_type type)
{
	if (type >= PROBE_TYPE_END)
		return false;

	if (type != PROBE_TYPE_X)
		return true;

	return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X);
}
/*
 * Report whether the ftrace README matches the FTRACE_README_KRETPROBE_OFFSET
 * pattern, as determined by scan_ftrace_readme().
 */
bool kretprobe_offset_is_supported(void)
{
	bool supported = scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);

	return supported;
}
......@@ -65,6 +65,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
const char *group, const char *event);
int probe_cache__show_all_caches(struct strfilter *filter);
bool probe_type_is_available(enum probe_type type);
/* True when the ftrace README matches the kretprobe offset ("place (kretprobe):") pattern. */
bool kretprobe_offset_is_supported(void);
#else /* ! HAVE_LIBELF_SUPPORT */
static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused)
{
......
......@@ -1239,6 +1239,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->mmap2(tool, event, sample, machine);
case PERF_RECORD_COMM:
return tool->comm(tool, event, sample, machine);
case PERF_RECORD_NAMESPACES:
return tool->namespaces(tool, event, sample, machine);
case PERF_RECORD_FORK:
return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
......@@ -1494,6 +1496,11 @@ int perf_session__register_idle_thread(struct perf_session *session)
err = -1;
}
if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
pr_err("problem inserting idle task.\n");
err = -1;
}
/* machine__findnew_thread() got the thread, so put it */
thread__put(thread);
return err;
......
......@@ -536,6 +536,46 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
/* --sort cgroup_id */
/*
 * Compare two device numbers.  The result is @right_dev - @left_dev
 * (computed in u64, then reinterpreted as signed): zero when equal.
 */
static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
{
	u64 delta = right_dev - left_dev;

	return (int64_t)delta;
}
/*
 * Compare two inode numbers.  The result is @right_ino - @left_ino
 * (computed in u64, then reinterpreted as signed): zero when equal.
 */
static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
{
	u64 delta = right_ino - left_ino;

	return (int64_t)delta;
}
/*
 * Order two hist entries by cgroup id: device number first, inode number as
 * the tie breaker.
 *
 * The helpers return "second argument minus first", so the entries are handed
 * over swapped to obtain a left-minus-right result.
 */
static int64_t
sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
{
	int64_t dev_diff = _sort__cgroup_dev_cmp(right->cgroup_id.dev,
						 left->cgroup_id.dev);

	if (dev_diff)
		return dev_diff;

	return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
				       left->cgroup_id.ino);
}
/*
 * Format the cgroup id of @he into @bf as "<dev>/0x<inode>" (device in
 * decimal, inode in hexadecimal).  @width is not used.
 *
 * dev and ino are u64.  They are cast to unsigned long long and printed with
 * the ll length modifier so that the conversion specifiers match the argument
 * type regardless of how u64 is defined; passing a 64-bit value to %lu/%lx is
 * undefined where unsigned long is narrower.  The output is unchanged where
 * unsigned long is already 64 bits wide.
 *
 * Returns whatever repsep_snprintf() returns.
 */
static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
					  char *bf, size_t size,
					  unsigned int width __maybe_unused)
{
	return repsep_snprintf(bf, size, "%llu/0x%llx",
			       (unsigned long long)he->cgroup_id.dev,
			       (unsigned long long)he->cgroup_id.ino);
}
/* Sort key "cgroup_id": buckets hist entries by the (dev, inode) of their cgroup namespace. */
struct sort_entry sort_cgroup_id = {
	.se_header	= "cgroup id (dev/inode)",
	.se_width_idx	= HISTC_CGROUP_ID,
	.se_cmp		= sort__cgroup_id_cmp,
	.se_snprintf	= hist_entry__cgroup_id_snprintf,
};
/* --sort socket */
static int64_t
......@@ -846,6 +886,9 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
/*
 * Order two hist entries by the cycle count stored in their branch info.
 * When either entry has no branch info, the ordering is delegated to
 * cmp_null() on the two branch_info pointers instead of dereferencing them.
 */
static int64_t
sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
{
	if (left->branch_info && right->branch_info)
		return left->branch_info->flags.cycles -
		       right->branch_info->flags.cycles;

	return cmp_null(left->branch_info, right->branch_info);
}
......@@ -853,6 +896,8 @@ sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
if (!he->branch_info)
return scnprintf(bf, size, "%-.*s", width, "N/A");
if (he->branch_info->flags.cycles == 0)
return repsep_snprintf(bf, size, "%-*s", width, "-");
return repsep_snprintf(bf, size, "%-*hd", width,
......@@ -1459,6 +1504,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
};
#undef DIM
......
......@@ -54,6 +54,11 @@ struct he_stat {
u32 nr_events;
};
/* Identifies a namespace by the (device, inode) pair of its link. */
struct namespace_id {
u64 dev;
u64 ino;
};
struct hist_entry_diff {
bool computed;
union {
......@@ -91,6 +96,7 @@ struct hist_entry {
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
struct namespace_id cgroup_id;
u64 ip;
u64 transaction;
s32 socket;
......@@ -212,6 +218,7 @@ enum sort_type {
SORT_TRANSACTION,
SORT_TRACE,
SORT_SYM_SIZE,
SORT_CGROUP_ID,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
......
......@@ -7,6 +7,7 @@
#include "thread-stack.h"
#include "util.h"
#include "debug.h"
#include "namespaces.h"
#include "comm.h"
#include "unwind.h"
......@@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
comm_str = malloc(32);
......@@ -66,7 +68,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
void thread__delete(struct thread *thread)
{
struct comm *comm, *tmp;
struct namespaces *namespaces, *tmp_namespaces;
struct comm *comm, *tmp_comm;
BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
......@@ -76,7 +79,12 @@ void thread__delete(struct thread *thread)
map_groups__put(thread->mg);
thread->mg = NULL;
}
list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) {
list_for_each_entry_safe(namespaces, tmp_namespaces,
&thread->namespaces_list, list) {
list_del(&namespaces->list);
namespaces__free(namespaces);
}
list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
list_del(&comm->list);
comm__free(comm);
}
......@@ -104,6 +112,38 @@ void thread__put(struct thread *thread)
}
}
/*
 * Return the entry at the head of @thread's namespaces list, or NULL when
 * the list is empty.  New entries are inserted at the head, so this is the
 * most recently recorded set.
 */
struct namespaces *thread__namespaces(const struct thread *thread)
{
	return list_empty(&thread->namespaces_list) ? NULL :
	       list_first_entry(&thread->namespaces_list, struct namespaces, list);
}
/*
 * Record a new set of namespaces for @thread, built from @event (which may
 * be NULL, see namespaces__new()).  The new set becomes the head of the
 * thread's namespaces list.
 *
 * When @timestamp is non-zero and the thread already had a set, that
 * previous set's end_time is updated to @timestamp.
 *
 * Returns 0 on success or -ENOMEM when the new set cannot be allocated.
 */
int thread__set_namespaces(struct thread *thread, u64 timestamp,
			   struct namespaces_event *event)
{
	struct namespaces *prev = thread__namespaces(thread);
	struct namespaces *fresh = namespaces__new(event);

	if (fresh == NULL)
		return -ENOMEM;

	list_add(&fresh->list, &thread->namespaces_list);

	/*
	 * setns syscall must have changed few or all the namespaces of this
	 * thread.  The set that was at the head before the insertion now
	 * directly follows the new one; close it off at @timestamp.
	 */
	if (timestamp && prev)
		prev->end_time = timestamp;

	return 0;
}
struct comm *thread__comm(const struct thread *thread)
{
if (list_empty(&thread->comm_list))
......
......@@ -28,6 +28,7 @@ struct thread {
bool comm_set;
int comm_len;
bool dead; /* if set thread has exited */
struct list_head namespaces_list;
struct list_head comm_list;
u64 db_id;
......@@ -40,6 +41,7 @@ struct thread {
};
struct machine;
struct namespaces;
struct comm;
struct thread *thread__new(pid_t pid, pid_t tid);
......@@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread)
thread->dead = true;
}
/* Head of the thread's namespaces list, or NULL when the list is empty. */
struct namespaces *thread__namespaces(const struct thread *thread);
/*
 * Add a namespaces set built from event at the head of the thread's list;
 * with a non-zero timestamp the previous head's end_time is set to it.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int thread__set_namespaces(struct thread *thread, u64 timestamp,
struct namespaces_event *event);
int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
bool exec);
static inline int thread__set_comm(struct thread *thread, const char *comm,
......
......@@ -40,6 +40,7 @@ struct perf_tool {
event_op mmap,
mmap2,
comm,
namespaces,
fork,
exit,
lost,
......@@ -66,6 +67,7 @@ struct perf_tool {
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
bool namespace_events;
};
#endif /* __PERF_TOOL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment