Commit e4222673 authored by Hari Bathini, committed by Arnaldo Carvalho de Melo

perf: Add PERF_RECORD_NAMESPACES to include namespaces related info

With the advent of container technologies like Docker, which depend on
namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event for
recording namespace-related info. By recording info for every namespace,
it is left to userspace to decide what constitutes a container and to
trace containers by updating the perf tool accordingly.
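
As a rough illustration (not part of this patch), a consumer could request these records by setting the new attr.namespaces bit when opening an event; the sketch below assumes a uapi header that already carries this patch and, per the perf_event_open() change further down, CAP_SYS_ADMIN:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: open a dummy software event that only collects
 * side-band records, with namespace data enabled. */
static int open_dummy_with_namespaces(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;
	attr.namespaces = 1;		/* request PERF_RECORD_NAMESPACES */
	attr.sample_id_all = 1;

	/* cpu = -1 (any), group_fd = -1, flags = 0; mmap the returned fd
	 * to read PERF_RECORD_NAMESPACES from the ring buffer. */
	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}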

Each namespace is identified by a combination of device and inode numbers.
Though every namespace currently has the same device number, that may
change in the future to avoid the need for a namespace of namespaces.
To allow for that possibility, record the device and inode numbers
separately for each namespace.
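
For illustration only (again, not part of the patch), the record body described in the uapi header hunk below could be mirrored in userspace like this; the dev/ino pairs are expected to match st_dev/st_ino from stat() on the corresponding /proc/<pid>/ns/<name> files:

#include <stdint.h>

/* Hypothetical userspace view of a PERF_RECORD_NAMESPACES payload
 * (the struct perf_event_header precedes it in the ring buffer). */
struct ns_link_info {			/* mirrors struct perf_ns_link_info */
	uint64_t dev;
	uint64_t ino;
};

struct record_namespaces {
	uint32_t pid;
	uint32_t tid;
	uint64_t nr_namespaces;			/* NR_NAMESPACES when emitted */
	struct ns_link_info link_info[];	/* indexed by *_NS_INDEX */
	/* followed by struct sample_id */
};
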
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 3ef5b402
@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);

 /* Callchains */
@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
 static inline void perf_event_mmap(struct vm_area_struct *vma) { }
 static inline void perf_event_exec(void) { }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
+static inline void perf_event_namespaces(struct task_struct *tsk) { }
 static inline void perf_event_fork(struct task_struct *tsk) { }
 static inline void perf_event_init(void) { }
 static inline int perf_swevent_get_recursion_context(void) { return -1; }
...
@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;

 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
 	__u16	size;
 };

+struct perf_ns_link_info {
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NR_NAMESPACES,		/* number of available namespaces */
+};
+
 enum perf_event_type {

 	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,

+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u64				nr_namespaces;
+	 *	{ u64				dev, inode; } [nr_namespaces];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
...
@@ -48,6 +48,8 @@
 #include <linux/parser.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>

 #include "internal.h"
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_dec(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
 void perf_event_fork(struct task_struct *task)
 {
 	perf_event_task(task, NULL, 1);
+	perf_event_namespaces(task);
 }

 /*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
 	perf_event_comm_event(&comm_event);
 }

+/*
+ * namespaces tracking
+ */
+
+struct perf_namespaces_event {
+	struct task_struct		*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				nr_namespaces;
+		struct perf_ns_link_info	link_info[NR_NAMESPACES];
+	} event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+	return event->attr.namespaces;
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	perf_event_header__init_id(&namespaces_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				namespaces_event->event_id.header.size);
+	if (ret)
+		return;
+
+	namespaces_event->event_id.pid = perf_event_pid(event,
+							namespaces_event->task);
+	namespaces_event->event_id.tid = perf_event_tid(event,
+							namespaces_event->task);
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+				   struct task_struct *task,
+				   const struct proc_ns_operations *ns_ops)
+{
+	struct path ns_path;
+	struct inode *ns_inode;
+	void *error;
+
+	error = ns_get_path(&ns_path, task, ns_ops);
+	if (!error) {
+		ns_inode = ns_path.dentry->d_inode;
+
+		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+		ns_link_info->ino = ns_inode->i_ino;
+	}
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event namespaces_event;
+	struct perf_ns_link_info *ns_link_info;
+
+	if (!atomic_read(&nr_namespaces_events))
+		return;
+
+	namespaces_event = (struct perf_namespaces_event){
+		.task	= task,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(namespaces_event.event_id),
+			},
+			/* .pid */
+			/* .tid */
+			.nr_namespaces = NR_NAMESPACES,
+			/* .link_info[NR_NAMESPACES] */
+		},
+	};
+
+	ns_link_info = namespaces_event.event_id.link_info;
+
+	perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
+			       task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+	perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
+			       task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+	perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
+			       task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+	perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
+			       task, &utsns_operations);
+#endif
#ifdef CONFIG_IPC_NS
+	perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
+			       task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+	perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
+			       task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+	perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
+			       task, &cgroupns_operations);
+#endif
+
+	perf_iterate_sb(perf_event_namespaces_output,
+			&namespaces_event,
+			NULL);
+}
+
 /*
  * mmap tracking
  */
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_inc(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EACCES;
 	}

+	if (attr.namespaces) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;
...
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 	}

+	perf_event_namespaces(current);
+
 bad_unshare_cleanup_cred:
 	if (new_cred)
 		put_cred(new_cred);
...
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
+#include <linux/perf_event.h>

 static struct kmem_cache *nsproxy_cachep;
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 		goto out;
 	}
 	switch_task_namespaces(tsk, new_nsproxy);
+	perf_event_namespaces(tsk);
+
 out:
 	fput(file);
 	return err;
...