Commit a69b0ca4 authored by Peter Zijlstra, committed by Ingo Molnar

perf: Fix cloning

Alexander reported that when the 'original' context gets destroyed, no
new clones happen.

This can happen irrespective of the ctx switch optimization: any task
can die, even the parent, and we want to continue monitoring the task
hierarchy until we either close the event or no tasks are left in the
hierarchy.

perf_event_init_context() will attempt to pin the 'parent' context
during clone(). At that point current is the parent, and since current
cannot have exited while executing clone(), its context cannot have
passed through perf_event_exit_task_context(). Therefore
perf_pin_task_context() cannot observe ctx->task == TASK_TOMBSTONE.
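
For reference, the tombstone hand-off looks roughly like this (a
simplified sketch of the two sides, not the exact kernel code):

	/* perf_event_exit_task_context() marks a dying task's context: */
	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);

	/* ...and perf_pin_task_context() refuses such a context: */
	if (ctx->task == TASK_TOMBSTONE)
		return NULL;	/* cannot trigger for current during clone() */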

However, since inherit_event() does:

	if (parent_event->parent)
		parent_event = parent_event->parent;

it looks at the 'original' event when it calls is_orphaned_event().
This can return true if the context that contains this event has
already passed through perf_event_exit_task_context(), and thus we'll
fail to clone the perf context.
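
A rough sketch of the failing path (simplified from inherit_event(),
not the verbatim kernel code):

	if (parent_event->parent)
		parent_event = parent_event->parent;	/* the 'original' event */
	...
	mutex_lock(&parent_event->child_mutex);
	if (is_orphaned_event(parent_event)) {
		/* Pre-fix: true as soon as state == STATE_EXIT, i.e. whenever
		 * the original task has exited -- so the clone is refused even
		 * though the event itself is still open and observed. */
		mutex_unlock(&parent_event->child_mutex);
		return NULL;
	}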

Fix this by adding a new state: STATE_DEAD, which is set by
perf_release() to indicate that the filedesc (or kernel reference) is
dead and there are no observers for our data left.

Only for STATE_DEAD will is_orphaned_event() be true and inhibit
cloning.

STATE_EXIT is otherwise preserved such that is_event_hup() remains
functional and will report when the observed task hierarchy becomes
empty.
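
Putting the two predicates side by side (collected from the diff below,
elided for brevity), the enum ordering DEAD (-4) < EXIT (-3) is what
keeps both checks cheap:

	static bool is_orphaned_event(struct perf_event *event)
	{
		return event->state == PERF_EVENT_STATE_DEAD; /* dead filedesc only */
	}

	static bool is_event_hup(struct perf_event *event)
	{
		/* DEAD and EXIT both pass; live states do not */
		if (event->state > PERF_EVENT_STATE_EXIT)
			return false;
		...
	}
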
Reported-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Tested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dvyukov@google.com
Cc: eranian@google.com
Cc: oleg@redhat.com
Cc: panand@redhat.com
Cc: sasha.levin@oracle.com
Cc: vince@deater.net
Fixes: c6e5b732 ("perf: Synchronously clean up child events")
Link: http://lkml.kernel.org/r/20160224174947.919845295@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 6f932e5b
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -397,6 +397,7 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_DEAD		= -4,
 	PERF_EVENT_STATE_EXIT		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,7 +1645,7 @@ static void perf_group_detach(struct perf_event *event)
 
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event->state == PERF_EVENT_STATE_EXIT;
+	return event->state == PERF_EVENT_STATE_DEAD;
 }
 
 static inline int pmu_filter_match(struct perf_event *event)
@@ -1732,7 +1732,6 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP	0x01UL
-#define DETACH_STATE	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1752,8 +1751,6 @@ __perf_remove_from_context(struct perf_event *event,
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
-	if (flags & DETACH_STATE)
-		event->state = PERF_EVENT_STATE_EXIT;
 
 	if (!ctx->nr_events && ctx->is_active) {
 		ctx->is_active = 0;
@@ -3772,22 +3769,24 @@ int perf_event_release_kernel(struct perf_event *event)
 
 	ctx = perf_event_ctx_lock(event);
 	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
-	perf_event_ctx_unlock(event, ctx);
+	perf_remove_from_context(event, DETACH_GROUP);
 
+	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
-	 * either from the above perf_remove_from_context() or through
-	 * perf_event_exit_event().
+	 * Mark this event as STATE_DEAD, there is no external reference to it
+	 * anymore.
 	 *
-	 * Therefore, anybody acquiring event->child_mutex after the below
-	 * loop _must_ also see this, most importantly inherit_event() which
-	 * will avoid placing more children on the list.
+	 * Anybody acquiring event->child_mutex after the below loop _must_
+	 * also see this, most importantly inherit_event() which will avoid
+	 * placing more children on the list.
 	 *
 	 * Thus this guarantees that we will in fact observe and kill _ALL_
 	 * child events.
 	 */
-	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
+	event->state = PERF_EVENT_STATE_DEAD;
+	raw_spin_unlock_irq(&ctx->lock);
+
+	perf_event_ctx_unlock(event, ctx);
 
 again:
 	mutex_lock(&event->child_mutex);
@@ -4000,7 +3999,7 @@ static bool is_event_hup(struct perf_event *event)
 {
 	bool no_children;
 
-	if (event->state != PERF_EVENT_STATE_EXIT)
+	if (event->state > PERF_EVENT_STATE_EXIT)
 		return false;
 
 	mutex_lock(&event->child_mutex);
@@ -8727,7 +8726,7 @@ perf_event_exit_event(struct perf_event *child_event,
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+	child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*