Commit a63eaf34 authored by Paul Mackerras, committed by Ingo Molnar

perf_counter: Dynamically allocate tasks' perf_counter_context struct

This replaces the struct perf_counter_context in the task_struct with
a pointer to a dynamically allocated perf_counter_context struct.  The
main reason for doing this is to allow us to transfer a
perf_counter_context from one task to another when we do lazy PMU
switching in a later patch.

This has a few side-benefits: the task_struct becomes a little smaller,
we save some memory because only tasks that have perf_counters attached
get a perf_counter_context allocated for them, and we can remove the
inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't
end up recompiling nearly everything whenever perf_counter.h changes.

The perf_counter_context structures are reference-counted and freed
when the last reference is dropped.  A context can have references
from its task and the counters on its task.  Counters can outlive the
task so it is possible that a context will be freed well after its
task has exited.

Contexts are allocated on fork if the parent had a context, or
otherwise the first time that a per-task counter is created on a task.
In the latter case, we set the context pointer in the task struct
locklessly using an atomic compare-and-exchange operation in case we
raced with some other task in creating a context for the subject task.

This also removes the task pointer from the perf_counter struct.  The
task pointer was not used anywhere and would make it harder to move a
context from one task to another.  Anything that needed to know which
task a counter was attached to was already using counter->ctx->task.

The __perf_counter_init_context function moves up in perf_counter.c
so that it can be called from find_get_context, and now initializes
the refcount, but is otherwise unchanged.

We were potentially calling list_del_counter twice: once from
__perf_counter_exit_task when the task exits and once from
__perf_counter_remove_from_context when the counter's fd gets closed.
This adds a check in list_del_counter so it doesn't do anything if
the counter has already been removed from the lists.

Since perf_counter_task_sched_in doesn't do anything if the task doesn't
have a context, and leaves cpuctx->task_ctx = NULL, this adds code to
__perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in
the case where the current task adds the first counter to itself and
thus creates a context for itself.

This also adds similar code to __perf_counter_enable to handle a
similar situation which can arise when the counters have been disabled
using prctl; that also leaves cpuctx->task_ctx = NULL.

[ Impact: refactor counter context management to prepare for new feature ]
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 34adc806
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
* Mikael Pettersson : PM converted to driver model. * Mikael Pettersson : PM converted to driver model.
*/ */
#include <linux/perf_counter.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h> #include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h> #include <linux/acpi_pmtmr.h>
......
...@@ -108,18 +108,6 @@ extern struct group_info init_groups; ...@@ -108,18 +108,6 @@ extern struct group_info init_groups;
extern struct cred init_cred; extern struct cred init_cred;
#ifdef CONFIG_PERF_COUNTERS
# define INIT_PERF_COUNTERS(tsk) \
.perf_counter_ctx.counter_list = \
LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \
.perf_counter_ctx.event_list = \
LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \
.perf_counter_ctx.lock = \
__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
#else
# define INIT_PERF_COUNTERS(tsk)
#endif
/* /*
* INIT_TASK is used to set up the first task table, touch at * INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB) * your own risk!. Base=0, limit=0x1fffff (=2MB)
...@@ -183,7 +171,6 @@ extern struct cred init_cred; ...@@ -183,7 +171,6 @@ extern struct cred init_cred;
}, \ }, \
.dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \
INIT_IDS \ INIT_IDS \
INIT_PERF_COUNTERS(tsk) \
INIT_TRACE_IRQFLAGS \ INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \ INIT_LOCKDEP \
INIT_FTRACE_GRAPH \ INIT_FTRACE_GRAPH \
......
...@@ -449,7 +449,6 @@ struct perf_counter { ...@@ -449,7 +449,6 @@ struct perf_counter {
struct hw_perf_counter hw; struct hw_perf_counter hw;
struct perf_counter_context *ctx; struct perf_counter_context *ctx;
struct task_struct *task;
struct file *filp; struct file *filp;
struct perf_counter *parent; struct perf_counter *parent;
...@@ -498,7 +497,6 @@ struct perf_counter { ...@@ -498,7 +497,6 @@ struct perf_counter {
* Used as a container for task counters and CPU counters as well: * Used as a container for task counters and CPU counters as well:
*/ */
struct perf_counter_context { struct perf_counter_context {
#ifdef CONFIG_PERF_COUNTERS
/* /*
* Protect the states of the counters in the list, * Protect the states of the counters in the list,
* nr_active, and the list: * nr_active, and the list:
...@@ -516,6 +514,7 @@ struct perf_counter_context { ...@@ -516,6 +514,7 @@ struct perf_counter_context {
int nr_counters; int nr_counters;
int nr_active; int nr_active;
int is_active; int is_active;
atomic_t refcount;
struct task_struct *task; struct task_struct *task;
/* /*
...@@ -523,7 +522,6 @@ struct perf_counter_context { ...@@ -523,7 +522,6 @@ struct perf_counter_context {
*/ */
u64 time; u64 time;
u64 timestamp; u64 timestamp;
#endif
}; };
/** /**
......
...@@ -71,7 +71,6 @@ struct sched_param { ...@@ -71,7 +71,6 @@ struct sched_param {
#include <linux/path.h> #include <linux/path.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/perf_counter.h>
#include <linux/pid.h> #include <linux/pid.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/topology.h> #include <linux/topology.h>
...@@ -99,6 +98,7 @@ struct robust_list_head; ...@@ -99,6 +98,7 @@ struct robust_list_head;
struct bio; struct bio;
struct bts_tracer; struct bts_tracer;
struct fs_struct; struct fs_struct;
struct perf_counter_context;
/* /*
* List of flags we want to share for kernel threads, * List of flags we want to share for kernel threads,
...@@ -1387,7 +1387,9 @@ struct task_struct { ...@@ -1387,7 +1387,9 @@ struct task_struct {
struct list_head pi_state_list; struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache; struct futex_pi_state *pi_state_cache;
#endif #endif
struct perf_counter_context perf_counter_ctx; #ifdef CONFIG_PERF_COUNTERS
struct perf_counter_context *perf_counter_ctxp;
#endif
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
struct mempolicy *mempolicy; struct mempolicy *mempolicy;
short il_next; short il_next;
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include <linux/tracehook.h> #include <linux/tracehook.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <linux/init_task.h> #include <linux/init_task.h>
#include <linux/perf_counter.h>
#include <trace/sched.h> #include <trace/sched.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) ...@@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
#ifdef CONFIG_PERF_COUNTERS #ifdef CONFIG_PERF_COUNTERS
WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); WARN_ON_ONCE(tsk->perf_counter_ctxp);
#endif #endif
trace_sched_process_free(tsk); trace_sched_process_free(tsk);
put_task_struct(tsk); put_task_struct(tsk);
......
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <trace/sched.h> #include <trace/sched.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/perf_counter.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment