Commit ddebb8b6 authored by John Levon, committed by Linus Torvalds

[PATCH] improve OProfile on many-way systems

Anton prompted me to get this patch merged.  It changes the core buffer
sync algorithm of OProfile to avoid global locks wherever possible.  Anton
tested an earlier version of this patch with some success.  I've lightly
tested this applied against 2.6.8.1-mm3 on my two-way machine.

The changes also have the happy side-effect of losing fewer samples after
munmap operations, and of removing the blind spot of tasks exiting inside the
kernel.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6e7ca99d
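
The patch replaces the single global buffer sync with per-CPU syncs and defers freeing of task_structs through a two-stage "mortuary": a handed-off task is freed only after two full rounds of buffer syncs, so no CPU buffer can still hold a stale pointer to it. A minimal standalone C model of that aging scheme follows; the list names mirror the patch, while the plain singly-linked lists, printf() and the main() driver are illustrative stand-ins for the kernel's list_head, spinlock and task-handoff notifier machinery.

/* Userspace model (not kernel code) of the two-stage task mortuary. */
#include <stdio.h>
#include <stdlib.h>

struct fake_task {
	int pid;
	struct fake_task *next;
};

static struct fake_task *dying_tasks;	/* handed off, may still be referenced */
static struct fake_task *dead_tasks;	/* survived one full sync round */

/* Stands in for the task-free (handoff) notifier in the real code. */
static void task_handoff(struct fake_task *t)
{
	t->next = dying_tasks;
	dying_tasks = t;
}

/* Called once every CPU has completed a buffer sync (mark_done() in the patch). */
static void process_task_mortuary(void)
{
	struct fake_task *t, *next;

	/* Everything on dead_tasks has survived two full rounds: free it. */
	for (t = dead_tasks; t; t = next) {
		next = t->next;
		printf("freeing task %d\n", t->pid);
		free(t);
	}

	/* Age the dying list: it becomes the dead list for the next round. */
	dead_tasks = dying_tasks;
	dying_tasks = NULL;
}

int main(void)
{
	struct fake_task *t = malloc(sizeof(*t));

	t->pid = 42;
	task_handoff(t);

	process_task_mortuary();	/* round 1: task merely ages */
	process_task_mortuary();	/* round 2: task is actually freed */
	return 0;
}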
@@ -32,61 +32,65 @@
 #include "cpu_buffer.h"
 #include "buffer_sync.h"
 
-#define DEFAULT_EXPIRE (HZ / 4)
-
-static void wq_sync_buffers(void *);
-static DECLARE_WORK(sync_wq, wq_sync_buffers, NULL);
-
-static struct timer_list sync_timer;
-static void timer_ping(unsigned long data);
-static void sync_cpu_buffers(void);
-
-/* We must make sure to process every entry in the CPU buffers
- * before a task got the PF_EXITING flag, otherwise we will hold
- * references to a possibly freed task_struct. We are safe with
- * samples past the PF_EXITING point in do_exit(), because we
- * explicitly check for that in cpu_buffer.c
+static LIST_HEAD(dying_tasks);
+static LIST_HEAD(dead_tasks);
+cpumask_t marked_cpus = CPU_MASK_NONE;
+static spinlock_t task_mortuary = SPIN_LOCK_UNLOCKED;
+void process_task_mortuary(void);
+
+/* Take ownership of the task struct and place it on the
+ * list for processing. Only after two full buffer syncs
+ * does the task eventually get freed, because by then
+ * we are sure we will not reference it again.
  */
-static int exit_task_notify(struct notifier_block * self, unsigned long val, void * data)
+static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
 {
-	sync_cpu_buffers();
-	return 0;
+	struct task_struct * task = (struct task_struct *)data;
+	spin_lock(&task_mortuary);
+	list_add(&task->tasks, &dying_tasks);
+	spin_unlock(&task_mortuary);
+	return NOTIFY_OK;
 }
 
-/* There are two cases of tasks modifying task->mm->mmap list we
- * must concern ourselves with. First, when a task is about to
- * exit (exit_mmap()), we should process the buffer to deal with
- * any samples in the CPU buffer, before we lose the ->mmap information
- * we need. It is vital to get this case correct, otherwise we can
- * end up trying to access a freed task_struct.
+
+/* The task is on its way out. A sync of the buffer means we can catch
+ * any remaining samples for this task.
  */
-static int mm_notify(struct notifier_block * self, unsigned long val, void * data)
+static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data)
 {
-	sync_cpu_buffers();
+	/* To avoid latency problems, we only process the current CPU,
+	 * hoping that most samples for the task are on this CPU
+	 */
+	sync_buffer(smp_processor_id());
 	return 0;
 }
 
 
-/* Second, a task may unmap (part of) an executable mmap,
- * so we want to process samples before that happens too. This is merely
- * a QOI issue not a correctness one.
+/* The task is about to try a do_munmap(). We peek at what it's going to
+ * do, and if it's an executable region, process the samples first, so
+ * we don't lose any. This does not have to be exact, it's a QoI issue
+ * only.
  */
 static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
 {
-	/* Note that we cannot sync the buffers directly, because we might end up
-	 * taking the the mmap_sem that we hold now inside of event_buffer_read()
-	 * on a page fault, whilst holding buffer_sem - deadlock.
-	 *
-	 * This would mean a threaded reader of the event buffer, but we should
-	 * prevent it anyway.
-	 *
-	 * Delaying the work in a context that doesn't hold the mmap_sem means
-	 * that we won't lose samples from other mappings that current() may
-	 * have. Note that either way, we lose any pending samples for what is
-	 * being unmapped.
-	 */
-	schedule_work(&sync_wq);
+	unsigned long addr = (unsigned long)data;
+	struct mm_struct * mm = current->mm;
+	struct vm_area_struct * mpnt;
+
+	down_read(&mm->mmap_sem);
+
+	mpnt = find_vma(mm, addr);
+	if (mpnt && mpnt->vm_file && (mpnt->vm_flags & VM_EXEC)) {
+		up_read(&mm->mmap_sem);
+		/* To avoid latency problems, we only process the current CPU,
+		 * hoping that most samples for the task are on this CPU
		 */
+		sync_buffer(smp_processor_id());
+		return 0;
+	}
+
+	up_read(&mm->mmap_sem);
 	return 0;
 }
@@ -100,7 +104,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
 	if (val != MODULE_STATE_COMING)
 		return 0;
 
-	sync_cpu_buffers();
+	/* FIXME: should we process all CPU buffers ? */
 	down(&buffer_sem);
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(MODULE_LOADED_CODE);
@@ -110,16 +114,16 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
 }
 
 
-static struct notifier_block exit_task_nb = {
-	.notifier_call = exit_task_notify,
+static struct notifier_block task_free_nb = {
+	.notifier_call = task_free_notify,
 };
 
-static struct notifier_block exec_unmap_nb = {
-	.notifier_call = munmap_notify,
+static struct notifier_block task_exit_nb = {
+	.notifier_call = task_exit_notify,
 };
 
-static struct notifier_block exit_mmap_nb = {
-	.notifier_call = mm_notify,
+static struct notifier_block munmap_nb = {
+	.notifier_call = munmap_notify,
 };
 
 static struct notifier_block module_load_nb = {
@@ -127,11 +131,12 @@ static struct notifier_block module_load_nb = {
 };
 
 
-static void end_sync_timer(void)
+static void end_sync(void)
 {
-	del_timer_sync(&sync_timer);
-	/* timer might have queued work, make sure it's completed. */
-	flush_scheduled_work();
+	end_cpu_timers();
+	/* make sure we don't leak task structs */
+	process_task_mortuary();
+	process_task_mortuary();
 }
@@ -139,18 +144,15 @@ int sync_start(void)
 {
 	int err;
 
-	init_timer(&sync_timer);
-	sync_timer.function = timer_ping;
-	sync_timer.expires = jiffies + DEFAULT_EXPIRE;
-	add_timer(&sync_timer);
+	start_cpu_timers();
 
-	err = profile_event_register(EXIT_TASK, &exit_task_nb);
+	err = task_handoff_register(&task_free_nb);
 	if (err)
 		goto out1;
-	err = profile_event_register(EXIT_MMAP, &exit_mmap_nb);
+	err = profile_event_register(PROFILE_TASK_EXIT, &task_exit_nb);
 	if (err)
 		goto out2;
-	err = profile_event_register(EXEC_UNMAP, &exec_unmap_nb);
+	err = profile_event_register(PROFILE_MUNMAP, &munmap_nb);
 	if (err)
 		goto out3;
 	err = register_module_notifier(&module_load_nb);
@@ -160,13 +162,13 @@ int sync_start(void)
 out:
 	return err;
 out4:
-	profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb);
+	profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
 out3:
-	profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
+	profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
 out2:
-	profile_event_unregister(EXIT_TASK, &exit_task_nb);
+	task_handoff_unregister(&task_free_nb);
 out1:
-	end_sync_timer();
+	end_sync();
 	goto out;
 }
@@ -174,10 +176,10 @@ int sync_start(void)
 void sync_stop(void)
 {
 	unregister_module_notifier(&module_load_nb);
-	profile_event_unregister(EXIT_TASK, &exit_task_nb);
-	profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
-	profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb);
-	end_sync_timer();
+	profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+	profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
+	task_handoff_unregister(&task_free_nb);
+	end_sync();
 }
@@ -417,24 +419,80 @@ static void increment_tail(struct oprofile_cpu_buffer * b)
 }
 
 
+/* Move tasks along towards death. Any tasks on dead_tasks
+ * will definitely have no remaining references in any
+ * CPU buffers at this point, because we use two lists,
+ * and to have reached the list, it must have gone through
+ * one full sync already.
+ */
+void process_task_mortuary(void)
+{
+	struct list_head * pos;
+	struct list_head * pos2;
+	struct task_struct * task;
+
+	spin_lock(&task_mortuary);
+
+	list_for_each_safe(pos, pos2, &dead_tasks) {
+		task = list_entry(pos, struct task_struct, tasks);
+		list_del(&task->tasks);
+		free_task(task);
+	}
+
+	list_for_each_safe(pos, pos2, &dying_tasks) {
+		task = list_entry(pos, struct task_struct, tasks);
+		list_del(&task->tasks);
+		list_add_tail(&task->tasks, &dead_tasks);
+	}
+
+	spin_unlock(&task_mortuary);
+}
+
+
+static void mark_done(int cpu)
+{
+	int i;
+
+	cpu_set(cpu, marked_cpus);
+
+	for_each_online_cpu(i) {
+		if (!cpu_isset(i, marked_cpus))
+			return;
+	}
+
+	/* All CPUs have been processed at least once,
+	 * we can process the mortuary once
+	 */
+	process_task_mortuary();
+
+	cpus_clear(marked_cpus);
+}
+
+
 /* Sync one of the CPU's buffers into the global event buffer.
  * Here we need to go through each batch of samples punctuated
  * by context switch notes, taking the task's mmap_sem and doing
  * lookup in task->mm->mmap to convert EIP into dcookie/offset
  * value.
  */
-static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
+void sync_buffer(int cpu)
 {
+	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
 	struct mm_struct *mm = NULL;
 	struct task_struct * new;
 	unsigned long cookie = 0;
 	int in_kernel = 1;
 	unsigned int i;
+	unsigned long available;
+
+	down(&buffer_sem);
+
+	add_cpu_switch(cpu);
 
 	/* Remember, only we can modify tail_pos */
-	unsigned long const available = get_slots(cpu_buf);
+	available = get_slots(cpu_buf);
 
 	for (i=0; i < available; ++i) {
 		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
@@ -462,50 +520,8 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
 		increment_tail(cpu_buf);
 	}
 	release_mm(mm);
-}
-
-
-/* Process each CPU's local buffer into the global
- * event buffer.
- */
-static void sync_cpu_buffers(void)
-{
-	int i;
-
-	down(&buffer_sem);
-
-	for (i = 0; i < NR_CPUS; ++i) {
-		struct oprofile_cpu_buffer * cpu_buf;
-
-		if (!cpu_possible(i))
-			continue;
-
-		cpu_buf = &cpu_buffer[i];
-
-		add_cpu_switch(i);
-		sync_buffer(cpu_buf);
-	}
+
+	mark_done(cpu);
 
 	up(&buffer_sem);
-
-	mod_timer(&sync_timer, jiffies + DEFAULT_EXPIRE);
-}
-
-
-static void wq_sync_buffers(void * data)
-{
-	sync_cpu_buffers();
-}
-
-
-/* It is possible that we could have no munmap() or
- * other events for a period of time. This will lead
- * the CPU buffers to overflow and lose samples and
- * context switches. We try to reduce the problem
- * by timing out when nothing happens for a while.
- */
-static void timer_ping(unsigned long data)
-{
-	schedule_work(&sync_wq);
-	/* timer is re-added by the scheduled task */
-}
 }
@@ -16,4 +16,7 @@ int sync_start(void);
 /* remove the hooks */
 void sync_stop(void);
 
+/* sync the given CPU's buffer */
+void sync_buffer(int cpu);
+
 #endif /* OPROFILE_BUFFER_SYNC_H */
@@ -9,7 +9,7 @@
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
  * Eventually each CPU's buffer is processed into the global
- * event buffer by sync_cpu_buffers().
+ * event buffer by sync_buffer().
  *
  * We use a local buffer for two reasons: an NMI or similar
  * interrupt cannot synchronise, and high sampling rates
@@ -22,21 +22,24 @@
 #include <linux/errno.h>
 
 #include "cpu_buffer.h"
+#include "buffer_sync.h"
 #include "oprof.h"
 
 struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
 
+static void wq_sync_buffer(void *);
+static void timer_ping(unsigned long data);
+#define DEFAULT_TIMER_EXPIRE (HZ / 2)
+int timers_enabled;
+
 static void __free_cpu_buffers(int num)
 {
 	int i;
 
-	for (i=0; i < num; ++i) {
-		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
-
-		if (!cpu_possible(i))
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (!cpu_online(i))
 			continue;
-
-		vfree(b->buffer);
+		vfree(cpu_buffer[i].buffer);
 	}
 }
@@ -47,12 +50,12 @@ int alloc_cpu_buffers(void)
 	unsigned long buffer_size = fs_cpu_buffer_size;
 
-	for (i=0; i < NR_CPUS; ++i) {
+	for (i = 0; i < NR_CPUS; ++i) {
 		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
 
-		if (!cpu_possible(i))
+		if (!cpu_online(i))
 			continue;
 
 		b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
 		if (!b->buffer)
 			goto fail;
@@ -64,9 +67,15 @@ int alloc_cpu_buffers(void)
 		b->head_pos = 0;
 		b->sample_received = 0;
 		b->sample_lost_overflow = 0;
-		b->sample_lost_task_exit = 0;
+		b->cpu = i;
+		init_timer(&b->timer);
+		b->timer.function = timer_ping;
+		b->timer.data = i;
+		b->timer.expires = jiffies + DEFAULT_TIMER_EXPIRE;
+		INIT_WORK(&b->work, wq_sync_buffer, b);
 	}
 	return 0;
+
 fail:
 	__free_cpu_buffers(i);
 	return -ENOMEM;
@@ -79,6 +88,42 @@ void free_cpu_buffers(void)
 }
 
 
+void start_cpu_timers(void)
+{
+	int i;
+
+	timers_enabled = 1;
+
+	for (i = 0; i < NR_CPUS; ++i) {
+		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
+
+		if (!cpu_online(i))
+			continue;
+
+		add_timer_on(&b->timer, i);
+	}
+}
+
+
+void end_cpu_timers(void)
+{
+	int i;
+
+	timers_enabled = 0;
+
+	for (i = 0; i < NR_CPUS; ++i) {
+		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
+
+		if (!cpu_online(i))
+			continue;
+
+		del_timer_sync(&b->timer);
+	}
+
+	flush_scheduled_work();
+}
+
+
 /* compute number of available slots in cpu_buffer queue */
 static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
 {
@@ -145,21 +190,9 @@ void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
 	/* notice a task switch */
 	if (cpu_buf->last_task != task) {
 		cpu_buf->last_task = task;
-		if (!(task->flags & PF_EXITING)) {
-			cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
-			cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task;
-			increment_head(cpu_buf);
-		}
-	}
-
-	/* If the task is exiting it's not safe to take a sample
-	 * as the task_struct is about to be freed. We can't just
-	 * notify at release_task() time because of CLONE_DETACHED
-	 * tasks that release_task() themselves.
-	 */
-	if (task->flags & PF_EXITING) {
-		cpu_buf->sample_lost_task_exit++;
-		return;
+		cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
+		cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task;
+		increment_head(cpu_buf);
 	}
 
 	cpu_buf->buffer[cpu_buf->head_pos].eip = eip;
@@ -178,3 +211,36 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
 	cpu_buf->last_is_kernel = -1;
 	cpu_buf->last_task = NULL;
 }
+
+
+/* FIXME: not guaranteed to be on our CPU */
+static void wq_sync_buffer(void * data)
+{
+	struct oprofile_cpu_buffer * b = (struct oprofile_cpu_buffer *)data;
+	if (b->cpu != smp_processor_id()) {
+		printk("WQ on CPU%d, prefer CPU%d\n",
+			smp_processor_id(), b->cpu);
+	}
+	sync_buffer(b->cpu);
+
+	/* don't re-add the timer if we're shutting down */
+	if (timers_enabled) {
+		del_timer_sync(&b->timer);
+		add_timer_on(&b->timer, b->cpu);
+	}
+}
+
+
+/* This serves to avoid cpu buffer overflow, and makes sure
+ * the task mortuary progresses
+ */
+static void timer_ping(unsigned long data)
+{
+	struct oprofile_cpu_buffer * b = &cpu_buffer[data];
+	if (b->cpu != smp_processor_id()) {
+		printk("Timer on CPU%d, prefer CPU%d\n",
+			smp_processor_id(), b->cpu);
+	}
+	schedule_work(&b->work);
+	/* work will re-enable our timer */
+}
@@ -12,15 +12,18 @@
 
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 #include <linux/cache.h>
 
 struct task_struct;
 
-/* allocate a sample buffer for each CPU */
 int alloc_cpu_buffers(void);
 void free_cpu_buffers(void);
 
+void start_cpu_timers(void);
+void end_cpu_timers(void);
+
 /* CPU buffer is composed of such entries (which are
  * also used for context switch notes)
  */
@@ -38,11 +41,13 @@ struct oprofile_cpu_buffer {
 	struct op_sample * buffer;
 	unsigned long sample_received;
 	unsigned long sample_lost_overflow;
-	unsigned long sample_lost_task_exit;
+	int cpu;
+	struct timer_list timer;
+	struct work_struct work;
 } ____cacheline_aligned;
 
 extern struct oprofile_cpu_buffer cpu_buffer[];
 
-void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf);
+void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 
 #endif /* OPROFILE_CPU_BUFFER_H */
@@ -29,7 +29,6 @@ void oprofile_reset_stats(void)
 		cpu_buf = &cpu_buffer[i];
 		cpu_buf->sample_received = 0;
 		cpu_buf->sample_lost_overflow = 0;
-		cpu_buf->sample_lost_task_exit = 0;
 	}
 
 	atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
@@ -66,8 +65,6 @@ void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
 			&cpu_buf->sample_received);
 		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
 			&cpu_buf->sample_lost_overflow);
-		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_task_exit",
-			&cpu_buf->sample_lost_task_exit);
 	}
 
 	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm",
......
@@ -26,9 +26,8 @@ void create_prof_cpu_mask(struct proc_dir_entry *);
 #endif
 
 enum profile_type {
-	EXIT_TASK,
-	EXIT_MMAP,
-	EXEC_UNMAP
+	PROFILE_TASK_EXIT,
+	PROFILE_MUNMAP
 };
 
 #ifdef CONFIG_PROFILING
@@ -38,16 +37,20 @@ struct task_struct;
 struct mm_struct;
 
 /* task is in do_exit() */
-void profile_exit_task(struct task_struct * task);
+void profile_task_exit(struct task_struct * task);
 
-/* change of vma mappings */
-void profile_exec_unmap(struct mm_struct * mm);
+/* task is dead, free task struct ? Returns 1 if
+ * the task was taken, 0 if the task should be freed.
+ */
+int profile_handoff_task(struct task_struct * task);
 
-/* exit of all vmas for a task */
-void profile_exit_mmap(struct mm_struct * mm);
+/* sys_munmap */
+void profile_munmap(unsigned long addr);
 
-int profile_event_register(enum profile_type, struct notifier_block * n);
+int task_handoff_register(struct notifier_block * n);
+int task_handoff_unregister(struct notifier_block * n);
+
+int profile_event_register(enum profile_type, struct notifier_block * n);
 int profile_event_unregister(enum profile_type, struct notifier_block * n);
 
 int register_profile_notifier(struct notifier_block * nb);
@@ -60,6 +63,16 @@ void profile_hook(struct pt_regs * regs);
 
 #else
 
+static inline int task_handoff_register(struct notifier_block * n)
+{
+	return -ENOSYS;
+}
+
+static inline int task_handoff_unregister(struct notifier_block * n)
+{
+	return -ENOSYS;
+}
+
 static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
 {
 	return -ENOSYS;
@@ -70,9 +83,9 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
 	return -ENOSYS;
 }
 
-#define profile_exit_task(a) do { } while (0)
-#define profile_exec_unmap(a) do { } while (0)
-#define profile_exit_mmap(a) do { } while (0)
+#define profile_task_exit(a) do { } while (0)
+#define profile_handoff_task(a) (0)
+#define profile_munmap(a) do { } while (0)
 
 static inline int register_profile_notifier(struct notifier_block * nb)
 {
......
@@ -578,6 +578,7 @@ static inline pid_t process_group(struct task_struct *tsk)
 	return tsk->signal->pgrp;
 }
 
+extern void free_task(struct task_struct *tsk);
 extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 #define put_task_struct(tsk) \
......
@@ -787,6 +787,8 @@ asmlinkage NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 
+	profile_task_exit(tsk);
+
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
@@ -803,8 +805,6 @@ asmlinkage NORET_TYPE void do_exit(long code)
 			current->comm, current->pid,
 			preempt_count());
 
-	profile_exit_task(tsk);
-
 	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 		current->ptrace_message = code;
 		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
......
@@ -36,6 +36,7 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/profile.h>
 #include <linux/rmap.h>
 
 #include <asm/pgtable.h>
@@ -76,11 +77,12 @@ int nr_processes(void)
 static kmem_cache_t *task_struct_cachep;
 #endif
 
-static void free_task(struct task_struct *tsk)
+void free_task(struct task_struct *tsk)
 {
 	free_thread_info(tsk->thread_info);
 	free_task_struct(tsk);
 }
+EXPORT_SYMBOL(free_task);
 
 void __put_task_struct(struct task_struct *tsk)
 {
@@ -93,7 +95,9 @@ void __put_task_struct(struct task_struct *tsk)
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
-	free_task(tsk);
+
+	if (!profile_handoff_task(tsk))
+		free_task(tsk);
 }
 
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
......
@@ -53,31 +53,54 @@ void __init profile_init(void)
 #ifdef CONFIG_PROFILING
 
 static DECLARE_RWSEM(profile_rwsem);
-static struct notifier_block * exit_task_notifier;
-static struct notifier_block * exit_mmap_notifier;
-static struct notifier_block * exec_unmap_notifier;
+static rwlock_t handoff_lock = RW_LOCK_UNLOCKED;
+static struct notifier_block * task_exit_notifier;
+static struct notifier_block * task_free_notifier;
+static struct notifier_block * munmap_notifier;
 
-void profile_exit_task(struct task_struct * task)
+void profile_task_exit(struct task_struct * task)
 {
 	down_read(&profile_rwsem);
-	notifier_call_chain(&exit_task_notifier, 0, task);
+	notifier_call_chain(&task_exit_notifier, 0, task);
 	up_read(&profile_rwsem);
 }
 
-void profile_exit_mmap(struct mm_struct * mm)
+int profile_handoff_task(struct task_struct * task)
 {
-	down_read(&profile_rwsem);
-	notifier_call_chain(&exit_mmap_notifier, 0, mm);
-	up_read(&profile_rwsem);
+	int ret;
+	read_lock(&handoff_lock);
+	ret = notifier_call_chain(&task_free_notifier, 0, task);
+	read_unlock(&handoff_lock);
+	return (ret == NOTIFY_OK) ? 1 : 0;
 }
 
-void profile_exec_unmap(struct mm_struct * mm)
+void profile_munmap(unsigned long addr)
 {
 	down_read(&profile_rwsem);
-	notifier_call_chain(&exec_unmap_notifier, 0, mm);
+	notifier_call_chain(&munmap_notifier, 0, (void *)addr);
 	up_read(&profile_rwsem);
 }
 
+int task_handoff_register(struct notifier_block * n)
+{
+	int err = -EINVAL;
+
+	write_lock(&handoff_lock);
+	err = notifier_chain_register(&task_free_notifier, n);
+	write_unlock(&handoff_lock);
+	return err;
+}
+
+int task_handoff_unregister(struct notifier_block * n)
+{
+	int err = -EINVAL;
+
+	write_lock(&handoff_lock);
+	err = notifier_chain_unregister(&task_free_notifier, n);
+	write_unlock(&handoff_lock);
+	return err;
+}
+
 int profile_event_register(enum profile_type type, struct notifier_block * n)
 {
 	int err = -EINVAL;
@@ -85,14 +108,11 @@ int profile_event_register(enum profile_type type, struct notifier_block * n)
 	down_write(&profile_rwsem);
 
 	switch (type) {
-		case EXIT_TASK:
-			err = notifier_chain_register(&exit_task_notifier, n);
+		case PROFILE_TASK_EXIT:
+			err = notifier_chain_register(&task_exit_notifier, n);
 			break;
-		case EXIT_MMAP:
-			err = notifier_chain_register(&exit_mmap_notifier, n);
-			break;
-		case EXEC_UNMAP:
-			err = notifier_chain_register(&exec_unmap_notifier, n);
+		case PROFILE_MUNMAP:
+			err = notifier_chain_register(&munmap_notifier, n);
 			break;
 	}
@@ -109,14 +129,11 @@ int profile_event_unregister(enum profile_type type, struct notifier_block * n)
 	down_write(&profile_rwsem);
 
 	switch (type) {
-		case EXIT_TASK:
-			err = notifier_chain_unregister(&exit_task_notifier, n);
-			break;
-		case EXIT_MMAP:
-			err = notifier_chain_unregister(&exit_mmap_notifier, n);
+		case PROFILE_TASK_EXIT:
+			err = notifier_chain_unregister(&task_exit_notifier, n);
 			break;
-		case EXEC_UNMAP:
-			err = notifier_chain_unregister(&exec_unmap_notifier, n);
+		case PROFILE_MUNMAP:
+			err = notifier_chain_unregister(&munmap_notifier, n);
 			break;
 	}
@@ -156,6 +173,8 @@ void profile_hook(struct pt_regs * regs)
 
 EXPORT_SYMBOL_GPL(register_profile_notifier);
 EXPORT_SYMBOL_GPL(unregister_profile_notifier);
+EXPORT_SYMBOL_GPL(task_handoff_register);
+EXPORT_SYMBOL_GPL(task_handoff_unregister);
 
 #endif /* CONFIG_PROFILING */
......
@@ -233,6 +233,8 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	spin_unlock_irqrestore(&base->lock, flags);
 }
 
+EXPORT_SYMBOL(add_timer_on);
+
 /***
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
......
@@ -1650,10 +1650,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	if (mpnt->vm_start >= end)
 		return 0;
 
-	/* Something will probably happen, so notify. */
-	if (mpnt->vm_file && (mpnt->vm_flags & VM_EXEC))
-		profile_exec_unmap(mm);
-
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
@@ -1696,6 +1692,8 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
 	int ret;
 	struct mm_struct *mm = current->mm;
 
+	profile_munmap(addr);
+
 	down_write(&mm->mmap_sem);
 	ret = do_munmap(mm, addr, len);
 	up_write(&mm->mmap_sem);
@@ -1798,8 +1796,6 @@ void exit_mmap(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 
-	profile_exit_mmap(mm);
-
 	lru_add_drain();
 	spin_lock(&mm->page_table_lock);
......
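
A similarly small model of the mark_done() aggregation added to buffer_sync.c above: each CPU marks itself after syncing its own buffer, and only once every CPU has been marked is the mortuary advanced and the mask cleared for the next round. NUM_CPUS and the plain bitmask are illustrative stand-ins for the kernel's cpumask_t, cpu_set()/cpu_isset(), cpus_clear() and for_each_online_cpu().

/* Userspace model (not kernel code) of the per-CPU completion mask. */
#include <stdio.h>

#define NUM_CPUS 4

static unsigned int marked_cpus;	/* bit i set => CPU i has synced this round */

static void process_task_mortuary(void)
{
	printf("all CPUs synced once: advancing task mortuary\n");
}

static void mark_done(int cpu)
{
	int i;

	marked_cpus |= 1u << cpu;

	for (i = 0; i < NUM_CPUS; i++) {
		if (!(marked_cpus & (1u << i)))
			return;		/* some CPU has not synced yet */
	}

	/* All CPUs have been processed at least once. */
	process_task_mortuary();
	marked_cpus = 0;
}

int main(void)
{
	int cpu;

	/* One full round of per-CPU syncs, in any order. */
	for (cpu = NUM_CPUS - 1; cpu >= 0; cpu--)
		mark_done(cpu);
	return 0;
}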