Commit 220abb7d authored by John Levon, committed by Linus Torvalds

[PATCH] oprofile: kernel/user addresses fix

This patch replaces the assumption that any address above PAGE_OFFSET is a
kernel address with an explicit user_mode(regs) test, and marks kernel/user
transitions by inserting switch codes into the buffer instead.
parent d22d63e3
...@@ -96,10 +96,13 @@ static int athlon_check_ctrs(unsigned int const cpu, ...@@ -96,10 +96,13 @@ static int athlon_check_ctrs(unsigned int const cpu,
{ {
unsigned int low, high; unsigned int low, high;
int i; int i;
unsigned long eip = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
for (i = 0 ; i < NUM_COUNTERS; ++i) { for (i = 0 ; i < NUM_COUNTERS; ++i) {
CTR_READ(low, high, msrs, i); CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) { if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(instruction_pointer(regs), i, cpu); oprofile_add_sample(eip, is_kernel, i, cpu);
CTR_WRITE(reset_value[i], msrs, i); CTR_WRITE(reset_value[i], msrs, i);
return 1; return 1;
} }
......
...@@ -569,6 +569,8 @@ static int p4_check_ctrs(unsigned int const cpu, ...@@ -569,6 +569,8 @@ static int p4_check_ctrs(unsigned int const cpu,
{ {
unsigned long ctr, low, high, stag, real; unsigned long ctr, low, high, stag, real;
int i; int i;
unsigned long eip = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
stag = get_stagger(); stag = get_stagger();
...@@ -599,7 +601,7 @@ static int p4_check_ctrs(unsigned int const cpu, ...@@ -599,7 +601,7 @@ static int p4_check_ctrs(unsigned int const cpu,
CCCR_READ(low, high, real); CCCR_READ(low, high, real);
CTR_READ(ctr, high, real); CTR_READ(ctr, high, real);
if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
oprofile_add_sample(regs->eip, i, cpu); oprofile_add_sample(eip, is_kernel, i, cpu);
CTR_WRITE(reset_value[i], real); CTR_WRITE(reset_value[i], real);
CCCR_CLEAR_OVF(low); CCCR_CLEAR_OVF(low);
CCCR_WRITE(low, high, real); CCCR_WRITE(low, high, real);
...@@ -624,7 +626,8 @@ static void p4_start(struct op_msrs const * const msrs) ...@@ -624,7 +626,8 @@ static void p4_start(struct op_msrs const * const msrs)
stag = get_stagger(); stag = get_stagger();
for (i = 0; i < num_counters; ++i) { for (i = 0; i < num_counters; ++i) {
if (!reset_value[i]) continue; if (!reset_value[i])
continue;
CCCR_READ(low, high, VIRT_CTR(stag, i)); CCCR_READ(low, high, VIRT_CTR(stag, i));
CCCR_SET_ENABLE(low); CCCR_SET_ENABLE(low);
CCCR_WRITE(low, high, VIRT_CTR(stag, i)); CCCR_WRITE(low, high, VIRT_CTR(stag, i));
......
...@@ -90,11 +90,13 @@ static int ppro_check_ctrs(unsigned int const cpu, ...@@ -90,11 +90,13 @@ static int ppro_check_ctrs(unsigned int const cpu,
{ {
unsigned int low, high; unsigned int low, high;
int i; int i;
unsigned long eip = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
for (i = 0 ; i < NUM_COUNTERS; ++i) { for (i = 0 ; i < NUM_COUNTERS; ++i) {
CTR_READ(low, high, msrs, i); CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) { if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(instruction_pointer(regs), i, cpu); oprofile_add_sample(eip, is_kernel, i, cpu);
CTR_WRITE(reset_value[i], msrs, i); CTR_WRITE(reset_value[i], msrs, i);
return 1; return 1;
} }
......
...@@ -20,8 +20,9 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void * ...@@ -20,8 +20,9 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void *
{ {
struct pt_regs * regs = (struct pt_regs *)data; struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
unsigned long eip = instruction_pointer(regs);
oprofile_add_sample(instruction_pointer(regs), 0, cpu); oprofile_add_sample(eip, !user_mode(regs), 0, cpu);
return 0; return 0;
} }
......
...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void * ...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void *
{ {
struct pt_regs * regs = (struct pt_regs *)data; struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
unsigned long pc = regs->iaoq[0];
int is_kernel = !user_mode(regs);
oprofile_add_sample(regs->iaoq[0], 0, cpu); oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0; return 0;
} }
......
...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void * ...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void *
{ {
struct pt_regs * regs = (struct pt_regs *)data; struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
unsigned long pc = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
oprofile_add_sample(instruction_pointer(regs), 0, cpu); oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0; return 0;
} }
......
...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void * ...@@ -19,8 +19,10 @@ static int timer_notify(struct notifier_block * self, unsigned long val, void *
{ {
struct pt_regs * regs = (struct pt_regs *)data; struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
unsigned long pc = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
oprofile_add_sample(instruction_pointer(regs), 0, cpu); oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0; return 0;
} }
......
...@@ -199,8 +199,16 @@ static void add_cpu_switch(int i) ...@@ -199,8 +199,16 @@ static void add_cpu_switch(int i)
last_cookie = ~0UL; last_cookie = ~0UL;
} }
/* Record a kernel/user mode transition in the event buffer.
 *
 * Emits ESCAPE_CODE followed by KERNEL_ENTER_SWITCH_CODE when in_kernel
 * is non-zero, or by KERNEL_EXIT_SWITCH_CODE when in_kernel is zero.
 */
static void add_kernel_ctx_switch(unsigned int in_kernel)
{
/* the escape code goes first, then the code naming the kind of switch */
add_event_entry(ESCAPE_CODE);
if (in_kernel)
add_event_entry(KERNEL_ENTER_SWITCH_CODE);
else
add_event_entry(KERNEL_EXIT_SWITCH_CODE);
}
static void add_ctx_switch(pid_t pid, unsigned long cookie) static void add_user_ctx_switch(pid_t pid, unsigned long cookie)
{ {
add_event_entry(ESCAPE_CODE); add_event_entry(ESCAPE_CODE);
add_event_entry(CTX_SWITCH_CODE); add_event_entry(CTX_SWITCH_CODE);
...@@ -243,19 +251,13 @@ static void add_us_sample(struct mm_struct * mm, struct op_sample * s) ...@@ -243,19 +251,13 @@ static void add_us_sample(struct mm_struct * mm, struct op_sample * s)
} }
/* Classify val as a kernel address purely by its numeric value: returns
 * non-zero when val lies strictly above PAGE_OFFSET (PAGE_OFFSET itself
 * is not counted as kernel), and zero otherwise.
 */
static inline int is_kernel(unsigned long val)
{
return val > PAGE_OFFSET;
}
/* Add a sample to the global event buffer. If possible the /* Add a sample to the global event buffer. If possible the
* sample is converted into a persistent dentry/offset pair * sample is converted into a persistent dentry/offset pair
* for later lookup from userspace. * for later lookup from userspace.
*/ */
static void add_sample(struct mm_struct * mm, struct op_sample * s) static void add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
{ {
if (is_kernel(s->eip)) { if (in_kernel) {
add_sample_entry(s->eip, s->event); add_sample_entry(s->eip, s->event);
} else if (mm) { } else if (mm) {
add_us_sample(mm, s); add_us_sample(mm, s);
...@@ -319,26 +321,34 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf) ...@@ -319,26 +321,34 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
struct mm_struct * mm = 0; struct mm_struct * mm = 0;
struct task_struct * new; struct task_struct * new;
unsigned long cookie; unsigned long cookie;
int in_kernel = 1;
int i; int i;
for (i=0; i < cpu_buf->pos; ++i) { for (i=0; i < cpu_buf->pos; ++i) {
struct op_sample * s = &cpu_buf->buffer[i]; struct op_sample * s = &cpu_buf->buffer[i];
if (is_ctx_switch(s->eip)) { if (is_ctx_switch(s->eip)) {
new = (struct task_struct *)s->event; if (s->event <= 1) {
/* kernel/userspace switch */
release_mm(mm); in_kernel = s->event;
mm = take_task_mm(new); add_kernel_ctx_switch(s->event);
} else {
cookie = get_exec_dcookie(mm); /* userspace context switch */
add_ctx_switch(new->pid, cookie); new = (struct task_struct *)s->event;
release_mm(mm);
mm = take_task_mm(new);
cookie = get_exec_dcookie(mm);
add_user_ctx_switch(new->pid, cookie);
}
} else { } else {
add_sample(mm, s); add_sample(mm, s, in_kernel);
} }
} }
release_mm(mm); release_mm(mm);
cpu_buf->pos = 0; cpu_buffer_reset(cpu_buf);
} }
...@@ -364,10 +374,12 @@ static void sync_cpu_buffers(void) ...@@ -364,10 +374,12 @@ static void sync_cpu_buffers(void)
* lockers only, and this region is already * lockers only, and this region is already
* protected by buffer_sem. It's raw to prevent * protected by buffer_sem. It's raw to prevent
* the preempt bogometer firing. Fruity, huh ? */ * the preempt bogometer firing. Fruity, huh ? */
_raw_spin_lock(&cpu_buf->int_lock); if (cpu_buf->pos > 0) {
add_cpu_switch(i); _raw_spin_lock(&cpu_buf->int_lock);
sync_buffer(cpu_buf); add_cpu_switch(i);
_raw_spin_unlock(&cpu_buf->int_lock); sync_buffer(cpu_buf);
_raw_spin_unlock(&cpu_buf->int_lock);
}
} }
up(&buffer_sem); up(&buffer_sem);
...@@ -393,3 +405,4 @@ static void timer_ping(unsigned long data) ...@@ -393,3 +405,4 @@ static void timer_ping(unsigned long data)
schedule_work(&sync_wq); schedule_work(&sync_wq);
/* timer is re-added by the scheduled task */ /* timer is re-added by the scheduled task */
} }
...@@ -62,6 +62,7 @@ int alloc_cpu_buffers(void) ...@@ -62,6 +62,7 @@ int alloc_cpu_buffers(void)
spin_lock_init(&b->int_lock); spin_lock_init(&b->int_lock);
b->pos = 0; b->pos = 0;
b->last_task = 0; b->last_task = 0;
b->last_is_kernel = -1;
b->sample_received = 0; b->sample_received = 0;
b->sample_lost_locked = 0; b->sample_lost_locked = 0;
b->sample_lost_overflow = 0; b->sample_lost_overflow = 0;
...@@ -84,12 +85,20 @@ void free_cpu_buffers(void) ...@@ -84,12 +85,20 @@ void free_cpu_buffers(void)
* be safe from any context. Instead we trylock the CPU's int_lock. * be safe from any context. Instead we trylock the CPU's int_lock.
* int_lock is taken by the processing code in sync_cpu_buffers() * int_lock is taken by the processing code in sync_cpu_buffers()
* so we avoid disturbing that. * so we avoid disturbing that.
*
* is_kernel is needed because on some architectures you cannot
* tell if you are in kernel or user space simply by looking at
* eip. We tag this in the buffer by generating kernel enter/exit
* events whenever is_kernel changes
*/ */
void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu) void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
unsigned long event, int cpu)
{ {
struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
struct task_struct * task; struct task_struct * task;
is_kernel = !!is_kernel;
cpu_buf->sample_received++; cpu_buf->sample_received++;
if (!spin_trylock(&cpu_buf->int_lock)) { if (!spin_trylock(&cpu_buf->int_lock)) {
...@@ -101,9 +110,17 @@ void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu) ...@@ -101,9 +110,17 @@ void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu)
cpu_buf->sample_lost_overflow++; cpu_buf->sample_lost_overflow++;
goto out; goto out;
} }
task = current; task = current;
/* notice a switch from user->kernel or vice versa */
if (cpu_buf->last_is_kernel != is_kernel) {
cpu_buf->last_is_kernel = is_kernel;
cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
cpu_buf->buffer[cpu_buf->pos].event = is_kernel;
cpu_buf->pos++;
}
/* notice a task switch */ /* notice a task switch */
if (cpu_buf->last_task != task) { if (cpu_buf->last_task != task) {
cpu_buf->last_task = task; cpu_buf->last_task = task;
...@@ -130,3 +147,19 @@ void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu) ...@@ -130,3 +147,19 @@ void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu)
out: out:
spin_unlock(&cpu_buf->int_lock); spin_unlock(&cpu_buf->int_lock);
} }
/* Resets the cpu buffer to a sane state - should be called with
 * cpu_buf->int_lock held.
 *
 * Rewinds the fill position to the start of the buffer and forgets the
 * last-seen kernel/user flag and task.
 */
void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
{
cpu_buf->pos = 0;
/* Reset these to invalid values; the next sample collected will
 * populate the buffer with proper values to initialize it.
 *
 * -1 can never match the 0/1 flag that oprofile_add_sample() compares
 * against last_is_kernel, so the next sample writes a fresh
 * kernel/user marker. A zero last_task presumably never equals the
 * current task, forcing a task-switch note as well - NOTE(review):
 * the task-switch path is not fully visible here, confirm.
 */
cpu_buf->last_is_kernel = -1;
cpu_buf->last_task = 0;
}
...@@ -20,7 +20,7 @@ struct task_struct; ...@@ -20,7 +20,7 @@ struct task_struct;
int alloc_cpu_buffers(void); int alloc_cpu_buffers(void);
void free_cpu_buffers(void); void free_cpu_buffers(void);
/* CPU buffer is composed of such entries (which are /* CPU buffer is composed of such entries (which are
* also used for context switch notes) * also used for context switch notes)
*/ */
...@@ -34,6 +34,7 @@ struct oprofile_cpu_buffer { ...@@ -34,6 +34,7 @@ struct oprofile_cpu_buffer {
/* protected by int_lock */ /* protected by int_lock */
unsigned long pos; unsigned long pos;
struct task_struct * last_task; struct task_struct * last_task;
int last_is_kernel;
struct op_sample * buffer; struct op_sample * buffer;
unsigned long sample_received; unsigned long sample_received;
unsigned long sample_lost_locked; unsigned long sample_lost_locked;
...@@ -43,4 +44,6 @@ struct oprofile_cpu_buffer { ...@@ -43,4 +44,6 @@ struct oprofile_cpu_buffer {
extern struct oprofile_cpu_buffer cpu_buffer[]; extern struct oprofile_cpu_buffer cpu_buffer[];
void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf);
#endif /* OPROFILE_CPU_BUFFER_H */ #endif /* OPROFILE_CPU_BUFFER_H */
...@@ -25,9 +25,11 @@ void wake_up_buffer_waiter(void); ...@@ -25,9 +25,11 @@ void wake_up_buffer_waiter(void);
* relevant data. * relevant data.
*/ */
#define ESCAPE_CODE ~0UL #define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1 #define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2 #define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3 #define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
/* add data to the event buffer */ /* add data to the event buffer */
void add_event_entry(unsigned long data); void add_event_entry(unsigned long data);
......
...@@ -49,7 +49,8 @@ int oprofile_arch_init(struct oprofile_operations ** ops); ...@@ -49,7 +49,8 @@ int oprofile_arch_init(struct oprofile_operations ** ops);
* Add a sample. This may be called from any context. Pass * Add a sample. This may be called from any context. Pass
* smp_processor_id() as cpu. * smp_processor_id() as cpu.
*/ */
extern void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu); extern void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
unsigned long event, int cpu);
/** /**
* Create a file of the given name as a child of the given root, with * Create a file of the given name as a child of the given root, with
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment