Commit bf5ae502 authored by David S. Miller

Merge bk://kernel.bkbits.net/acme/net-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
parents 05b52f33 901d6cc4
@@ -5,7 +5,8 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
 DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 	oprof.o cpu_buffer.o buffer_sync.o \
 	event_buffer.o oprofile_files.o \
-	oprofilefs.o oprofile_stats.o )
+	oprofilefs.o oprofile_stats.o \
+	timer_int.o )
oprofile-y := $(DRIVER_OBJS) common.o
oprofile-$(CONFIG_ALPHA_GENERIC) += op_model_ev4.o \
......
@@ -175,7 +175,7 @@ oprofile_arch_init(struct oprofile_operations **ops)
 	}
 	if (!lmodel)
-		return ENODEV;
+		return -ENODEV;
 	model = lmodel;
 	oprof_axp_ops.cpu_type = lmodel->cpu_type;
......
@@ -114,6 +114,15 @@ zdisk bzdisk: vmlinux
 install fdimage fdimage144 fdimage288: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
+prepare: include/asm-$(ARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
+arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
+	include/config/MARKER
+include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+	$(call filechk,gen-asm-offsets)
 archclean:
 	$(Q)$(MAKE) $(clean)=arch/i386/boot
......
/*
* Generate definitions needed by assembly language modules.
* This code generates raw asm output which is post-processed
* to extract and format the required data.
*/
#include <linux/signal.h>
#include <asm/ucontext.h>
#include "sigframe.h"
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : : )
void foo(void)
{
DEFINE(SIGCONTEXT_eax, offsetof (struct sigcontext, eax));
DEFINE(SIGCONTEXT_ebx, offsetof (struct sigcontext, ebx));
DEFINE(SIGCONTEXT_ecx, offsetof (struct sigcontext, ecx));
DEFINE(SIGCONTEXT_edx, offsetof (struct sigcontext, edx));
DEFINE(SIGCONTEXT_esi, offsetof (struct sigcontext, esi));
DEFINE(SIGCONTEXT_edi, offsetof (struct sigcontext, edi));
DEFINE(SIGCONTEXT_ebp, offsetof (struct sigcontext, ebp));
DEFINE(SIGCONTEXT_esp, offsetof (struct sigcontext, esp));
DEFINE(SIGCONTEXT_eip, offsetof (struct sigcontext, eip));
BLANK();
DEFINE(RT_SIGFRAME_sigcontext,
offsetof (struct rt_sigframe, uc.uc_mcontext));
}
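This is the standard asm-offsets trick: each DEFINE() forces the compiler to emit a marker line beginning with "->" into arch/$(ARCH)/kernel/asm-offsets.s, and the gen-asm-offsets filechk rule in the Makefile hunk above rewrites those markers into a C header. A sketch of the round trip (the offset value is illustrative, not taken from this commit):

	/* emitted into asm-offsets.s by DEFINE(SIGCONTEXT_eax, ...): */
	-> SIGCONTEXT_eax $44 offsetof (struct sigcontext, eax)

	/* turned by the filechk rule into include/asm-$(ARCH)/asm_offsets.h: */
	#define SIGCONTEXT_eax 44 /* offsetof (struct sigcontext, eax) */

The vsyscall unwind code further down can then use the SIGCONTEXT_* symbols as plain assembler constants without duplicating the struct layout by hand.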
......
struct sigframe
{
char *pretcode;
int sig;
struct sigcontext sc;
struct _fpstate fpstate;
unsigned long extramask[_NSIG_WORDS-1];
char retcode[8];
};
struct rt_sigframe
{
char *pretcode;
int sig;
struct siginfo *pinfo;
void *puc;
struct siginfo info;
struct ucontext uc;
struct _fpstate fpstate;
char retcode[8];
};
@@ -23,6 +23,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include "sigframe.h"
 #define DEBUG_SIG 0
@@ -126,28 +127,6 @@ sys_sigaltstack(const stack_t *uss, stack_t *uoss)
  * Do a signal return; undo the signal stack.
  */
-struct sigframe
-{
-	char *pretcode;
-	int sig;
-	struct sigcontext sc;
-	struct _fpstate fpstate;
-	unsigned long extramask[_NSIG_WORDS-1];
-	char retcode[8];
-};
-struct rt_sigframe
-{
-	char *pretcode;
-	int sig;
-	struct siginfo *pinfo;
-	void *puc;
-	struct siginfo info;
-	struct ucontext uc;
-	struct _fpstate fpstate;
-	char retcode[8];
-};
 static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
 {
......
@@ -7,6 +7,7 @@
  */
 #include <asm/unistd.h>
+#include <asm/asm_offsets.h>
/* XXX
@@ -18,21 +19,124 @@
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
__kernel_sigreturn:
-.LSTART_kernel_sigreturn:
+.LSTART_sigreturn:
popl %eax /* XXX does this mean it needs unwind info? */
movl $__NR_sigreturn, %eax
int $0x80
.LEND_sigreturn:
.size __kernel_sigreturn,.-.LSTART_sigreturn
.text
.balign 32
.globl __kernel_rt_sigreturn
.type __kernel_rt_sigreturn,@function
__kernel_rt_sigreturn:
-.LSTART_kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:
movl $__NR_rt_sigreturn, %eax
int $0x80
.LEND_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
.previous
.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI1:
.long .LENDCIEDLSI1-.LSTARTCIEDLSI1
.LSTARTCIEDLSI1:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0 /* DW_CFA_nop */
.align 4
.LENDCIEDLSI1:
.long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
.LSTARTFDEDLSI1:
.long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
/* HACK: The dwarf2 unwind routines will subtract 1 from the
return address to get an address in the middle of the
presumed call instruction. Since we didn't get here via
a call, we need to include the nop before the real start
to make up for it. */
.long .LSTART_sigreturn-1-. /* PC-relative start address */
.long .LEND_sigreturn-.LSTART_sigreturn+1
.uleb128 0 /* Augmentation */
/* What follows are the instructions for the table generation.
We record the locations of each register saved. This is
complicated by the fact that the "CFA" is always assumed to
be the value of the stack pointer in the caller. This means
that we must define the CFA of this body of code to be the
saved value of the stack pointer in the sigcontext. Which
also means that there is no fixed relation to the other
saved registers, which means that we must use DW_CFA_expression
to compute their addresses. It also means that when we
adjust the stack with the popl, we have to do it all over again. */
#define do_cfa_expr(offset) \
.byte 0x0f; /* DW_CFA_def_cfa_expression */ \
.uleb128 1f-0f; /* length */ \
0: .byte 0x74; /* DW_OP_breg4 */ \
.sleb128 offset; /* offset */ \
.byte 0x06; /* DW_OP_deref */ \
1:
#define do_expr(regno, offset) \
.byte 0x10; /* DW_CFA_expression */ \
.uleb128 regno; /* regno */ \
.uleb128 1f-0f; /* length */ \
0: .byte 0x74; /* DW_OP_breg4 */ \
.sleb128 offset; /* offset */ \
1:
do_cfa_expr(SIGCONTEXT_esp+4)
do_expr(0, SIGCONTEXT_eax+4)
do_expr(1, SIGCONTEXT_ecx+4)
do_expr(2, SIGCONTEXT_edx+4)
do_expr(3, SIGCONTEXT_ebx+4)
do_expr(5, SIGCONTEXT_ebp+4)
do_expr(6, SIGCONTEXT_esi+4)
do_expr(7, SIGCONTEXT_edi+4)
do_expr(8, SIGCONTEXT_eip+4)
.byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
do_cfa_expr(SIGCONTEXT_esp)
do_expr(0, SIGCONTEXT_eax)
do_expr(1, SIGCONTEXT_ecx)
do_expr(2, SIGCONTEXT_edx)
do_expr(3, SIGCONTEXT_ebx)
do_expr(5, SIGCONTEXT_ebp)
do_expr(6, SIGCONTEXT_esi)
do_expr(7, SIGCONTEXT_edi)
do_expr(8, SIGCONTEXT_eip)
.align 4
.LENDFDEDLSI1:
.long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
.LSTARTFDEDLSI2:
.long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
/* HACK: See above wrt unwind library assumptions. */
.long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
.uleb128 0 /* Augmentation */
/* What follows are the instructions for the table generation.
We record the locations of each register saved. This is
slightly less complicated than the above, since we don't
modify the stack pointer in the process. */
do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
.align 4
.LENDFDEDLSI2:
.previous
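To make the encoding concrete, here is one entry decoded by hand (assuming SIGCONTEXT_eax is 44; the real value depends on the struct sigcontext layout generated above). do_expr(0, SIGCONTEXT_eax+4) expands to:

	.byte 0x10	/* DW_CFA_expression */
	.uleb128 0	/* DWARF register 0 = %eax */
	.uleb128 2	/* expression length: 2 bytes */
	.byte 0x74	/* DW_OP_breg4: push %esp + offset */
	.sleb128 48	/* SIGCONTEXT_eax (44) + 4 */

so the unwinder finds the saved %eax at %esp + 48. do_cfa_expr() differs only in using DW_CFA_def_cfa_expression and appending DW_OP_deref, because the CFA is the stack-pointer value stored in the sigcontext's esp slot, not the slot's address.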
@@ -74,11 +74,13 @@ SYSENTER_RETURN:
 	.long .Lenter_kernel-.Lpush_edx
 	.byte 0x0e /* DW_CFA_def_cfa_offset */
 	.byte 0x10 /* RA at offset 16 now */
+	.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
 	/* Finally the epilogue. */
 	.byte 0x04 /* DW_CFA_advance_loc4 */
 	.long .Lpop_ebp-.Lenter_kernel
 	.byte 0x0e /* DW_CFA_def_cfa_offset */
 	.byte 0x12 /* RA at offset 12 now */
+	.byte 0xc5 /* DW_CFA_restore %ebp */
 	.byte 0x04 /* DW_CFA_advance_loc4 */
 	.long .Lpop_edx-.Lpop_ebp
 	.byte 0x0e /* DW_CFA_def_cfa_offset */
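Decoded with the standard DWARF constants (the data alignment factor of -4 is assumed to match this file's CIE, which is not shown in the hunk): 0x85 is DW_CFA_offset for register 5 (%ebp) and its uleb128 operand 4 scales to -16, recording that %ebp is saved at CFA-16 after the push; 0xc5 is DW_CFA_restore for the same register, cancelling that rule once %ebp has been popped in the epilogue.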
......
@@ -505,36 +505,20 @@ void __init mem_init(void)
 #endif
 }
 #include <linux/slab.h>
-kmem_cache_t *pmd_cache;
-kmem_cache_t *pgd_cache;
-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+#if CONFIG_X86_PAE
+struct kmem_cache_s *pae_pgd_cachep;
 void __init pgtable_cache_init(void)
 {
-	if (PTRS_PER_PMD > 1) {
-		pmd_cache = kmem_cache_create("pae_pmd",
-					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
-					SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
-					pmd_ctor,
-					NULL);
-		if (!pmd_cache)
-			panic("pgtable_cache_init(): cannot create pmd cache");
-	}
 	/*
 	 * PAE pgds must be 16-byte aligned:
 	 */
-	pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), 0,
-			SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, pgd_ctor, NULL);
-	if (!pgd_cache)
-		panic("pgtable_cache_init(): Cannot create pgd cache");
+	pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
+		SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
+	if (!pae_pgd_cachep)
+		panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
 }
+#endif
 /*
  * This function cannot be __init, since exceptions don't work in that
......
@@ -151,60 +151,61 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
-extern kmem_cache_t *pmd_cache;
-extern kmem_cache_t *pgd_cache;
+#if CONFIG_X86_PAE
-void pmd_ctor(void *__pmd, kmem_cache_t *pmd_cache, unsigned long flags)
+pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	clear_page(__pmd);
+	int i;
+	pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
+	if (pgd) {
+		for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+			unsigned long pmd = __get_free_page(GFP_KERNEL);
+			if (!pmd)
+				goto out_oom;
+			clear_page(pmd);
+			set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
+		}
+		memcpy(pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	}
+	return pgd;
+out_oom:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pae_pgd_cachep, pgd);
+	return NULL;
 }
-void pgd_ctor(void *__pgd, kmem_cache_t *pgd_cache, unsigned long flags)
+void pgd_free(pgd_t *pgd)
 {
-	pgd_t *pgd = __pgd;
+	int i;
-	if (PTRS_PER_PMD == 1)
-		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
-	memcpy(pgd + USER_PTRS_PER_PGD,
-		swapper_pg_dir + USER_PTRS_PER_PGD,
-		(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	for (i = 0; i < USER_PTRS_PER_PGD; i++)
+		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pae_pgd_cachep, pgd);
 }
+#else
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	int i;
-	pgd_t *pgd = kmem_cache_alloc(pgd_cache, SLAB_KERNEL);
-	if (PTRS_PER_PMD == 1)
-		return pgd;
-	else if (!pgd)
-		return NULL;
-	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = kmem_cache_alloc(pmd_cache, SLAB_KERNEL);
-		if (!pmd)
-			goto out_oom;
-		set_pgd(pgd + i, __pgd(1 + __pa((unsigned long long)((unsigned long)pmd))));
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+	if (pgd) {
+		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+		memcpy(pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
 	return pgd;
-out_oom:
-	for (i--; i >= 0; --i)
-		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-	kmem_cache_free(pgd_cache, (void *)pgd);
-	return NULL;
 }
 void pgd_free(pgd_t *pgd)
 {
-	int i;
+	free_page((unsigned long)pgd);
+}
-	if (PTRS_PER_PMD > 1) {
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-			set_pgd(pgd + i, __pgd(0));
-		}
-	}
+#endif /* CONFIG_X86_PAE */
-	kmem_cache_free(pgd_cache, (void *)pgd);
-}
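A subtlety shared by both versions above: a PAE pgd entry holds the pmd's physical address with the low (present) bit set, so the teardown paths must strip that bit before converting back to a virtual address. The round trip, shown in isolation:

	unsigned long pmd = __get_free_page(GFP_KERNEL);	/* page-aligned, low bits free */
	set_pgd(pgd + i, __pgd(1 + __pa(pmd)));	/* physical address | present bit */
	/* ... later, the inverse: */
	free_page((unsigned long)__va(pgd_val(pgd[i]) - 1));	/* -1 strips the present bit */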
@@ -3,8 +3,9 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
 DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 	oprof.o cpu_buffer.o buffer_sync.o \
 	event_buffer.o oprofile_files.o \
-	oprofilefs.o oprofile_stats.o )
+	oprofilefs.o oprofile_stats.o \
+	timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o timer_int.o
+oprofile-y := $(DRIVER_OBJS) init.o
 oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
 	op_model_ppro.o op_model_p4.o
@@ -11,22 +11,19 @@
 #include <linux/init.h>
 /* We support CPUs that have performance counters like the Pentium Pro
- * with NMI mode samples. Other x86 CPUs use a simple interrupt keyed
- * off the timer interrupt, which cannot profile interrupts-disabled
- * code unlike the NMI-based code.
+ * with the NMI mode driver.
  */
 extern int nmi_init(struct oprofile_operations ** ops);
 extern void nmi_exit(void);
-extern void timer_init(struct oprofile_operations ** ops);
 int __init oprofile_arch_init(struct oprofile_operations ** ops)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (!nmi_init(ops))
+	return nmi_init(ops);
+#else
+	return -ENODEV;
 #endif
-		timer_init(ops);
-	return 0;
 }
......
@@ -314,13 +314,13 @@ int __init nmi_init(struct oprofile_operations ** ops)
 	__u8 family = current_cpu_data.x86;
 	if (!cpu_has_apic)
-		return 0;
+		return -ENODEV;
 	switch (vendor) {
 		case X86_VENDOR_AMD:
 			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
 			if (family < 6)
-				return 0;
+				return -ENODEV;
 			model = &op_athlon_spec;
 			nmi_ops.cpu_type = "i386/athlon";
 			break;
@@ -331,30 +331,30 @@ int __init nmi_init(struct oprofile_operations ** ops)
 			/* Pentium IV */
 			case 0xf:
 				if (!p4_init())
-					return 0;
+					return -ENODEV;
 				break;
 			/* A P6-class processor */
 			case 6:
 				if (!ppro_init())
-					return 0;
+					return -ENODEV;
 				break;
 			default:
-				return 0;
+				return -ENODEV;
 		}
 		break;
 #endif /* !CONFIG_X86_64 */
 		default:
-			return 0;
+			return -ENODEV;
 	}
 	init_driverfs();
 	using_nmi = 1;
 	*ops = &nmi_ops;
 	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
-	return 1;
+	return 0;
 }
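Taken together with the init.c change above, nmi_init()'s convention flips from boolean (1 on success, 0 on failure) to the usual kernel one (0 on success, negative errno on failure), which is what allows oprofile_arch_init() to simply propagate its return value.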
......
@@ -3,6 +3,7 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
 DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 	oprof.o cpu_buffer.o buffer_sync.o \
 	event_buffer.o oprofile_files.o \
-	oprofilefs.o oprofile_stats.o )
+	oprofilefs.o oprofile_stats.o \
+	timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o timer_int.o
+oprofile-y := $(DRIVER_OBJS) init.o
@@ -15,8 +15,7 @@ extern void timer_init(struct oprofile_operations ** ops);
 int __init oprofile_arch_init(struct oprofile_operations ** ops)
 {
-	timer_init(ops);
-	return 0;
+	return -ENODEV;
 }
......
/**
* @file timer_int.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <linux/oprofile.h>
#include <linux/profile.h>
#include <asm/ptrace.h>
static int timer_notify(struct notifier_block * self, unsigned long val, void * data)
{
struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id();
unsigned long pc = regs->iaoq[0];
int is_kernel = !user_mode(regs);
oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0;
}
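(On parisc, regs->iaoq[0], the front of the instruction-address queue, is the closest equivalent of the instruction_pointer(regs) used by the other copies of this file below.)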
static struct notifier_block timer_notifier = {
.notifier_call = timer_notify,
};
static int timer_start(void)
{
return register_profile_notifier(&timer_notifier);
}
static void timer_stop(void)
{
unregister_profile_notifier(&timer_notifier);
}
static struct oprofile_operations timer_ops = {
.start = timer_start,
.stop = timer_stop,
.cpu_type = "timer"
};
void __init timer_init(struct oprofile_operations ** ops)
{
*ops = &timer_ops;
printk(KERN_INFO "oprofile: using timer interrupt.\n");
}
@@ -3,6 +3,7 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
 DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 	oprof.o cpu_buffer.o buffer_sync.o \
 	event_buffer.o oprofile_files.o \
-	oprofilefs.o oprofile_stats.o )
+	oprofilefs.o oprofile_stats.o \
+	timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o timer_int.o
+oprofile-y := $(DRIVER_OBJS) init.o
@@ -15,8 +15,7 @@ extern void timer_init(struct oprofile_operations ** ops);
 int __init oprofile_arch_init(struct oprofile_operations ** ops)
 {
-	timer_init(ops);
-	return 0;
+	return -ENODEV;
 }
......
/**
* @file timer_int.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <linux/oprofile.h>
#include <linux/profile.h>
#include <asm/ptrace.h>
static int timer_notify(struct notifier_block * self, unsigned long val, void * data)
{
struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id();
unsigned long pc = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0;
}
static struct notifier_block timer_notifier = {
.notifier_call = timer_notify,
};
static int timer_start(void)
{
return register_profile_notifier(&timer_notifier);
}
static void timer_stop(void)
{
unregister_profile_notifier(&timer_notifier);
}
static struct oprofile_operations timer_ops = {
.start = timer_start,
.stop = timer_stop,
.cpu_type = "timer"
};
void __init timer_init(struct oprofile_operations ** ops)
{
*ops = &timer_ops;
printk(KERN_INFO "oprofile: using timer interrupt.\n");
}
......@@ -3,6 +3,7 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o )
oprofilefs.o oprofile_stats.o \
timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o timer_int.o
oprofile-y := $(DRIVER_OBJS) init.o
@@ -15,8 +15,7 @@ extern void timer_init(struct oprofile_operations ** ops);
 int __init oprofile_arch_init(struct oprofile_operations ** ops)
 {
-	timer_init(ops);
-	return 0;
+	return -ENODEV;
 }
......
/**
* @file timer_int.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <linux/oprofile.h>
#include <linux/profile.h>
#include <asm/ptrace.h>
static int timer_notify(struct notifier_block * self, unsigned long val, void * data)
{
struct pt_regs * regs = (struct pt_regs *)data;
int cpu = smp_processor_id();
unsigned long pc = instruction_pointer(regs);
int is_kernel = !user_mode(regs);
oprofile_add_sample(pc, is_kernel, 0, cpu);
return 0;
}
static struct notifier_block timer_notifier = {
.notifier_call = timer_notify,
};
static int timer_start(void)
{
return register_profile_notifier(&timer_notifier);
}
static void timer_stop(void)
{
unregister_profile_notifier(&timer_notifier);
}
static struct oprofile_operations timer_ops = {
.start = timer_start,
.stop = timer_stop,
.cpu_type = "timer"
};
void __init timer_init(struct oprofile_operations ** ops)
{
*ops = &timer_ops;
printk(KERN_INFO "oprofile: using timer interrupt.\n");
}
@@ -9,9 +9,10 @@ obj-$(CONFIG_OPROFILE) += oprofile.o
 DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 	oprof.o cpu_buffer.o buffer_sync.o \
 	event_buffer.o oprofile_files.o \
-	oprofilefs.o oprofile_stats.o )
+	oprofilefs.o oprofile_stats.o \
+	timer_int.o )
-oprofile-objs := $(DRIVER_OBJS) init.o timer_int.o
+oprofile-objs := $(DRIVER_OBJS) init.o
 oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o
@@ -23,11 +24,9 @@ $(obj)/op_model_athlon.c: ${INCL}
 	@ln -sf ../../i386/oprofile/op_model_athlon.c $(obj)/op_model_athlon.c
 $(obj)/init.c: ${INCL}
 	@ln -sf ../../i386/oprofile/init.c $(obj)/init.c
-$(obj)/timer_int.c: ${INCL}
-	@ln -sf ../../i386/oprofile/timer_int.c $(obj)/timer_int.c
 $(obj)/op_counter.h:
 	@ln -sf ../../i386/oprofile/op_counter.h $(obj)/op_counter.h
 $(obj)/op_x86_model.h:
 	@ln -sf ../../i386/oprofile/op_x86_model.h $(obj)/op_x86_model.h
-clean-files += op_x86_model.h op_counter.h timer_int.c init.c \
+clean-files += op_x86_model.h op_counter.h init.c \
 	op_model_athlon.c nmi_int.c
@@ -58,8 +58,8 @@ static int exit_task_notify(struct notifier_block * self, unsigned long val, voi
  * must concern ourselves with. First, when a task is about to
  * exit (exit_mmap()), we should process the buffer to deal with
  * any samples in the CPU buffer, before we lose the ->mmap information
- * we need. Second, a task may unmap (part of) an executable mmap,
- * so we want to process samples before that happens too
+ * we need. It is vital to get this case correct, otherwise we can
+ * end up trying to access a freed task_struct.
  */
 static int mm_notify(struct notifier_block * self, unsigned long val, void * data)
 {
@@ -67,6 +67,29 @@ static int mm_notify(struct notifier_block * self, unsigned long val, void * dat
 	return 0;
 }
+/* Second, a task may unmap (part of) an executable mmap,
+ * so we want to process samples before that happens too. This is merely
+ * a quality-of-implementation (QOI) issue, not a correctness one.
+ */
+static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
+{
+	/* Note that we cannot sync the buffers directly, because we might end up
+	 * taking the mmap_sem that we hold now inside of event_buffer_read()
+	 * on a page fault, whilst holding buffer_sem - deadlock.
+	 *
+	 * This would mean a threaded reader of the event buffer, but we should
+	 * prevent it anyway.
+	 *
+	 * Delaying the work in a context that doesn't hold the mmap_sem means
+	 * that we won't lose samples from other mappings that current() may
+	 * have. Note that either way, we lose any pending samples for what is
+	 * being unmapped.
+	 */
+	schedule_work(&sync_wq);
+	return 0;
+}
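sync_wq itself is defined elsewhere in buffer_sync.c; a sketch of the deferred side, with the handler name and body assumed rather than taken from this diff:

	/* Sketch only: runs from keventd, where no caller's mmap_sem is held,
	 * so it can take buffer_sem and then each task's mmap_sem in the
	 * normal order. */
	static void wq_sync_buffers(void * data)
	{
		sync_cpu_buffers();
	}
	static DECLARE_WORK(sync_wq, wq_sync_buffers, NULL);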
/* We need to be told about new modules so we don't attribute to a previously
* loaded module, or drop the samples on the floor.
@@ -92,7 +115,7 @@ static struct notifier_block exit_task_nb = {
 };
 static struct notifier_block exec_unmap_nb = {
-	.notifier_call = mm_notify,
+	.notifier_call = munmap_notify,
 };
 static struct notifier_block exit_mmap_nb = {
@@ -147,6 +170,8 @@ void sync_stop(void)
 	profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
 	profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb);
 	del_timer_sync(&sync_timer);
+	/* timer might have queued work, make sure it's completed. */
+	flush_scheduled_work();
 }
@@ -296,6 +321,8 @@ static void add_sample(struct mm_struct * mm, struct op_sample * s, int in_kerne
 		add_sample_entry(s->eip, s->event);
 	} else if (mm) {
 		add_us_sample(mm, s);
+	} else {
+		atomic_inc(&oprofile_stats.sample_lost_no_mm);
 	}
 }
@@ -310,26 +337,23 @@ static void release_mm(struct mm_struct * mm)
 /* Take the task's mmap_sem to protect ourselves from
  * races when we do lookup_dcookie().
  */
-static struct mm_struct * take_task_mm(struct task_struct * task)
+static struct mm_struct * take_tasks_mm(struct task_struct * task)
 {
-	struct mm_struct * mm = task->mm;
-	/* if task->mm !NULL, mm_count must be at least 1. It cannot
-	 * drop to 0 without the task exiting, which will have to sleep
-	 * on buffer_sem first. So we do not need to mark mm_count
-	 * ourselves.
+	struct mm_struct * mm;
+	/* Subtle. We don't need to keep a reference to this task's mm,
+	 * because, for the mm to be freed on another CPU, that would have
+	 * to go through the task exit notifier, which ends up sleeping
+	 * on the buffer_sem we hold, so we end up with mutual exclusion
+	 * anyway.
 	 */
+	task_lock(task);
+	mm = task->mm;
+	task_unlock(task);
 	if (mm) {
-		/* More ugliness. If a task took its mmap
-		 * sem then came to sleep on buffer_sem we
-		 * will deadlock waiting for it. So we can
-		 * but try. This will lose samples :/
-		 */
-		if (!down_read_trylock(&mm->mmap_sem)) {
-			/* FIXME: this underestimates samples lost */
-			atomic_inc(&oprofile_stats.sample_lost_mmap_sem);
-			mm = NULL;
-		}
+		/* needed to walk the task's VMAs */
+		down_read(&mm->mmap_sem);
 	}
 	return mm;
@@ -399,7 +423,7 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
 			new = (struct task_struct *)s->event;
 			release_mm(mm);
-			mm = take_task_mm(new);
+			mm = take_tasks_mm(new);
 			cookie = get_exec_dcookie(mm);
 			add_user_ctx_switch(new->pid, cookie);
@@ -460,4 +484,3 @@ static void timer_ping(unsigned long data)
schedule_work(&sync_wq);
/* timer is re-added by the scheduled task */
}
@@ -151,11 +151,15 @@ ssize_t event_buffer_read(struct file * file, char * buf, size_t count, loff_t *
 	if (count != max || *offset)
 		return -EINVAL;
 	/* wait for the event buffer to fill up with some data */
 	wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready));
 	if (signal_pending(current))
 		return -EINTR;
+	/* can't currently happen */
+	if (!atomic_read(&buffer_ready))
+		return -EAGAIN;
 	down(&buffer_sem);
 	atomic_set(&buffer_ready, 0);
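For the wait above to finish, a producer elsewhere must set buffer_ready and wake buffer_wait; a hedged sketch of that counterpart (its name and exact shape are assumed, as it is not part of this diff):

	void wake_up_buffer_waiter(void)
	{
		down(&buffer_sem);
		atomic_set(&buffer_ready, 1);
		wake_up(&buffer_wait);
		up(&buffer_sem);
	}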
......
@@ -28,6 +28,8 @@ int oprofile_setup(void)
 {
 	int err;
+	down(&start_sem);
 	if ((err = alloc_cpu_buffers()))
 		goto out;
@@ -45,7 +47,6 @@ int oprofile_setup(void)
 	if ((err = sync_start()))
 		goto out3;
-	down(&start_sem);
 	is_setup = 1;
 	up(&start_sem);
 	return 0;
@@ -58,6 +59,7 @@ int oprofile_setup(void)
 out1:
 	free_cpu_buffers();
 out:
+	up(&start_sem);
 	return err;
 }
@@ -106,27 +108,34 @@ void oprofile_stop(void)
 void oprofile_shutdown(void)
 {
+	down(&start_sem);
 	sync_stop();
 	if (oprofile_ops->shutdown)
 		oprofile_ops->shutdown();
 	/* down() is also necessary to synchronise all pending events
 	 * before freeing */
 	down(&buffer_sem);
 	is_setup = 0;
 	up(&buffer_sem);
 	free_event_buffer();
 	free_cpu_buffers();
+	up(&start_sem);
 }
+extern void timer_init(struct oprofile_operations ** ops);
 static int __init oprofile_init(void)
 {
 	int err;
 	/* Architecture must fill in the interrupt ops and the
-	 * logical CPU type.
+	 * logical CPU type, or we can fall back to the timer
+	 * interrupt profiler.
 	 */
 	err = oprofile_arch_init(&oprofile_ops);
+	if (err == -ENODEV) {
+		timer_init(&oprofile_ops);
+		err = 0;
+	}
 	if (err)
 		goto out;
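Under this contract an architecture either fills in *ops and returns 0, or returns -ENODEV to request the fallback, so a port with no hardware driver reduces to the stub already seen for parisc, ppc and sparc64 above:

	int __init oprofile_arch_init(struct oprofile_operations ** ops)
	{
		/* no CPU-specific driver; the core falls back to timer_init() */
		return -ENODEV;
	}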
......
@@ -31,7 +31,7 @@ void oprofile_reset_stats(void)
 		cpu_buf->sample_lost_task_exit = 0;
 	}
-	atomic_set(&oprofile_stats.sample_lost_mmap_sem, 0);
+	atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
 	atomic_set(&oprofile_stats.event_lost_overflow, 0);
 }
@@ -68,8 +68,8 @@ void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
 			&cpu_buf->sample_lost_task_exit);
 	}
-	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_mmap_sem",
-		&oprofile_stats.sample_lost_mmap_sem);
+	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm",
+		&oprofile_stats.sample_lost_no_mm);
 	oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
 		&oprofile_stats.event_lost_overflow);
 }
@@ -13,7 +13,7 @@
 #include <asm/atomic.h>
 struct oprofile_stat_struct {
-	atomic_t sample_lost_mmap_sem;
+	atomic_t sample_lost_no_mm;
 	atomic_t event_lost_overflow;
 };
......
@@ -14,8 +14,6 @@
 #include <linux/oprofile.h>
 #include <asm/ptrace.h>
-#include "op_counter.h"
 static int timer_notify(struct notifier_block * self, unsigned long val, void * data)
 {
 	struct pt_regs * regs = (struct pt_regs *)data;
......
@@ -20,11 +20,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p
  * Allocate and free page tables.
  */
-pgd_t *pgd_alloc(struct mm_struct *);
-void pgd_free(pgd_t *pgd);
+extern pgd_t *pgd_alloc(struct mm_struct *);
+extern void pgd_free(pgd_t *pgd);
-pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
+extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
static inline void pte_free_kernel(pte_t *pte)
{
......
@@ -123,4 +123,6 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 #define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
 #define PTE_FILE_MAX_BITS 32
+extern struct kmem_cache_s *pae_pgd_cachep;
 #endif /* _I386_PGTABLE_3LEVEL_H */
@@ -41,12 +41,21 @@ extern unsigned long empty_zero_page[1024];
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
+/*
+ * Need to initialise the X86 PAE caches
+ */
+extern void pgtable_cache_init(void);
 #else
 # include <asm/pgtable-2level.h>
-#endif
-void pgtable_cache_init(void);
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init() do { } while (0)
+#endif
 #endif
#define PMD_SIZE (1UL << PMD_SHIFT)
@@ -183,6 +192,7 @@ extern unsigned long pg0[1024];
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
+static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
 static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
 static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
 static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
......
@@ -51,6 +51,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/pgtable.h>
 #include <linux/swapops.h>
@@ -688,6 +689,45 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		vma = find_extend_vma(mm, start);
+#ifdef FIXADDR_START
+		if (!vma && start >= FIXADDR_START && start < FIXADDR_TOP) {
+			static struct vm_area_struct fixmap_vma = {
+				/* Catch users - if there are any valid
+				   ones, we can make this be "&init_mm" or
+				   something. */
+				.vm_mm = NULL,
+				.vm_start = FIXADDR_START,
+				.vm_end = FIXADDR_TOP,
+				.vm_page_prot = PAGE_READONLY,
+				.vm_flags = VM_READ | VM_EXEC,
+			};
+			unsigned long pg = start & PAGE_MASK;
+			pgd_t *pgd;
+			pmd_t *pmd;
+			pte_t *pte;
+			pgd = pgd_offset_k(pg);
+			if (!pgd)
+				return i ? : -EFAULT;
+			pmd = pmd_offset(pgd, pg);
+			if (!pmd)
+				return i ? : -EFAULT;
+			pte = pte_offset_kernel(pmd, pg);
+			if (!pte || !pte_present(*pte) || !pte_user(*pte) ||
+			    !(write ? pte_write(*pte) : pte_read(*pte)))
+				return i ? : -EFAULT;
+			if (pages) {
+				pages[i] = pte_page(*pte);
+				get_page(pages[i]);
+			}
+			if (vmas)
+				vmas[i] = &fixmap_vma;
+			i++;
+			start += PAGE_SIZE;
+			len--;
+			continue;
+		}
+#endif
 		if (!vma || (pages && (vma->vm_flags & VM_IO))
 					|| !(flags & vma->vm_flags))
 			return i ? : -EFAULT;
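A note on the idiom used throughout this block: i ? : -EFAULT is the GNU conditional-with-omitted-middle extension; it returns the number of pages already pinned when some progress was made, and -EFAULT only when none was, so partial results are reported rather than discarded.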
......
@@ -508,6 +508,8 @@ static void SMP_TIMER_NAME(rt_run_flush)(unsigned long dummy)
 	rt_deadline = 0;
+	get_random_bytes(&rt_hash_rnd, 4);
 	for (i = rt_hash_mask; i >= 0; i--) {
 		spin_lock_bh(&rt_hash_table[i].lock);
 		rth = rt_hash_table[i].chain;
@@ -570,7 +572,6 @@ static void rt_secret_rebuild(unsigned long dummy)
 {
 	unsigned long now = jiffies;
-	get_random_bytes(&rt_hash_rnd, 4);
 	rt_cache_flush(0);
 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
 }
......