Commit 83c2f912 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
parents f0ed5b9a 172d1b0b
......@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
stacktrace [FTRACE]
Enabled the stack tracer on boot up.
stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
will trace at boot up. function-list is a comma separated
list of functions. This list can be changed at run
time by the stack_trace_filter file in the debugfs
tracing directory. Note, this enables stack tracing
and the stacktrace above is not needed.
sti= [PARISC,HW]
Format: <num>
Set the STI (builtin display/keyboard on the HP-PARISC
......
......@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void);
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
__get_cpu_var(debug_stack_usage)++;
}
static inline void debug_stack_usage_dec(void)
{
__get_cpu_var(debug_stack_usage)--;
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_DEBUGREG_H */
......@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern struct desc_ptr nmi_idt_descr;
extern gate_desc nmi_idt_table[];
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
......@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit = (limit >> 16) & 0xf;
}
#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
gate_desc s;
pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
write_idt_entry(nmi_idt_table, gate, &s);
}
#endif
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
......
......@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) nmi_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
......@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);
int is_debug_stack(unsigned long addr)
{
return __get_cpu_var(debug_stack_usage) ||
(addr <= __get_cpu_var(debug_stack_addr) &&
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
void debug_stack_set_zero(void)
{
load_idt((const struct desc_ptr *)&nmi_idt_descr);
}
void debug_stack_reset(void)
{
load_idt((const struct desc_ptr *)&idt_descr);
}
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
......@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
if (v == DEBUG_STACK-1)
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
}
}
......
......@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
CFI_ENDPROC
END(error_exit)
/*
* Test if a given stack is an NMI stack or not.
*/
.macro test_in_nmi reg stack nmi_ret normal_ret
cmpq %\reg, \stack
ja \normal_ret
subq $EXCEPTION_STKSZ, %\reg
cmpq %\reg, \stack
jb \normal_ret
jmp \nmi_ret
.endm
/* runs on exception stack */
ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
* This means that we can have nested NMIs where the next
* NMI is using the top of the stack of the previous NMI. We
* can't let it execute because the nested NMI will corrupt the
* stack of the previous NMI. NMI handlers are not re-entrant
* anyway.
*
* To handle this case we do the following:
* Check the a special location on the stack that contains
* a variable that is set when NMIs are executing.
* The interrupted task's stack is also checked to see if it
* is an NMI stack.
* If the variable is not set and the stack is not the NMI
* stack then:
* o Set the special variable on the stack
* o Copy the interrupt frame into a "saved" location on the stack
* o Copy the interrupt frame into a "copy" location on the stack
* o Continue processing the NMI
* If the variable is set or the previous stack is the NMI stack:
* o Modify the "copy" location to jump to the repeate_nmi
* o return back to the first NMI
*
* Now on exit of the first NMI, we first clear the stack variable
* The NMI stack will tell any nested NMIs at that point that it is
* nested. Then we pop the stack normally with iret, and if there was
* a nested NMI that updated the copy interrupt stack frame, a
* jump will be made to the repeat_nmi code that will handle the second
* NMI.
*/
/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx
/*
* Check the special variable on the stack to see if NMIs are
* executing.
*/
cmp $1, -8(%rsp)
je nested_nmi
/*
* Now test if the previous stack was an NMI stack.
* We need the double check. We check the NMI stack to satisfy the
* race when the first NMI clears the variable before returning.
* We check the variable because the first NMI could be in a
* breakpoint routine using a breakpoint stack.
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
nested_nmi:
/*
* Do nothing if we interrupted the fixup in repeat_nmi.
* It's about to repeat the NMI handler, so we are fine
* with ignoring this one.
*/
movq $repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja 1f
movq $end_repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja nested_nmi_out
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
leaq -6*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 6*8
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
pushq_cfi $__KERNEL_CS
pushq_cfi $repeat_nmi
/* Put stack back */
addq $(11*8), %rsp
CFI_ADJUST_CFA_OFFSET -11*8
nested_nmi_out:
popq_cfi %rdx
/* No need to check faults here */
INTERRUPT_RETURN
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
* stored in rdx, we must keep that space available.
* Here's what our stack frame will look like:
* +-------------------------+
* | original SS |
* | original Return RSP |
* | original RFLAGS |
* | original CS |
* | original RIP |
* +-------------------------+
* | temp storage for rdx |
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
* | Saved SS |
* | Saved Return RSP |
* | Saved RFLAGS |
* | Saved CS |
* | Saved RIP |
* +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
* | pt_regs |
* +-------------------------+
*
* The saved RIP is used to fix up the copied RIP that a nested
* NMI may zero out. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
.endr
/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
/* Do not pop rdx, nested NMIs will corrupt it */
movq 11*8(%rsp), %rdx
/*
* Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs
* caused by an exception and nested NMI will start here, and
* can still be preempted by another NMI.
*/
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
/*
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
* as we should not be calling schedule in NMI context.
* Even with normal interrupts enabled. An NMI should not be
* setting NEED_RESCHED or anything that normal interrupts and
* exceptions might do.
*/
call save_paranoid
DEFAULT_FRAME 0
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
/* ebx: no swapgs flag */
DISABLE_INTERRUPTS(CLBR_NONE)
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
testl $3,CS(%rsp)
jnz nmi_userspace
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8
/* Clear the NMI executing stack variable */
movq $0, 10*8(%rsp)
jmp irq_return
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz nmi_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz nmi_schedule
movl %ebx,%edx /* arg3: thread flags */
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
jmp nmi_userspace
nmi_schedule:
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
jmp nmi_userspace
CFI_ENDPROC
#else
jmp paranoid_exit
CFI_ENDPROC
#endif
END(nmi)
/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)
/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:
ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
......
......@@ -417,6 +417,10 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
.align L1_CACHE_BYTES
ENTRY(nmi_idt_table)
.skip IDT_ENTRIES * 16
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
......
......@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs);
}
/*
* NMIs can hit breakpoints which will cause it to lose its
* NMI context with the CPU when the breakpoint does an iret.
*/
#ifdef CONFIG_X86_32
/*
* For i386, NMIs use the same stack as the kernel, and we can
* add a workaround to the iret problem in C. Simply have 3 states
* the NMI can be in.
*
* 1) not running
* 2) executing
* 3) latched
*
* When no NMI is in progress, it is in the "not running" state.
* When an NMI comes in, it goes into the "executing" state.
* Normally, if another NMI is triggered, it does not interrupt
* the running NMI and the HW will simply latch it so that when
* the first NMI finishes, it will restart the second NMI.
* (Note, the latch is binary, thus multiple NMIs triggering,
* when one is running, are ignored. Only one NMI is restarted.)
*
* If an NMI hits a breakpoint that executes an iret, another
* NMI can preempt it. We do not want to allow this new NMI
* to run, but we want to execute it when the first one finishes.
* We set the state to "latched", and the first NMI will perform
* an cmpxchg on the state, and if it doesn't successfully
* reset the state to "not running" it will restart the next
* NMI.
*/
enum nmi_states {
NMI_NOT_RUNNING,
NMI_EXECUTING,
NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
#define nmi_nesting_preprocess(regs) \
do { \
if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
__get_cpu_var(nmi_state) = NMI_LATCHED; \
return; \
} \
nmi_restart: \
__get_cpu_var(nmi_state) = NMI_EXECUTING; \
} while (0)
#define nmi_nesting_postprocess() \
do { \
if (cmpxchg(&__get_cpu_var(nmi_state), \
NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
goto nmi_restart; \
} while (0)
#else /* x86_64 */
/*
* In x86_64 things are a bit more difficult. This has the same problem
* where an NMI hitting a breakpoint that calls iret will remove the
* NMI context, allowing a nested NMI to enter. What makes this more
* difficult is that both NMIs and breakpoints have their own stack.
* When a new NMI or breakpoint is executed, the stack is set to a fixed
* point. If an NMI is nested, it will have its stack set at that same
* fixed address that the first NMI had, and will start corrupting the
* stack. This is handled in entry_64.S, but the same problem exists with
* the breakpoint stack.
*
* If a breakpoint is being processed, and the debug stack is being used,
* if an NMI comes in and also hits a breakpoint, the stack pointer
* will be set to the same fixed address as the breakpoint that was
* interrupted, causing that stack to be corrupted. To handle this case,
* check if the stack that was interrupted is the debug stack, and if
* so, change the IDT so that new breakpoints will use the current stack
* and not switch to the fixed address. On return of the NMI, switch back
* to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
/*
* If we interrupted a breakpoint, it is possible that
* the nmi handler will have breakpoints too. We need to
* change the IDT such that breakpoints that happen here
* continue to use the NMI stack.
*/
if (unlikely(is_debug_stack(regs->sp))) {
debug_stack_set_zero();
__get_cpu_var(update_debug_stack) = 1;
}
}
static inline void nmi_nesting_postprocess(void)
{
if (unlikely(__get_cpu_var(update_debug_stack)))
debug_stack_reset();
}
#endif
dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
nmi_nesting_preprocess(regs);
nmi_enter();
inc_irq_stat(__nmi_count);
......@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
default_do_nmi(regs);
nmi_exit();
/* On i386, may loop back to preprocess */
nmi_nesting_postprocess();
}
void stop_nmi(void)
......
......@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
== NOTIFY_STOP)
return;
/*
* Let others (NMI) know that the debug stack is in use
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
}
#ifdef CONFIG_X86_64
......@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
SIGTRAP) == NOTIFY_STOP)
return;
/*
* Let others (NMI) know that the debug stack is in use
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
......@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
return;
}
......@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
return;
}
......@@ -718,4 +732,10 @@ void __init trap_init(void)
cpu_init();
x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64
memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
set_nmi_gate(1, &debug);
set_nmi_gate(3, &int3);
#endif
}
......@@ -50,6 +50,11 @@
# define inline inline __attribute__((always_inline))
# define __inline__ __inline__ __attribute__((always_inline))
# define __inline __inline __attribute__((always_inline))
#else
/* A lot of inline functions can cause havoc with function tracing */
# define inline inline notrace
# define __inline__ __inline__ notrace
# define __inline __inline notrace
#endif
#define __deprecated __attribute__((deprecated))
......
......@@ -133,6 +133,8 @@ struct ftrace_func_command {
int ftrace_arch_code_modify_prepare(void);
int ftrace_arch_code_modify_post_process(void);
void ftrace_bug(int err, unsigned long ip);
struct seq_file;
struct ftrace_probe_ops {
......@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
enum {
FTRACE_FL_ENABLED = (1 << 30),
FTRACE_FL_FREE = (1 << 31),
};
#define FTRACE_FL_MASK (0x3UL << 30)
......@@ -172,10 +173,7 @@ struct dyn_ftrace {
unsigned long ip; /* address of mcount call-site */
struct dyn_ftrace *freelist;
};
union {
unsigned long flags;
struct dyn_ftrace *newlist;
};
struct dyn_arch_ftrace arch;
};
......@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
int register_ftrace_command(struct ftrace_func_command *cmd);
int unregister_ftrace_command(struct ftrace_func_command *cmd);
enum {
FTRACE_UPDATE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
FTRACE_START_FUNC_RET = (1 << 3),
FTRACE_STOP_FUNC_RET = (1 << 4),
};
enum {
FTRACE_UPDATE_IGNORE,
FTRACE_UPDATE_MAKE_CALL,
FTRACE_UPDATE_MAKE_NOP,
};
enum {
FTRACE_ITER_FILTER = (1 << 0),
FTRACE_ITER_NOTRACE = (1 << 1),
FTRACE_ITER_PRINTALL = (1 << 2),
FTRACE_ITER_DO_HASH = (1 << 3),
FTRACE_ITER_HASH = (1 << 4),
FTRACE_ITER_ENABLED = (1 << 5),
};
void arch_ftrace_update_code(int command);
struct ftrace_rec_iter;
struct ftrace_rec_iter *ftrace_rec_iter_start(void);
struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
int ftrace_update_record(struct dyn_ftrace *rec, int enable);
int ftrace_test_record(struct dyn_ftrace *rec, int enable);
void ftrace_run_stop_machine(int command);
int ftrace_location(unsigned long ip);
extern ftrace_func_t ftrace_trace_function;
int ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct inode *inode, struct file *file);
ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos);
ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos);
loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
int ftrace_regex_release(struct inode *inode, struct file *file);
void __init
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void *data);
......@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
{
return 0;
}
/*
* Again users of functions that have ftrace_ops may not
* have them defined when ftrace is not enabled, but these
* functions may still be called. Use a macro instead of inline.
*/
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos) { return -ENODEV; }
static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos) { return -ENODEV; }
static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
return -ENODEV;
}
static inline int
ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
#endif /* CONFIG_DYNAMIC_FTRACE */
/* totally disable ftrace - can not re-enable after this */
......
......@@ -22,11 +22,13 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
......@@ -947,13 +949,6 @@ struct ftrace_func_probe {
struct rcu_head rcu;
};
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
FTRACE_START_FUNC_RET = (1 << 3),
FTRACE_STOP_FUNC_RET = (1 << 4),
};
struct ftrace_func_entry {
struct hlist_node hlist;
unsigned long ip;
......@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
.filter_hash = EMPTY_HASH,
};
static struct dyn_ftrace *ftrace_new_addrs;
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
struct ftrace_page *next;
struct dyn_ftrace *records;
int index;
struct dyn_ftrace records[];
int size;
};
#define ENTRIES_PER_PAGE \
((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
static struct ftrace_page *ftrace_new_pgs;
#define ENTRY_SIZE sizeof(struct dyn_ftrace)
#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
/* estimate from running different kernels */
#define NR_TO_INIT 10000
......@@ -1003,7 +999,10 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
static struct dyn_ftrace *ftrace_free_records;
static bool ftrace_hash_empty(struct ftrace_hash *hash)
{
return !hash || !hash->count;
}
static struct ftrace_func_entry *
ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
......@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
struct hlist_head *hhd;
struct hlist_node *n;
if (!hash->count)
if (ftrace_hash_empty(hash))
return NULL;
if (hash->size_bits > 0)
......@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
/* Empty hash? */
if (!hash || !hash->count)
if (ftrace_hash_empty(hash))
return new_hash;
size = 1 << hash->size_bits;
......@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
filter_hash = rcu_dereference_raw(ops->filter_hash);
notrace_hash = rcu_dereference_raw(ops->notrace_hash);
if ((!filter_hash || !filter_hash->count ||
if ((ftrace_hash_empty(filter_hash) ||
ftrace_lookup_ip(filter_hash, ip)) &&
(!notrace_hash || !notrace_hash->count ||
(ftrace_hash_empty(notrace_hash) ||
!ftrace_lookup_ip(notrace_hash, ip)))
ret = 1;
else
......@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
} \
}
static int ftrace_cmp_recs(const void *a, const void *b)
{
const struct dyn_ftrace *reca = a;
const struct dyn_ftrace *recb = b;
if (reca->ip > recb->ip)
return 1;
if (reca->ip < recb->ip)
return -1;
return 0;
}
/**
* ftrace_location - return true if the ip giving is a traced location
* @ip: the instruction pointer to check
*
* Returns 1 if @ip given is a pointer to a ftrace location.
* That is, the instruction that is either a NOP or call to
* the function tracer. It checks the ftrace internal tables to
* determine if the address belongs or not.
*/
int ftrace_location(unsigned long ip)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec;
struct dyn_ftrace key;
key.ip = ip;
for (pg = ftrace_pages_start; pg; pg = pg->next) {
rec = bsearch(&key, pg->records, pg->index,
sizeof(struct dyn_ftrace),
ftrace_cmp_recs);
if (rec)
return 1;
}
return 0;
}
static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
......@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash) {
hash = ops->filter_hash;
other_hash = ops->notrace_hash;
if (!hash || !hash->count)
if (ftrace_hash_empty(hash))
all = 1;
} else {
inc = !inc;
......@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
* If the notrace hash has no items,
* then there's nothing to do.
*/
if (hash && !hash->count)
if (ftrace_hash_empty(hash))
return;
}
......@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
match = 1;
} else {
in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
in_hash = !!ftrace_lookup_ip(hash, rec->ip);
in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
/*
*
......@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash && in_hash && !in_other_hash)
match = 1;
else if (!filter_hash && in_hash &&
(in_other_hash || !other_hash->count))
(in_other_hash || ftrace_hash_empty(other_hash)))
match = 1;
}
if (!match)
......@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->freelist = ftrace_free_records;
ftrace_free_records = rec;
rec->flags |= FTRACE_FL_FREE;
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
struct dyn_ftrace *rec;
/* First check for freed records */
if (ftrace_free_records) {
rec = ftrace_free_records;
if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
FTRACE_WARN_ON_ONCE(1);
ftrace_free_records = NULL;
return NULL;
}
ftrace_free_records = rec->freelist;
memset(rec, 0, sizeof(*rec));
return rec;
}
if (ftrace_pages->index == ENTRIES_PER_PAGE) {
if (!ftrace_pages->next) {
/* allocate another page */
ftrace_pages->next =
(void *)get_zeroed_page(GFP_KERNEL);
if (!ftrace_pages->next)
if (ftrace_pages->index == ftrace_pages->size) {
/* We should have allocated enough */
if (WARN_ON(!ftrace_pages->next))
return NULL;
}
ftrace_pages = ftrace_pages->next;
}
......@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
return NULL;
rec->ip = ip;
rec->newlist = ftrace_new_addrs;
ftrace_new_addrs = rec;
return rec;
}
......@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
static void ftrace_bug(int failed, unsigned long ip)
/**
* ftrace_bug - report and shutdown function tracer
* @failed: The failed type (EFAULT, EINVAL, EPERM)
* @ip: The address that failed
*
* The arch code that enables or disables the function tracing
* can call ftrace_bug() when it has detected a problem in
* modifying the code. @failed should be one of either:
* EFAULT - if the problem happens on reading the @ip address
* EINVAL - if what is read at @ip is not what was expected
* EPERM - if the problem happens on writting to the @ip address
*/
void ftrace_bug(int failed, unsigned long ip)
{
switch (failed) {
case -EFAULT:
......@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
return 0;
}
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
{
unsigned long ftrace_addr;
unsigned long flag = 0UL;
ftrace_addr = (unsigned long)FTRACE_ADDR;
/*
* If we are enabling tracing:
* If we are updating calls:
*
* If the record has a ref count, then we need to enable it
* because someone is using it.
*
* Otherwise we make sure its disabled.
*
* If we are disabling tracing, then disable all records that
* If we are disabling calls, then disable all records that
* are enabled.
*/
if (enable && (rec->flags & ~FTRACE_FL_MASK))
......@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
/* If the state of this record hasn't changed, then do nothing */
if ((rec->flags & FTRACE_FL_ENABLED) == flag)
return 0;
return FTRACE_UPDATE_IGNORE;
if (flag) {
if (update)
rec->flags |= FTRACE_FL_ENABLED;
return ftrace_make_call(rec, ftrace_addr);
return FTRACE_UPDATE_MAKE_CALL;
}
if (update)
rec->flags &= ~FTRACE_FL_ENABLED;
return FTRACE_UPDATE_MAKE_NOP;
}
/**
* ftrace_update_record, set a record that now is tracing or not
* @rec: the record to update
* @enable: set to 1 if the record is tracing, zero to force disable
*
* The records that represent all functions that can be traced need
* to be updated when tracing has been enabled.
*/
int ftrace_update_record(struct dyn_ftrace *rec, int enable)
{
return ftrace_check_record(rec, enable, 1);
}
/**
* ftrace_test_record, check if the record has been enabled or not
* @rec: the record to test
* @enable: set to 1 to check if enabled, 0 if it is disabled
*
* The arch code may need to test if a record is already set to
* tracing to determine how to modify the function code that it
* represents.
*/
int ftrace_test_record(struct dyn_ftrace *rec, int enable)
{
return ftrace_check_record(rec, enable, 0);
}
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
unsigned long ftrace_addr;
int ret;
ftrace_addr = (unsigned long)FTRACE_ADDR;
ret = ftrace_update_record(rec, enable);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MAKE_CALL:
return ftrace_make_call(rec, ftrace_addr);
case FTRACE_UPDATE_MAKE_NOP:
return ftrace_make_nop(NULL, rec, ftrace_addr);
}
return -1; /* unknow ftrace bug */
}
static void ftrace_replace_code(int enable)
static void ftrace_replace_code(int update)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
......@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
return;
do_for_each_ftrace_rec(pg, rec) {
/* Skip over free records */
if (rec->flags & FTRACE_FL_FREE)
continue;
failed = __ftrace_replace_code(rec, enable);
failed = __ftrace_replace_code(rec, update);
if (failed) {
ftrace_bug(failed, rec->ip);
/* Stop processing */
......@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
} while_for_each_ftrace_rec();
}
struct ftrace_rec_iter {
struct ftrace_page *pg;
int index;
};
/**
* ftrace_rec_iter_start, start up iterating over traced functions
*
* Returns an iterator handle that is used to iterate over all
* the records that represent address locations where functions
* are traced.
*
* May return NULL if no records are available.
*/
struct ftrace_rec_iter *ftrace_rec_iter_start(void)
{
/*
* We only use a single iterator.
* Protected by the ftrace_lock mutex.
*/
static struct ftrace_rec_iter ftrace_rec_iter;
struct ftrace_rec_iter *iter = &ftrace_rec_iter;
iter->pg = ftrace_pages_start;
iter->index = 0;
/* Could have empty pages */
while (iter->pg && !iter->pg->index)
iter->pg = iter->pg->next;
if (!iter->pg)
return NULL;
return iter;
}
/**
* ftrace_rec_iter_next, get the next record to process.
* @iter: The handle to the iterator.
*
* Returns the next iterator after the given iterator @iter.
*/
struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
{
iter->index++;
if (iter->index >= iter->pg->index) {
iter->pg = iter->pg->next;
iter->index = 0;
/* Could have empty pages */
while (iter->pg && !iter->pg->index)
iter->pg = iter->pg->next;
}
if (!iter->pg)
return NULL;
return iter;
}
/**
* ftrace_rec_iter_record, get the record at the iterator location
* @iter: The current iterator location
*
* Returns the record that the current @iter is at.
*/
struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
{
return &iter->pg->records[iter->index];
}
static int
ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
......@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
/*
* Do not call function tracer while we update the code.
* We are in stop machine, no worrying about races.
*/
function_trace_stop++;
if (*command & FTRACE_ENABLE_CALLS)
if (*command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
......@@ -1636,19 +1769,31 @@ static int __ftrace_modify_code(void *data)
else if (*command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
/*
* For archs that call ftrace_test_stop_func(), we must
* wait till after we update all the function callers
* before we update the callback. This keeps different
* ops that record different functions from corrupting
* each other.
return 0;
}
/**
* ftrace_run_stop_machine, go back to the stop machine method
* @command: The command to tell ftrace what to do
*
* If an arch needs to fall back to the stop machine method, the
* it can call this function.
*/
__ftrace_trace_function = __ftrace_trace_function_delay;
#endif
function_trace_stop--;
void ftrace_run_stop_machine(int command)
{
stop_machine(__ftrace_modify_code, &command, NULL);
}
return 0;
/**
* arch_ftrace_update_code, modify the code to trace or not trace
* @command: The command that needs to be done
*
* Archs can override this function if it does not need to
* run stop_machine() to modify code.
*/
void __weak arch_ftrace_update_code(int command)
{
ftrace_run_stop_machine(command);
}
static void ftrace_run_update_code(int command)
......@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
FTRACE_WARN_ON(ret);
if (ret)
return;
/*
* Do not call function tracer while we update the code.
* We are in stop machine.
*/
function_trace_stop++;
stop_machine(__ftrace_modify_code, &command, NULL);
/*
* By default we use stop_machine() to modify the code.
* But archs can do what ever they want as long as it
* is safe. The stop_machine() is the safest, but also
* produces the most overhead.
*/
arch_ftrace_update_code(command);
#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
/*
* For archs that call ftrace_test_stop_func(), we must
* wait till after we update all the function callers
* before we update the callback. This keeps different
* ops that record different functions from corrupting
* each other.
*/
__ftrace_trace_function = __ftrace_trace_function_delay;
#endif
function_trace_stop--;
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
......@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return -ENODEV;
ftrace_start_up++;
command |= FTRACE_ENABLE_CALLS;
command |= FTRACE_UPDATE_CALLS;
/* ops marked global share the filter hashes */
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
......@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
if (ops != &global_ops || !global_start_up)
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
command |= FTRACE_UPDATE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
......@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
if (ftrace_start_up)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
ftrace_run_update_code(FTRACE_UPDATE_CALLS);
}
static void ftrace_shutdown_sysctl(void)
......@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
struct ftrace_hash *hash;
hash = ops->filter_hash;
return !!(!hash || !hash->count);
return ftrace_hash_empty(hash);
}
static int ftrace_update_code(struct module *mod)
{
struct ftrace_page *pg;
struct dyn_ftrace *p;
cycle_t start, stop;
unsigned long ref = 0;
int i;
/*
* When adding a module, we need to check if tracers are
......@@ -1817,25 +1986,22 @@ static int ftrace_update_code(struct module *mod)
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
while (ftrace_new_addrs) {
for (pg = ftrace_new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
/* If something went wrong, bail without enabling anything */
if (unlikely(ftrace_disabled))
return -1;
p = ftrace_new_addrs;
ftrace_new_addrs = p->newlist;
p = &pg->records[i];
p->flags = ref;
/*
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
if (!ftrace_code_disable(mod, p)) {
ftrace_free_rec(p);
/* Game over */
if (!ftrace_code_disable(mod, p))
break;
}
ftrace_update_cnt++;
......@@ -1850,13 +2016,14 @@ static int ftrace_update_code(struct module *mod)
*/
if (ftrace_start_up && ref) {
int failed = __ftrace_replace_code(p, 1);
if (failed) {
if (failed)
ftrace_bug(failed, p->ip);
ftrace_free_rec(p);
}
}
}
ftrace_new_pgs = NULL;
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
......@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
return 0;
}
static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
struct ftrace_page *pg;
int order;
int cnt;
int i;
/* allocate a few pages */
ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
if (!ftrace_pages_start)
return -1;
if (WARN_ON(!count))
return -EINVAL;
order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
/*
* Allocate a few more pages.
*
* TODO: have some parser search vmlinux before
* final linking to find all calls to ftrace.
* Then we can:
* a) know how many pages to allocate.
* and/or
* b) set up the table then.
*
* The dynamic code is still necessary for
* modules.
* We want to fill as much as possible. No more than a page
* may be empty.
*/
while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
order--;
pg = ftrace_pages = ftrace_pages_start;
again:
pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld entries in %d pages\n",
num_to_init, cnt + 1);
if (!pg->records) {
/* if we can't allocate this size, try something smaller */
if (!order)
return -ENOMEM;
order >>= 1;
goto again;
}
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
pg->size = cnt;
/* If we fail, we'll try later anyway */
if (!pg->next)
if (cnt > count)
cnt = count;
return cnt;
}
static struct ftrace_page *
ftrace_allocate_pages(unsigned long num_to_init)
{
struct ftrace_page *start_pg;
struct ftrace_page *pg;
int order;
int cnt;
if (!num_to_init)
return 0;
start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg)
return NULL;
/*
* Try to allocate as much as possible in one continues
* location that fills in all of the space. We want to
* waste as little space as possible.
*/
for (;;) {
cnt = ftrace_allocate_records(pg, num_to_init);
if (cnt < 0)
goto free_pages;
num_to_init -= cnt;
if (!num_to_init)
break;
pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg->next)
goto free_pages;
pg = pg->next;
}
return 0;
return start_pg;
free_pages:
while (start_pg) {
order = get_count_order(pg->size / ENTRIES_PER_PAGE);
free_pages((unsigned long)pg->records, order);
start_pg = pg->next;
kfree(pg);
pg = start_pg;
}
pr_info("ftrace: FAILED to allocate memory for functions\n");
return NULL;
}
enum {
FTRACE_ITER_FILTER = (1 << 0),
FTRACE_ITER_NOTRACE = (1 << 1),
FTRACE_ITER_PRINTALL = (1 << 2),
FTRACE_ITER_HASH = (1 << 3),
FTRACE_ITER_ENABLED = (1 << 4),
};
static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
{
int cnt;
if (!num_to_init) {
pr_info("ftrace: No functions to be traced?\n");
return -1;
}
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld entries in %d pages\n",
num_to_init, cnt + 1);
return 0;
}
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
......@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l;
if (!(iter->flags & FTRACE_ITER_DO_HASH))
return NULL;
if (iter->func_pos > *pos)
return NULL;
......@@ -2023,7 +2244,7 @@ static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
struct ftrace_ops *ops = &global_ops;
struct ftrace_ops *ops = iter->ops;
struct dyn_ftrace *rec = NULL;
if (unlikely(ftrace_disabled))
......@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
} else {
rec = &iter->pg->records[iter->idx++];
if ((rec->flags & FTRACE_FL_FREE) ||
((iter->flags & FTRACE_ITER_FILTER) &&
if (((iter->flags & FTRACE_ITER_FILTER) &&
!(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
((iter->flags & FTRACE_ITER_NOTRACE) &&
......@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
struct ftrace_ops *ops = &global_ops;
struct ftrace_ops *ops = iter->ops;
void *p = NULL;
loff_t l;
......@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
* off, we can short cut and just print out that all
* functions are enabled.
*/
if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
if (iter->flags & FTRACE_ITER_FILTER &&
ftrace_hash_empty(ops->filter_hash)) {
if (*pos > 0)
return t_hash_start(m, pos);
iter->flags |= FTRACE_ITER_PRINTALL;
......@@ -2126,13 +2346,9 @@ static void *t_start(struct seq_file *m, loff_t *pos)
break;
}
if (!p) {
if (iter->flags & FTRACE_ITER_FILTER)
if (!p)
return t_hash_start(m, pos);
return NULL;
}
return iter;
}
......@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
......@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
iter->pg = ftrace_pages_start;
iter->flags = FTRACE_ITER_ENABLED;
iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
......@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
mutex_unlock(&ftrace_lock);
}
static int
/**
* ftrace_regex_open - initialize function tracer filter files
* @ops: The ftrace_ops that hold the hash filters
* @flag: The type of filter to process
* @inode: The inode, usually passed in to your open routine
* @file: The file, usually passed in to your open routine
*
* ftrace_regex_open() initializes the filter files for the
* @ops. Depending on @flag it may process the filter hash or
* the notrace hash of @ops. With this called from the open
* routine, you can use ftrace_filter_write() for the write
* routine if @flag has FTRACE_ITER_FILTER set, or
* ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
* ftrace_regex_lseek() should be used as the lseek routine, and
* release must call ftrace_regex_release().
*/
int
ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct inode *inode, struct file *file)
{
......@@ -2306,7 +2540,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
return ftrace_regex_open(&global_ops,
FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
inode, file);
}
......@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
inode, file);
}
static loff_t
loff_t
ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
loff_t ret;
......@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
goto out_unlock;
do_for_each_ftrace_rec(pg, rec) {
if (ftrace_match_record(rec, mod, search, search_len, type)) {
ret = enter_record(hash, rec, not);
if (ret < 0) {
......@@ -2871,14 +3105,14 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
return ret;
}
static ssize_t
ssize_t
ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
}
static ssize_t
ssize_t
ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
......@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
&& ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
......@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static void __init
set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
void __init
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
{
char *func;
......@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
static void __init set_ftrace_early_filters(void)
{
if (ftrace_filter_buf[0])
set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
if (ftrace_notrace_buf[0])
set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (ftrace_graph_buf[0])
set_ftrace_early_graph(ftrace_graph_buf);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
}
static int
ftrace_regex_release(struct inode *inode, struct file *file)
int ftrace_regex_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
struct ftrace_iterator *iter;
......@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash, iter->hash);
if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
&& ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
}
......@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
do_for_each_ftrace_rec(pg, rec) {
if (rec->flags & FTRACE_FL_FREE)
continue;
if (ftrace_match_record(rec, NULL, search, search_len, type)) {
/* if it is in the array */
exists = false;
......@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
static void ftrace_swap_recs(void *a, void *b, int size)
{
struct dyn_ftrace *reca = a;
struct dyn_ftrace *recb = b;
struct dyn_ftrace t;
t = *reca;
*reca = *recb;
*recb = t;
}
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
struct ftrace_page *pg;
unsigned long count;
unsigned long *p;
unsigned long addr;
unsigned long flags = 0; /* Shut up gcc */
int ret = -ENOMEM;
count = end - start;
if (!count)
return 0;
pg = ftrace_allocate_pages(count);
if (!pg)
return -ENOMEM;
mutex_lock(&ftrace_lock);
/*
* Core and each module needs their own pages, as
* modules will free them when they are removed.
* Force a new page to be allocated for modules.
*/
if (!mod) {
WARN_ON(ftrace_pages || ftrace_pages_start);
/* First initialization */
ftrace_pages = ftrace_pages_start = pg;
} else {
if (!ftrace_pages)
goto out;
if (WARN_ON(ftrace_pages->next)) {
/* Hmm, we have free pages? */
while (ftrace_pages->next)
ftrace_pages = ftrace_pages->next;
}
ftrace_pages->next = pg;
ftrace_pages = pg;
}
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
......@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!addr)
continue;
ftrace_record_ip(addr);
if (!ftrace_record_ip(addr))
break;
}
/* These new locations need to be initialized */
ftrace_new_pgs = pg;
/* Make each individual set of pages sorted by ips */
for (; pg; pg = pg->next)
sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
ftrace_cmp_recs, ftrace_swap_recs);
/*
* We only need to disable interrupts on start up
* because we are modifying code that an interrupt
......@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
ftrace_update_code(mod);
if (!mod)
local_irq_restore(flags);
ret = 0;
out:
mutex_unlock(&ftrace_lock);
return 0;
return ret;
}
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
void ftrace_release_mod(struct module *mod)
{
struct dyn_ftrace *rec;
struct ftrace_page **last_pg;
struct ftrace_page *pg;
int order;
mutex_lock(&ftrace_lock);
if (ftrace_disabled)
goto out_unlock;
do_for_each_ftrace_rec(pg, rec) {
/*
* Each module has its own ftrace_pages, remove
* them from the list.
*/
last_pg = &ftrace_pages_start;
for (pg = ftrace_pages_start; pg; pg = *last_pg) {
rec = &pg->records[0];
if (within_module_core(rec->ip, mod)) {
/*
* rec->ip is changed in ftrace_free_rec()
* It should not between s and e if record was freed.
* As core pages are first, the first
* page should never be a module page.
*/
FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
ftrace_free_rec(rec);
if (WARN_ON(pg == ftrace_pages_start))
goto out_unlock;
/* Check if we are deleting the last page */
if (pg == ftrace_pages)
ftrace_pages = next_to_ftrace_page(last_pg);
*last_pg = pg->next;
order = get_count_order(pg->size / ENTRIES_PER_PAGE);
free_pages((unsigned long)pg->records, order);
kfree(pg);
} else
last_pg = &pg->next;
}
} while_for_each_ftrace_rec();
out_unlock:
mutex_unlock(&ftrace_lock);
}
......
......@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
return -ENOMEM;
}
static int create_filter_start(char *filter_str, bool set_str,
struct filter_parse_state **psp,
struct event_filter **filterp)
{
struct event_filter *filter;
struct filter_parse_state *ps = NULL;
int err = 0;
WARN_ON_ONCE(*psp || *filterp);
/* allocate everything, and if any fails, free all and fail */
filter = __alloc_filter();
if (filter && set_str)
err = replace_filter_string(filter, filter_str);
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!filter || !ps || err) {
kfree(ps);
__free_filter(filter);
return -ENOMEM;
}
/* we're committed to creating a new filter */
*filterp = filter;
*psp = ps;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err && set_str)
append_filter_err(ps, filter);
return err;
}
static void create_filter_finish(struct filter_parse_state *ps)
{
if (ps) {
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
}
}
/**
* create_filter - create a filter for a ftrace_event_call
* @call: ftrace_event_call to create a filter for
* @filter_str: filter string
* @set_str: remember @filter_str and enable detailed error in filter
* @filterp: out param for created filter (always updated on return)
*
* Creates a filter for @call with @filter_str. If @set_str is %true,
* @filter_str is copied and recorded in the new filter.
*
* On success, returns 0 and *@filterp points to the new filter. On
* failure, returns -errno and *@filterp may point to %NULL or to a new
* filter. In the latter case, the returned filter contains error
* information if @set_str is %true and the caller is responsible for
* freeing it.
*/
static int create_filter(struct ftrace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp)
{
struct event_filter *filter = NULL;
struct filter_parse_state *ps = NULL;
int err;
err = create_filter_start(filter_str, set_str, &ps, &filter);
if (!err) {
err = replace_preds(call, filter, ps, filter_str, false);
if (err && set_str)
append_filter_err(ps, filter);
}
create_filter_finish(ps);
*filterp = filter;
return err;
}
/**
* create_system_filter - create a filter for an event_subsystem
* @system: event_subsystem to create a filter for
* @filter_str: filter string
* @filterp: out param for created filter (always updated on return)
*
* Identical to create_filter() except that it creates a subsystem filter
* and always remembers @filter_str.
*/
static int create_system_filter(struct event_subsystem *system,
char *filter_str, struct event_filter **filterp)
{
struct event_filter *filter = NULL;
struct filter_parse_state *ps = NULL;
int err;
err = create_filter_start(filter_str, true, &ps, &filter);
if (!err) {
err = replace_system_preds(system, ps, filter_str);
if (!err) {
/* System filters just show a default message */
kfree(filter->filter_string);
filter->filter_string = NULL;
} else {
append_filter_err(ps, filter);
}
}
create_filter_finish(ps);
*filterp = filter;
return err;
}
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
struct filter_parse_state *ps;
struct event_filter *filter;
struct event_filter *tmp;
int err = 0;
mutex_lock(&event_mutex);
......@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out_unlock;
}
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto out_unlock;
filter = __alloc_filter();
if (!filter) {
kfree(ps);
goto out_unlock;
}
replace_filter_string(filter, filter_string);
err = create_filter(call, filter_string, true, &filter);
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err) {
append_filter_err(ps, filter);
goto out;
}
err = replace_preds(call, filter, ps, filter_string, false);
if (err) {
filter_disable(call);
append_filter_err(ps, filter);
} else
call->flags |= TRACE_EVENT_FL_FILTERED;
out:
/*
* Always swap the call filter with the new filter
* even if there was an error. If there was an error
* in the filter, we disable the filter and show the error
* string
*/
tmp = call->filter;
if (filter) {
struct event_filter *tmp = call->filter;
if (!err)
call->flags |= TRACE_EVENT_FL_FILTERED;
else
filter_disable(call);
rcu_assign_pointer(call->filter, filter);
if (tmp) {
/* Make sure the call is done with the filter */
synchronize_sched();
__free_filter(tmp);
}
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
}
out_unlock:
mutex_unlock(&event_mutex);
......@@ -1811,7 +1902,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
struct filter_parse_state *ps;
struct event_filter *filter;
int err = 0;
......@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out_unlock;
}
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto out_unlock;
filter = __alloc_filter();
if (!filter)
goto out;
/* System filters just show a default message */
kfree(filter->filter_string);
filter->filter_string = NULL;
err = create_system_filter(system, filter_string, &filter);
if (filter) {
/*
* No event actually uses the system filter
* we can free it without synchronize_sched().
*/
__free_filter(system->filter);
system->filter = filter;
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err)
goto err_filter;
err = replace_system_preds(system, ps, filter_string);
if (err)
goto err_filter;
out:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
}
out_unlock:
mutex_unlock(&event_mutex);
return err;
err_filter:
replace_filter_string(filter, filter_string);
append_filter_err(ps, system->filter);
goto out;
}
#ifdef CONFIG_PERF_EVENTS
......@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
{
int err;
struct event_filter *filter;
struct filter_parse_state *ps;
struct ftrace_event_call *call;
mutex_lock(&event_mutex);
......@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter)
goto out_unlock;
filter = __alloc_filter();
if (!filter) {
err = PTR_ERR(filter);
goto out_unlock;
}
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto free_filter;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err)
goto free_ps;
err = replace_preds(call, filter, ps, filter_str, false);
err = create_filter(call, filter_str, false, &filter);
if (!err)
event->filter = filter;
free_ps:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
free_filter:
if (err)
else
__free_filter(filter);
out_unlock:
......@@ -1954,43 +1991,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
#define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h"
static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
struct event_filter **pfilter)
{
struct event_filter *filter;
struct filter_parse_state *ps;
int err = -ENOMEM;
filter = __alloc_filter();
if (!filter)
goto out;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto free_filter;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err)
goto free_ps;
err = replace_preds(call, filter, ps, filter_str, false);
if (!err)
*pfilter = filter;
free_ps:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
free_filter:
if (err)
__free_filter(filter);
out:
return err;
}
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
{ \
.filter = FILTER, \
......@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
struct test_filter_data_t *d = &test_filter_data[i];
int err;
err = test_get_filter(d->filter, &event_ftrace_test_filter,
&filter);
err = create_filter(&event_ftrace_test_filter, d->filter,
false, &filter);
if (err) {
printk(KERN_INFO
"Failed to get filter for '%s', err %d\n",
d->filter, err);
__free_filter(filter);
break;
}
......
......@@ -13,6 +13,9 @@
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/setup.h>
#include "trace.h"
#define STACK_TRACE_ENTRIES 500
......@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
.flags = FTRACE_OPS_FL_GLOBAL,
};
static ssize_t
......@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release,
};
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
inode, file);
}
static const struct file_operations stack_trace_filter_fops = {
.open = stack_trace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
.llseek = ftrace_regex_lseek,
.release = ftrace_regex_release,
};
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
......@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
return ret;
}
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
static __init int enable_stacktrace(char *str)
{
if (strncmp(str, "_filter=", 8) == 0)
strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1;
return 1;
......@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
trace_create_file("stack_trace_filter", 0444, d_tracer,
NULL, &stack_trace_filter_fops);
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
......
......@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */
succeed_file();
}
if (w(txthdr->sh_type) != SHT_PROGBITS ||
!(w(txthdr->sh_flags) & SHF_EXECINSTR))
!(_w(txthdr->sh_flags) & SHF_EXECINSTR))
return NULL;
return txtname;
}
......
......@@ -21,6 +21,8 @@ EVENT MODIFIERS
Events can optionally have a modifer by appending a colon and one or
more modifiers. Modifiers allow the user to restrict when events are
counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
for host counting (not in KVM guests).
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier is currently only implemented for
......
tools/perf
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
include/linux/list.h
......
......@@ -235,7 +235,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
}
static const char * const annotate_usage[] = {
"perf annotate [<options>] <command>",
"perf annotate [<options>]",
NULL
};
......@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
annotate.sym_hist_filter = argv[0];
}
if (field_sep && *field_sep == '.') {
pr_err("'.' is the only non valid --field-separator argument\n");
return -1;
}
return __cmd_annotate(&annotate);
}
......@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
continue;
cpunode_map[cpu] = mem;
}
closedir(dir2);
}
closedir(dir1);
}
static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
......@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
break;
if (sort_dimension__add(tok, sort_list) < 0) {
error("Unknown --sort key: '%s'", tok);
free(str);
return -1;
}
}
......
......@@ -22,9 +22,6 @@
static const char *file_name;
static char name_buffer[256];
bool perf_host = 1;
bool perf_guest;
static const char * const kvm_usage[] = {
"perf kvm [<options>] {top|record|report|diff|buildid-list}",
NULL
......@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
int cmd_kvm(int argc, const char **argv, const char *prefix __used)
{
perf_host = perf_guest = 0;
perf_host = 0;
perf_guest = 1;
argc = parse_options(argc, argv, kvm_options, kvm_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
......
......@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
__script_root = get_script_root(&script_dirent, suffix);
if (__script_root && !strcmp(script_root, __script_root)) {
free(__script_root);
closedir(lang_dir);
closedir(scripts_dir);
snprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent.d_name);
return strdup(script_path);
}
free(__script_root);
}
closedir(lang_dir);
}
closedir(scripts_dir);
return NULL;
}
......
......@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
NULL,
};
const struct option test_options[] = {
OPT_INTEGER('v', "verbose", &verbose,
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_END()
};
......
......@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
if (he == NULL)
return NULL;
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
return he;
}
......@@ -889,6 +888,10 @@ static void perf_top__start_counters(struct perf_top *top)
ui__warning("The %s event is not supported.\n",
event_name(counter));
goto out_err;
} else if (err == EMFILE) {
ui__warning("Too many events are opened.\n"
"Try again after reducing the number of events\n");
goto out_err;
}
ui__warning("The sys_perf_event_open() syscall "
......
......@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
struct perf_evsel *evsel;
event_attr_init(&attr);
evsel = perf_evsel__new(&attr, 0);
if (evsel == NULL)
goto error;
......
......@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
}
}
static void hist_entry__add_cpumode_period(struct hist_entry *self,
static void hist_entry__add_cpumode_period(struct hist_entry *he,
unsigned int cpumode, u64 period)
{
switch (cpumode) {
case PERF_RECORD_MISC_KERNEL:
self->period_sys += period;
he->period_sys += period;
break;
case PERF_RECORD_MISC_USER:
self->period_us += period;
he->period_us += period;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
self->period_guest_sys += period;
he->period_guest_sys += period;
break;
case PERF_RECORD_MISC_GUEST_USER:
self->period_guest_us += period;
he->period_guest_us += period;
break;
default:
break;
......@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
static struct hist_entry *hist_entry__new(struct hist_entry *template)
{
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
if (self != NULL) {
*self = *template;
self->nr_events = 1;
if (self->ms.map)
self->ms.map->referenced = true;
if (he != NULL) {
*he = *template;
he->nr_events = 1;
if (he->ms.map)
he->ms.map->referenced = true;
if (symbol_conf.use_callchain)
callchain_init(self->callchain);
callchain_init(he->callchain);
}
return self;
return he;
}
static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
......@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
return ret;
}
static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
u64 total_samples, int left_margin)
static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
u64 total_samples, int left_margin,
FILE *fp)
{
struct rb_node *rb_node;
struct callchain_node *chain;
size_t ret = 0;
u32 entries_printed = 0;
rb_node = rb_first(&self->sorted_chain);
rb_node = rb_first(&he->sorted_chain);
while (rb_node) {
double percent;
......@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
}
}
static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
size_t size, struct hists *pair_hists,
bool show_displacement, long displacement,
bool color, u64 session_total)
bool color, u64 total_period)
{
u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
u64 nr_events;
const char *sep = symbol_conf.field_sep;
int ret;
if (symbol_conf.exclude_other && !self->parent)
if (symbol_conf.exclude_other && !he->parent)
return 0;
if (pair_hists) {
period = self->pair ? self->pair->period : 0;
nr_events = self->pair ? self->pair->nr_events : 0;
period = he->pair ? he->pair->period : 0;
nr_events = he->pair ? he->pair->nr_events : 0;
total = pair_hists->stats.total_period;
period_sys = self->pair ? self->pair->period_sys : 0;
period_us = self->pair ? self->pair->period_us : 0;
period_guest_sys = self->pair ? self->pair->period_guest_sys : 0;
period_guest_us = self->pair ? self->pair->period_guest_us : 0;
period_sys = he->pair ? he->pair->period_sys : 0;
period_us = he->pair ? he->pair->period_us : 0;
period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
period_guest_us = he->pair ? he->pair->period_guest_us : 0;
} else {
period = self->period;
nr_events = self->nr_events;
total = session_total;
period_sys = self->period_sys;
period_us = self->period_us;
period_guest_sys = self->period_guest_sys;
period_guest_us = self->period_guest_us;
period = he->period;
nr_events = he->nr_events;
total = total_period;
period_sys = he->period_sys;
period_us = he->period_us;
period_guest_sys = he->period_guest_sys;
period_guest_us = he->period_guest_us;
}
if (total) {
......@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
if (total > 0)
old_percent = (period * 100.0) / total;
if (session_total > 0)
new_percent = (self->period * 100.0) / session_total;
if (total_period > 0)
new_percent = (he->period * 100.0) / total_period;
diff = new_percent - old_percent;
......@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
return ret;
}
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
struct hists *pair_hists, bool show_displacement,
long displacement, FILE *fp, u64 session_total)
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
struct hists *hists, struct hists *pair_hists,
bool show_displacement, long displacement,
u64 total_period, FILE *fp)
{
char bf[512];
int ret;
......@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
show_displacement, displacement,
true, session_total);
true, total_period);
hist_entry__snprintf(he, bf + ret, size - ret, hists);
return fprintf(fp, "%s\n", bf);
}
static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
struct hists *hists, FILE *fp,
u64 session_total)
static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
struct hists *hists,
u64 total_period, FILE *fp)
{
int left_margin = 0;
......@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
typeof(*se), list);
left_margin = hists__col_len(hists, se->se_width_idx);
left_margin -= thread__comm_len(self->thread);
left_margin -= thread__comm_len(he->thread);
}
return hist_entry_callchain__fprintf(fp, self, session_total,
left_margin);
return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
}
size_t hists__fprintf(struct hists *hists, struct hists *pair,
......@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
struct sort_entry *se;
struct rb_node *nd;
size_t ret = 0;
u64 total_period;
unsigned long position = 1;
long displacement = 0;
unsigned int width;
......@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
if (symbol_conf.show_nr_samples) {
if (sep)
fprintf(fp, "%cSamples", *sep);
else
fputs(" Samples ", fp);
}
if (symbol_conf.show_total_period) {
if (sep)
ret += fprintf(fp, "%cPeriod", *sep);
else
ret += fprintf(fp, " Period ");
}
if (symbol_conf.show_cpu_utilization) {
if (sep) {
ret += fprintf(fp, "%csys", *sep);
......@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
}
}
if (symbol_conf.show_nr_samples) {
if (sep)
fprintf(fp, "%cSamples", *sep);
else
fputs(" Samples ", fp);
}
if (symbol_conf.show_total_period) {
if (sep)
ret += fprintf(fp, "%cPeriod", *sep);
else
ret += fprintf(fp, " Period ");
}
if (pair) {
if (sep)
ret += fprintf(fp, "%cDelta", *sep);
......@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
goto print_entries;
fprintf(fp, "# ........");
if (symbol_conf.show_cpu_utilization)
fprintf(fp, " ....... .......");
if (symbol_conf.show_nr_samples)
fprintf(fp, " ..........");
if (symbol_conf.show_total_period)
......@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
goto out;
print_entries:
total_period = hists->stats.total_period;
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
......@@ -1040,11 +1046,10 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
++position;
}
ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
displacement, fp, hists->stats.total_period);
displacement, total_period, fp);
if (symbol_conf.use_callchain)
ret += hist_entry__fprintf_callchain(h, hists, fp,
hists->stats.total_period);
ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
if (max_rows && ++nr_rows >= max_rows)
goto out;
......
......@@ -66,11 +66,8 @@ struct hists {
struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al,
struct symbol *parent, u64 period);
extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
struct hists *pair_hists, bool show_displacement,
long displacement, FILE *fp, u64 session_total);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
struct hists *hists);
void hist_entry__free(struct hist_entry *);
......
......@@ -735,8 +735,8 @@ static int
parse_event_modifier(const char **strp, struct perf_event_attr *attr)
{
const char *str = *strp;
int exclude = 0;
int eu = 0, ek = 0, eh = 0, precise = 0;
int exclude = 0, exclude_GH = 0;
int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
if (!*str)
return 0;
......@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
if (!exclude)
exclude = eu = ek = eh = 1;
eh = 0;
} else if (*str == 'G') {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eG = 0;
} else if (*str == 'H') {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eH = 0;
} else if (*str == 'p') {
precise++;
} else
......@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
attr->exclude_kernel = ek;
attr->exclude_hv = eh;
attr->precise_ip = precise;
attr->exclude_host = eH;
attr->exclude_guest = eG;
return 0;
}
......@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
for (;;) {
ostr = str;
memset(&attr, 0, sizeof(attr));
event_attr_init(&attr);
ret = parse_event_symbols(evlist, &str, &attr);
if (ret == EVT_FAILED)
return -1;
......
......@@ -18,7 +18,6 @@
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <ctype.h>
#include "util.h"
#include <dirent.h>
#include <mntent.h>
......
#include "../perf.h"
#include "util.h"
#include <sys/mman.h>
/*
* XXX We need to find a better place for these things...
*/
bool perf_host = true;
bool perf_guest = true;
void event_attr_init(struct perf_event_attr *attr)
{
if (!perf_host)
attr->exclude_host = 1;
if (!perf_guest)
attr->exclude_guest = 1;
}
int mkdir_p(char *path, mode_t mode)
{
struct stat st;
......
......@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
unsigned long convert_unit(unsigned long value, char *unit);
int readn(int fd, void *buf, size_t size);
struct perf_event_attr;
void event_attr_init(struct perf_event_attr *attr);
#define _STR(x) #x
#define STR(x) _STR(x)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment