Commit 40b46a7d authored by H. Peter Anvin

Merge remote-tracking branch 'rostedt/tip/perf/urgent-2' into x86-urgent-for-linus

parents bad1a753 5963e317
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
extern void mcount(void); extern void mcount(void);
extern int modifying_ftrace_code; extern atomic_t modifying_ftrace_code;
static inline unsigned long ftrace_call_adjust(unsigned long addr) static inline unsigned long ftrace_call_adjust(unsigned long addr)
{ {
......
...@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) ...@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
} }
static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
void debug_stack_set_zero(void) void debug_stack_set_zero(void)
{ {
this_cpu_inc(debug_stack_use_ctr);
load_idt((const struct desc_ptr *)&nmi_idt_descr); load_idt((const struct desc_ptr *)&nmi_idt_descr);
} }
void debug_stack_reset(void) void debug_stack_reset(void)
{ {
load_idt((const struct desc_ptr *)&idt_descr); if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
return;
if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
load_idt((const struct desc_ptr *)&idt_descr);
} }
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
......
...@@ -190,6 +190,44 @@ ENDPROC(native_usergs_sysret64) ...@@ -190,6 +190,44 @@ ENDPROC(native_usergs_sysret64)
#endif #endif
.endm .endm
/*
* When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update
* all the code, sync CPUs, then remove the breakpoints. During this time,
* if lockdep is enabled, it might jump back into the debug handler
* outside the updating of the IST protection (TRACE_IRQS_ON/OFF).
*
* We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
* make sure the stack pointer does not get reset back to the top
* of the debug stack, and instead just reuses the current stack.
*/
/*
 * Debug-stack variants of the TRACE_IRQS_* macros. When both
 * CONFIG_DYNAMIC_FTRACE and CONFIG_TRACE_IRQFLAGS are set, each tracing
 * call is bracketed by debug_stack_set_zero / debug_stack_reset;
 * otherwise the *_DEBUG names are plain aliases of the regular macros.
 */
#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
/* TRACE_IRQS_OFF wrapped in debug_stack_set_zero / debug_stack_reset. */
.macro TRACE_IRQS_OFF_DEBUG
call debug_stack_set_zero
TRACE_IRQS_OFF
call debug_stack_reset
.endm
/* TRACE_IRQS_ON wrapped in debug_stack_set_zero / debug_stack_reset. */
.macro TRACE_IRQS_ON_DEBUG
call debug_stack_set_zero
TRACE_IRQS_ON
call debug_stack_reset
.endm
/*
 * Tests bit 9 of the EFLAGS word saved at EFLAGS-\offset(%rsp) and runs
 * TRACE_IRQS_ON_DEBUG only when that bit is set; when it is clear the
 * jnc skips straight to the local label 1.
 */
.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON_DEBUG
1:
.endm
#else
/* No IDT switching in this configuration: alias to the regular macros. */
# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
#endif
/* /*
* C code is not supposed to know about undefined top of stack. Every time * C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based * a C function with an pt_regs argument is called from the SYSCALL based
...@@ -1098,7 +1136,7 @@ ENTRY(\sym) ...@@ -1098,7 +1136,7 @@ ENTRY(\sym)
subq $ORIG_RAX-R15, %rsp subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid call save_paranoid
TRACE_IRQS_OFF TRACE_IRQS_OFF_DEBUG
movq %rsp,%rdi /* pt_regs pointer */ movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */ xorl %esi,%esi /* no error code */
subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
...@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) ...@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
ENTRY(paranoid_exit) ENTRY(paranoid_exit)
DEFAULT_FRAME DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE) DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */ testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore jnz paranoid_restore
testl $3,CS(%rsp) testl $3,CS(%rsp)
...@@ -1404,7 +1442,7 @@ paranoid_swapgs: ...@@ -1404,7 +1442,7 @@ paranoid_swapgs:
RESTORE_ALL 8 RESTORE_ALL 8
jmp irq_return jmp irq_return
paranoid_restore: paranoid_restore:
TRACE_IRQS_IRETQ 0 TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8 RESTORE_ALL 8
jmp irq_return jmp irq_return
paranoid_userspace: paranoid_userspace:
......
...@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) ...@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
} }
static int static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code, ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code) unsigned const char *new_code)
{ {
unsigned char replaced[MCOUNT_INSN_SIZE]; unsigned char replaced[MCOUNT_INSN_SIZE];
...@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, ...@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
old = ftrace_call_replace(ip, addr); old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace(); new = ftrace_nop_replace();
return ftrace_modify_code(rec->ip, old, new); /*
* On boot up, and when modules are loaded, the MCOUNT_ADDR
* is converted to a nop, and will never become MCOUNT_ADDR
* again. This code is either running before SMP (on boot up)
* or before the code will ever be executed (module load).
* We do not want to use the breakpoint version in this case,
* just modify the code directly.
*/
if (addr == MCOUNT_ADDR)
return ftrace_modify_code_direct(rec->ip, old, new);
/* Normal cases use add_brk_on_nop */
WARN_ONCE(1, "invalid use of ftrace_make_nop");
return -EINVAL;
} }
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
...@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ...@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
old = ftrace_nop_replace(); old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr); new = ftrace_call_replace(ip, addr);
return ftrace_modify_code(rec->ip, old, new); /* Should only be called when module is loaded */
return ftrace_modify_code_direct(rec->ip, old, new);
} }
/*
* The modifying_ftrace_code is used to tell the breakpoint
* handler to call ftrace_int3_handler(). If it fails to
* call this handler for a breakpoint added by ftrace, then
* the kernel may crash.
*
* As atomic writes on x86 do not need a barrier, we do not
* need to add smp_mb()s for this to work. It is also assumed
* that a CPU cannot read modifying_ftrace_code before
* executing the breakpoint; it would be quite remarkable if
* it could. Here's the flow that is required:
*
* CPU-0 CPU-1
*
* atomic_inc(mfc);
* write int3s
* <trap-int3> // implicit (r)mb
* if (atomic_read(mfc))
* call ftrace_int3_handler()
*
* Then when we are finished:
*
* atomic_dec(mfc);
*
* If we hit a breakpoint that was not set by ftrace, it does not
* matter if ftrace_int3_handler() is called or not. It will
* simply be ignored. But it is crucial that a ftrace nop/caller
* breakpoint is handled. No other user should ever place a
* breakpoint on an ftrace nop/caller location. It must only
* be done by this code.
*/
atomic_t modifying_ftrace_code __read_mostly;
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code);
int ftrace_update_ftrace_func(ftrace_func_t func) int ftrace_update_ftrace_func(ftrace_func_t func)
{ {
unsigned long ip = (unsigned long)(&ftrace_call); unsigned long ip = (unsigned long)(&ftrace_call);
...@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ...@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func); new = ftrace_call_replace(ip, (unsigned long)func);
/* See comment above by declaration of modifying_ftrace_code */
atomic_inc(&modifying_ftrace_code);
ret = ftrace_modify_code(ip, old, new); ret = ftrace_modify_code(ip, old, new);
atomic_dec(&modifying_ftrace_code);
return ret; return ret;
} }
int modifying_ftrace_code __read_mostly;
/* /*
* A breakpoint was added to the code address we are about to * A breakpoint was added to the code address we are about to
* modify, and this is the handle that will just skip over it. * modify, and this is the handle that will just skip over it.
...@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) ...@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
} }
} }
/*
 * ftrace_modify_code - patch the instruction at @ip in three stages
 * @ip:       address of the instruction to patch
 * @old_code: original instruction bytes; passed to add_break(), and byte 0
 *            is written back on the fail_update path
 * @new_code: replacement instruction bytes
 *
 * Calls add_break(), add_update_code() and ftrace_write() in that order,
 * with a run_sync() after each stage that succeeds.
 *
 * Returns 0 on success, the error returned by add_break() or
 * add_update_code(), or -EPERM if the final ftrace_write() fails.
 */
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
{
int ret;
/* Stage 1: presumably places the breakpoint over old_code at ip -- confirm in add_break() */
ret = add_break(ip, old_code);
if (ret)
goto out;
run_sync();
/* Stage 2: presumably writes new_code apart from its first byte -- confirm in add_update_code() */
ret = add_update_code(ip, new_code);
if (ret)
goto fail_update;
run_sync();
/* Stage 3: write one byte of new_code at ip; any failure is reported as -EPERM */
ret = ftrace_write(ip, new_code, 1);
if (ret) {
/* NOTE(review): no restore of old_code is attempted on this path */
ret = -EPERM;
goto out;
}
run_sync();
out:
return ret;
fail_update:
/*
 * Put the first byte of old_code back at ip.
 * NOTE(review): the probe_kernel_write() result is ignored and no
 * run_sync() follows the restore -- confirm this is intended.
 */
probe_kernel_write((void *)ip, &old_code[0], 1);
goto out;
}
void arch_ftrace_update_code(int command) void arch_ftrace_update_code(int command)
{ {
modifying_ftrace_code++; /* See comment above by declaration of modifying_ftrace_code */
atomic_inc(&modifying_ftrace_code);
ftrace_modify_all_code(command); ftrace_modify_all_code(command);
modifying_ftrace_code--; atomic_dec(&modifying_ftrace_code);
} }
int __init ftrace_dyn_arch_init(void *data) int __init ftrace_dyn_arch_init(void *data)
......
...@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) ...@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
*/ */
if (unlikely(is_debug_stack(regs->sp))) { if (unlikely(is_debug_stack(regs->sp))) {
debug_stack_set_zero(); debug_stack_set_zero();
__get_cpu_var(update_debug_stack) = 1; this_cpu_write(update_debug_stack, 1);
} }
} }
static inline void nmi_nesting_postprocess(void) static inline void nmi_nesting_postprocess(void)
{ {
if (unlikely(__get_cpu_var(update_debug_stack))) if (unlikely(this_cpu_read(update_debug_stack))) {
debug_stack_reset(); debug_stack_reset();
this_cpu_write(update_debug_stack, 0);
}
} }
#endif #endif
......
...@@ -303,8 +303,12 @@ do_general_protection(struct pt_regs *regs, long error_code) ...@@ -303,8 +303,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{ {
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
/* ftrace must be first, everything else may cause a recursive crash */ /*
if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) * ftrace must be first, everything else may cause a recursive crash.
* See note by declaration of modifying_ftrace_code in ftrace.c
*/
if (unlikely(atomic_read(&modifying_ftrace_code)) &&
ftrace_int3_handler(regs))
return; return;
#endif #endif
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment