Commit f5c1bb2a authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Peter Zijlstra

x86/calldepth: Add ret/call counting for debug

Add a debugfs mechanism to validate the accounting, e.g. vs. call/ret
balance and to gather statistics about the stuffing to call ratio.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.204285506@infradead.org
parent bbaceb18
......@@ -57,6 +57,22 @@
#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
#define RET_DEPTH_CREDIT 0xffffffffffffffffULL
/*
 * Debug accounting hooks for the call thunks.
 *
 * With CONFIG_CALL_THUNKS_DEBUG enabled, each macro expands to a single
 * incq on a %gs-relative counter (__x86_call_count, __x86_ret_count,
 * __x86_stuffs_count, __x86_ctxsw_count). Without it, every macro expands
 * to nothing, so users of the macros emit no extra instructions.
 */
#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS \
incq %gs:__x86_call_count;
# define CALL_THUNKS_DEBUG_INC_RETS \
incq %gs:__x86_ret_count;
# define CALL_THUNKS_DEBUG_INC_STUFFS \
incq %gs:__x86_stuffs_count;
# define CALL_THUNKS_DEBUG_INC_CTXSW \
incq %gs:__x86_ctxsw_count;
#else
/* Debug accounting disabled: all hooks are empty. */
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif
#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
#include <asm/asm-offsets.h>
......@@ -75,18 +91,23 @@
#define RESET_CALL_DEPTH_FROM_CALL \
mov $0xfc, %rax; \
shl $56, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
sarq $5, %gs:pcpu_hot + X86_call_depth;
sarq $5, %gs:pcpu_hot + X86_call_depth; \
CALL_THUNKS_DEBUG_INC_CALLS
#define ASM_INCREMENT_CALL_DEPTH \
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth);
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
#define CREDIT_CALL_DEPTH
#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define INCREMENT_CALL_DEPTH
#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#endif
......@@ -137,7 +158,8 @@
jnz 771b; \
/* barrier for jnz misprediction */ \
lfence; \
ASM_CREDIT_CALL_DEPTH
ASM_CREDIT_CALL_DEPTH \
CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
* i386 doesn't unconditionally have LFENCE, as such it can't
......@@ -321,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
{
x86_return_thunk = &__x86_return_skl;
}
#ifdef CONFIG_CALL_THUNKS_DEBUG
/*
 * Per-CPU u64 debug counters. The names match the symbols incremented by
 * the CALL_THUNKS_DEBUG_INC_CALLS/RETS/STUFFS/CTXSW macros.
 */
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}
#endif
......
......@@ -2,6 +2,7 @@
#define pr_fmt(fmt) "callthunks: " fmt
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/moduleloader.h>
......@@ -35,6 +36,15 @@ static int __init debug_thunks(char *str)
}
__setup("debug-callthunks", debug_thunks);
#ifdef CONFIG_CALL_THUNKS_DEBUG
/*
 * Storage for the per-CPU call thunk debug counters (calls, returns,
 * stuffs, context switches).
 */
DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
/* Only the context-switch and call counters are exported (GPL-only). */
EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
EXPORT_SYMBOL_GPL(__x86_call_count);
#endif
extern s32 __call_sites[], __call_sites_end[];
struct thunk_desc {
......@@ -283,3 +293,46 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
mutex_unlock(&text_mutex);
}
#endif /* CONFIG_MODULES */
#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
/*
 * seq_file show callback for one callthunks debug file.
 *
 * m->private carries the CPU number (stored as a pointer-sized integer).
 * Prints that CPU's four debug counters on a single line:
 *   C: calls, R: returns, S: stuffs, X: context switches.
 *
 * Always returns 0.
 */
static int callthunks_debug_show(struct seq_file *m, void *p)
{
	unsigned long cpu = (unsigned long)m->private;

	/* The line ends with '\n' only; no trailing ',' after the newline. */
	seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n",
		   per_cpu(__x86_call_count, cpu),
		   per_cpu(__x86_ret_count, cpu),
		   per_cpu(__x86_stuffs_count, cpu),
		   per_cpu(__x86_ctxsw_count, cpu));
	return 0;
}
/*
 * open() handler for a callthunks debug file: binds callthunks_debug_show
 * to the file via single_open(), forwarding inode->i_private as the
 * seq_file private data. Returns whatever single_open() returns.
 */
static int callthunks_debug_open(struct inode *inode, struct file *file)
{
	void *cpu_cookie = inode->i_private;

	return single_open(file, callthunks_debug_show, cpu_cookie);
}
/* File operations for the callthunks debug files: single-record seq_file. */
static const struct file_operations dfs_ops = {
	.open		= callthunks_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/*
 * Create the "callthunks" debugfs directory and one "cpu<N>" file per
 * possible CPU. The CPU number is passed to the file as its private data
 * (a pointer-sized integer), which the open handler forwards to the show
 * callback.
 *
 * Always returns 0.
 */
static int __init callthunks_debugfs_init(void)
{
	struct dentry *dir;
	unsigned long cpu;

	dir = debugfs_create_dir("callthunks", NULL);
	for_each_possible_cpu(cpu) {
		void *arg = (void *)cpu;
		char name[10];

		/* Bounded formatting: never write past the end of name[]. */
		snprintf(name, sizeof(name), "cpu%lu", cpu);
		debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
	}
	return 0;
}
__initcall(callthunks_debugfs_init);
#endif
......@@ -203,13 +203,18 @@ EXPORT_SYMBOL(__x86_return_thunk)
.align 64
SYM_FUNC_START(__x86_return_skl)
ANNOTATE_NOENDBR
/* Keep the hotpath in a 16byte I-fetch */
/*
* Keep the hotpath in a 16byte I-fetch for the non-debug
* case.
*/
CALL_THUNKS_DEBUG_INC_RETS
shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
jz 1f
ANNOTATE_UNRET_SAFE
ret
int3
1:
CALL_THUNKS_DEBUG_INC_STUFFS
.rept 16
ANNOTATE_INTRA_FUNCTION_CALL
call 2f
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment