Commit 5645688f authored by Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this development cycle were:

   - a large number of call stack dumping/printing improvements: higher
     robustness, better cross-context dumping, improved output, etc.
     (Josh Poimboeuf)

   - vDSO getcpu() performance improvement for future Intel CPUs with
     the RDPID instruction (Andy Lutomirski)

   - add two new Intel AVX512 features and the CPUID support
     infrastructure for it: AVX512IFMA and AVX512VBMI. (Gayatri Kammela,
     He Chen)

   - more copy-user unification (Borislav Petkov)

   - entry code assembly macro simplifications (Alexander Kuleshov)

   - vDSO C/R support improvements (Dmitry Safonov)

   - misc fixes and cleanups (Borislav Petkov, Paul Bolle)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  scripts/decode_stacktrace.sh: Fix address line detection on x86
  x86/boot/64: Use defines for page size
  x86/dumpstack: Make stack name tags more comprehensible
  selftests/x86: Add test_vdso to test getcpu()
  x86/vdso: Use RDPID in preference to LSL when available
  x86/dumpstack: Handle NULL stack pointer in show_trace_log_lvl()
  x86/cpufeatures: Enable new AVX512 cpu features
  x86/cpuid: Provide get_scattered_cpuid_leaf()
  x86/cpuid: Cleanup cpuid_regs definitions
  x86/copy_user: Unify the code by removing the 64-bit asm _copy_*_user() variants
  x86/unwind: Ensure stack grows down
  x86/vdso: Set vDSO pointer only after success
  x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE
  x86/unwind: Detect bad stack return address
  x86/dumpstack: Warn on stack recursion
  x86/unwind: Warn on bad frame pointer
  x86/decoder: Use stderr if insn sanity test fails
  x86/decoder: Use stdout if insn decoder test is successful
  mm/page_alloc: Remove kernel address exposure in free_reserved_area()
  x86/dumpstack: Remove raw stack dump
  ...
parents 4ade5b22 53938ee4
...@@ -1963,9 +1963,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -1963,9 +1963,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
kmemcheck=2 (one-shot mode) kmemcheck=2 (one-shot mode)
Default: 2 (one-shot mode) Default: 2 (one-shot mode)
kstack=N [X86] Print N words from the kernel stack
in oops dumps.
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP) Default is 0 (don't ignore, but inject #GP)
......
...@@ -40,7 +40,6 @@ show up in /proc/sys/kernel: ...@@ -40,7 +40,6 @@ show up in /proc/sys/kernel:
- hung_task_warnings - hung_task_warnings
- kexec_load_disabled - kexec_load_disabled
- kptr_restrict - kptr_restrict
- kstack_depth_to_print [ X86 only ]
- l2cr [ PPC only ] - l2cr [ PPC only ]
- modprobe ==> Documentation/debugging-modules.txt - modprobe ==> Documentation/debugging-modules.txt
- modules_disabled - modules_disabled
...@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using ...@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using
============================================================== ==============================================================
kstack_depth_to_print: (X86 only)
Controls the number of words to print when dumping the raw
kernel stack.
==============================================================
l2cr: (PPC only) l2cr: (PPC only)
This flag controls the L2 cache of G3 processor boards. If This flag controls the L2 cache of G3 processor boards. If
......
...@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit) ...@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit)
space might stop working. Use this option if you have devices that space might stop working. Use this option if you have devices that
are accessed from userspace directly on some PCI host bridge. are accessed from userspace directly on some PCI host bridge.
Debugging
kstack=N Print N words from the kernel stack in oops dumps.
Miscellaneous Miscellaneous
nogbpages nogbpages
......
...@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8 #define SIZEOF_PTREGS 21*8
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0 .macro ALLOC_PT_GPREGS_ON_STACK
addq $-(15*8+\addskip), %rsp addq $-(15*8), %rsp
.endm .endm
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
...@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with
movq 5*8+\offset(%rsp), %rbx movq 5*8+\offset(%rsp), %rbx
.endm .endm
/*
 * Zero r15, r14, r13, r12, rbp and rbx. The 32-bit xor forms are used;
 * on x86-64 a write to a 32-bit register clears the upper half as well.
 */
.macro ZERO_EXTRA_REGS
xorl %r15d, %r15d
xorl %r14d, %r14d
xorl %r13d, %r13d
xorl %r12d, %r12d
xorl %ebp, %ebp
xorl %ebx, %ebx
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11 .if \rstor_r11
movq 6*8(%rsp), %r11 movq 6*8(%rsp), %r11
...@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with ...@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1 .byte 0xf1
.endm .endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* ptregs_offset is the byte offset from %rsp to pt_regs; it defaults to 0,
* meaning pt_regs starts exactly at %rsp.
*
* The macro expands to nothing unless CONFIG_FRAME_POINTER is defined.
*
* NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
.if \ptregs_offset
/* pt_regs lives ptregs_offset bytes above the current stack pointer */
leaq \ptregs_offset(%rsp), %rbp
.else
/* pt_regs starts right at the stack pointer */
mov %rsp, %rbp
.endif
/* set the LSB to tag %rbp as an encoded pt_regs pointer */
orq $0x1, %rbp
#endif
.endm
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
/* /*
......
This diff is collapsed.
...@@ -38,12 +38,6 @@ ...@@ -38,12 +38,6 @@
#include <asm/export.h> #include <asm/export.h>
#include <linux/err.h> #include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000
.code64 .code64
.section .entry.text, "ax" .section .entry.text, "ax"
...@@ -469,6 +463,7 @@ END(irq_entries_start) ...@@ -469,6 +463,7 @@ END(irq_entries_start)
ALLOC_PT_GPREGS_ON_STACK ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS SAVE_C_REGS
SAVE_EXTRA_REGS SAVE_EXTRA_REGS
ENCODE_FRAME_POINTER
testb $3, CS(%rsp) testb $3, CS(%rsp)
jz 1f jz 1f
...@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback) ...@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
ALLOC_PT_GPREGS_ON_STACK ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS SAVE_C_REGS
SAVE_EXTRA_REGS SAVE_EXTRA_REGS
ENCODE_FRAME_POINTER
jmp error_exit jmp error_exit
END(xen_failsafe_callback) END(xen_failsafe_callback)
...@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry) ...@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
cld cld
SAVE_C_REGS 8 SAVE_C_REGS 8
SAVE_EXTRA_REGS 8 SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
movl $1, %ebx movl $1, %ebx
movl $MSR_GS_BASE, %ecx movl $MSR_GS_BASE, %ecx
rdmsr rdmsr
...@@ -1075,6 +1072,7 @@ ENTRY(error_entry) ...@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
cld cld
SAVE_C_REGS 8 SAVE_C_REGS 8
SAVE_EXTRA_REGS 8 SAVE_EXTRA_REGS 8
ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx xorl %ebx, %ebx
testb $3, CS+8(%rsp) testb $3, CS+8(%rsp)
jz .Lerror_kernelspace jz .Lerror_kernelspace
...@@ -1257,6 +1255,7 @@ ENTRY(nmi) ...@@ -1257,6 +1255,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */ pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */ pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */ pushq %r15 /* pt_regs->r15 */
ENCODE_FRAME_POINTER
/* /*
* At this point we no longer need to worry about stack damage * At this point we no longer need to worry about stack damage
...@@ -1270,11 +1269,10 @@ ENTRY(nmi) ...@@ -1270,11 +1269,10 @@ ENTRY(nmi)
/* /*
* Return back to user mode. We must *not* do the normal exit * Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts. Fortunately, * work, because we don't want to enable interrupts.
* do_nmi doesn't modify pt_regs.
*/ */
SWAPGS SWAPGS
jmp restore_c_regs_and_iret jmp restore_regs_and_iret
.Lnmi_from_kernel: .Lnmi_from_kernel:
/* /*
......
...@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) ...@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
} }
text_start = addr - image->sym_vvar_start; text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
/* /*
* MAYWRITE to allow gdb to COW and set breakpoints * MAYWRITE to allow gdb to COW and set breakpoints
...@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) ...@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = PTR_ERR(vma); ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size); do_munmap(mm, text_start, image->size);
} else {
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
} }
up_fail: up_fail:
if (ret) {
current->mm->context.vdso = NULL;
current->mm->context.vdso_image = NULL;
}
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
return ret; return ret;
} }
......
...@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx); ...@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
static struct pt_pmu pt_pmu; static struct pt_pmu pt_pmu;
/*
 * Per-leaf register slot numbers. They are added to i*PT_CPUID_REGS_NUM to
 * index pt_pmu.caps[] and are stored in the pt_caps[] descriptors.
 * Note the slot order here is EAX, ECX, EDX, EBX.
 */
enum cpuid_regs {
CR_EAX = 0,
CR_ECX,
CR_EDX,
CR_EBX
};
/* /*
* Capabilities of Intel PT hardware, such as number of address bits or * Capabilities of Intel PT hardware, such as number of address bits or
* supported output schemes, are cached and exported to userspace as "caps" * supported output schemes, are cached and exported to userspace as "caps"
...@@ -64,21 +57,21 @@ static struct pt_cap_desc { ...@@ -64,21 +57,21 @@ static struct pt_cap_desc {
u8 reg; u8 reg;
u32 mask; u32 mask;
} pt_caps[] = { } pt_caps[] = {
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff), PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)), PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)), PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)),
PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)), PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)), PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CR_EBX, BIT(4)), PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)), PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)), PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)), PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
PT_CAP(num_address_ranges, 1, CR_EAX, 0x3), PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
}; };
static u32 pt_cap_get(enum pt_capabilities cap) static u32 pt_cap_get(enum pt_capabilities cap)
...@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void) ...@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
for (i = 0; i < PT_CPUID_LEAVES; i++) { for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i, cpuid_count(20, i,
&pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM], &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM], &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM], &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]); &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
} }
ret = -ENOMEM; ret = -ENOMEM;
......
...@@ -227,6 +227,7 @@ ...@@ -227,6 +227,7 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
...@@ -280,8 +281,10 @@ ...@@ -280,8 +281,10 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
......
...@@ -21,7 +21,6 @@ enum die_val { ...@@ -21,7 +21,6 @@ enum die_val {
DIE_NMIUNKNOWN, DIE_NMIUNKNOWN,
}; };
extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long); extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long); extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs); extern void show_stack_regs(struct pt_regs *regs);
......
...@@ -137,6 +137,17 @@ struct cpuinfo_x86 { ...@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
u32 microcode; u32 microcode;
}; };
/* The four 32-bit register values involved in a CPUID invocation. */
struct cpuid_regs {
u32 eax, ebx, ecx, edx;
};
/*
 * Index of each CPUID register within an array of register values
 * (e.g. regs[CPUID_EAX]). The order matches the field order of
 * struct cpuid_regs: eax, ebx, ecx, edx.
 */
enum cpuid_regs_idx {
CPUID_EAX = 0,
CPUID_EBX,
CPUID_ECX,
CPUID_EDX,
};
#define X86_VENDOR_INTEL 0 #define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1 #define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2 #define X86_VENDOR_AMD 2
...@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *); ...@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern u32 get_scattered_cpuid_leaf(unsigned int level,
unsigned int sub_leaf,
enum cpuid_regs_idx reg);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
......
...@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, ...@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
int get_stack_info(unsigned long *stack, struct task_struct *task, int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask); struct stack_info *info, unsigned long *visit_mask);
void stack_type_str(enum stack_type type, const char **begin, const char *stack_type_name(enum stack_type type);
const char **end);
static inline bool on_stack(struct stack_info *info, void *addr, size_t len) static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{ {
...@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len) ...@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
addr + len > begin && addr + len <= end); addr + len > begin && addr + len <= end);
} }
extern int kstack_depth_to_print;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8 #define STACKSLOTS_PER_LINE 8
#else #else
...@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) ...@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl); unsigned long *stack, char *log_lvl);
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl);
extern unsigned int code_bytes; extern unsigned int code_bytes;
/* The form of the top of the frame on the stack */ /* The form of the top of the frame on the stack */
......
...@@ -13,6 +13,7 @@ struct unwind_state { ...@@ -13,6 +13,7 @@ struct unwind_state {
int graph_idx; int graph_idx;
#ifdef CONFIG_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER
unsigned long *bp; unsigned long *bp;
struct pt_regs *regs;
#else #else
unsigned long *sp; unsigned long *sp;
#endif #endif
...@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) ...@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
if (unwind_done(state)) if (unwind_done(state))
return NULL; return NULL;
return state->bp + 1; return state->regs ? &state->regs->ip : state->bp + 1;
}
/*
 * Report the pt_regs pointer recorded in the unwind state for the current
 * frame. Yields NULL once the unwind is done; otherwise returns state->regs
 * as-is.
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
	return unwind_done(state) ? NULL : state->regs;
}
#else /* !CONFIG_FRAME_POINTER */ #else /* !CONFIG_FRAME_POINTER */
...@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) ...@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL; return NULL;
} }
/*
 * !CONFIG_FRAME_POINTER variant: struct unwind_state has no regs member in
 * this configuration, so there are never entry regs to report.
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
return NULL;
}
#endif /* CONFIG_FRAME_POINTER */ #endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_UNWIND_H */ #endif /* _ASM_X86_UNWIND_H */
...@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void) ...@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
* works on all CPUs. This is volatile so that it orders * works on all CPUs. This is volatile so that it orders
* correctly wrt barrier() and to keep gcc from cleverly * correctly wrt barrier() and to keep gcc from cleverly
* hoisting it out of the calling function. * hoisting it out of the calling function.
*
* If RDPID is available, use it.
*/ */
asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); alternative_io ("lsl %[p],%[seg]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
return p; return p;
} }
......
...@@ -6,10 +6,8 @@ ...@@ -6,10 +6,8 @@
#define ARCH_GET_FS 0x1003 #define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004 #define ARCH_GET_GS 0x1004
#ifdef CONFIG_CHECKPOINT_RESTORE #define ARCH_MAP_VDSO_X32 0x2001
# define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002
# define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003
# define ARCH_MAP_VDSO_64 0x2003
#endif
#endif /* _ASM_X86_PRCTL_H */ #endif /* _ASM_X86_PRCTL_H */
...@@ -17,11 +17,17 @@ struct cpuid_bit { ...@@ -17,11 +17,17 @@ struct cpuid_bit {
u32 sub_leaf; u32 sub_leaf;
}; };
enum cpuid_regs { /* Please keep the leaf sorted by cpuid_bit.level for faster search. */
CR_EAX = 0, static const struct cpuid_bit cpuid_bits[] = {
CR_ECX, { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
CR_EDX, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
CR_EBX { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ 0, 0, 0, 0, 0 }
}; };
void init_scattered_cpuid_features(struct cpuinfo_x86 *c) void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
...@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) ...@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
u32 regs[4]; u32 regs[4];
const struct cpuid_bit *cb; const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
{ 0, 0, 0, 0, 0 }
};
for (cb = cpuid_bits; cb->feature; cb++) { for (cb = cpuid_bits; cb->feature; cb++) {
/* Verify that the level is valid */ /* Verify that the level is valid */
...@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) ...@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
max_level > (cb->level | 0xffff)) max_level > (cb->level | 0xffff))
continue; continue;
cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX], cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
&regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]); &regs[CPUID_EBX], &regs[CPUID_ECX],
&regs[CPUID_EDX]);
if (regs[cb->reg] & (1 << cb->bit)) if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature); set_cpu_cap(c, cb->feature);
} }
} }
/*
 * Rebuild the value of one CPUID output register from the scattered
 * feature bits recorded for the boot CPU.
 *
 * @level:    CPUID leaf to reconstruct
 * @sub_leaf: CPUID sub-leaf to reconstruct
 * @reg:      which output register (CPUID_EAX..CPUID_EDX) to reconstruct
 *
 * Walks cpuid_bits[] once. For every entry matching @level, @sub_leaf and
 * @reg whose feature is set in boot_cpu_data, the entry's bit is set in the
 * result. The scan stops at the first entry whose level exceeds @level, so
 * it relies on the table being ordered by level.
 *
 * Returns the assembled bit mask, or 0 when nothing matches.
 */
u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
			     enum cpuid_regs_idx reg)
{
	const struct cpuid_bit *entry;
	u32 leaf_bits = 0;

	for (entry = cpuid_bits; entry->feature; entry++) {
		/* Past the requested level: nothing further can match. */
		if (entry->level > level)
			break;

		/* Not yet at the requested level, or wrong register/sub-leaf. */
		if (entry->level < level)
			continue;
		if (!(reg == entry->reg && sub_leaf == entry->sub_leaf))
			continue;

		if (cpu_has(&boot_cpu_data, entry->feature))
			leaf_bits |= BIT(entry->bit);
	}

	return leaf_bits;
}
EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
...@@ -46,10 +46,6 @@ ...@@ -46,10 +46,6 @@
static struct class *cpuid_class; static struct class *cpuid_class;
/* Four 32-bit CPUID register values; cpuid_smp_cpuid() receives one as its command block. */
struct cpuid_regs {
u32 eax, ebx, ecx, edx;
};
static void cpuid_smp_cpuid(void *cmd_block) static void cpuid_smp_cpuid(void *cmd_block)
{ {
struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
int panic_on_unrecovered_nmi; int panic_on_unrecovered_nmi;
int panic_on_io_nmi; int panic_on_io_nmi;
unsigned int code_bytes = 64; unsigned int code_bytes = 64;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter; static int die_counter;
bool in_task_stack(unsigned long *stack, struct task_struct *task, bool in_task_stack(unsigned long *stack, struct task_struct *task,
...@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable, ...@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl) char *log_lvl)
{ {
touch_nmi_watchdog(); touch_nmi_watchdog();
printk("%s [<%p>] %s%pB\n", printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
log_lvl, (void *)address, reliable ? "" : "? ",
(void *)address);
}
/*
 * Continue the current log line with @address printed twice: once as a raw
 * pointer in "[<...>]" and once through the %pS symbol format.
 */
void printk_address(unsigned long address)
{
	void *addr = (void *)address;

	pr_cont(" [<%p>] %pS\n", addr, addr);
}
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
...@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl); printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack); unwind_start(&state, task, regs, stack);
stack = stack ? : get_stack_pointer(task, regs);
/* /*
* Iterate through the stacks, starting with the current stack pointer. * Iterate through the stacks, starting with the current stack pointer.
...@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - softirq stack * - softirq stack
* - hardirq stack * - hardirq stack
*/ */
for (; stack; stack = stack_info.next_sp) { for (regs = NULL; stack; stack = stack_info.next_sp) {
const char *str_begin, *str_end; const char *stack_name;
/* /*
* If we overflowed the task stack into a guard page, jump back * If we overflowed the task stack into a guard page, jump back
...@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (get_stack_info(stack, task, &stack_info, &visit_mask)) if (get_stack_info(stack, task, &stack_info, &visit_mask))
break; break;
stack_type_str(stack_info.type, &str_begin, &str_end); stack_name = stack_type_name(stack_info.type);
if (str_begin) if (stack_name)
printk("%s <%s> ", log_lvl, str_begin); printk("%s <%s>\n", log_lvl, stack_name);
/* /*
* Scan the stack, printing any text addresses we find. At the * Scan the stack, printing any text addresses we find. At the
...@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (!__kernel_text_address(addr)) if (!__kernel_text_address(addr))
continue; continue;
/*
* Don't print regs->ip again if it was already printed
* by __show_regs() below.
*/
if (regs && stack == &regs->ip) {
unwind_next_frame(&state);
continue;
}
if (stack == ret_addr_p) if (stack == ret_addr_p)
reliable = 1; reliable = 1;
...@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ...@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* of the addresses will just be printed as unreliable. * of the addresses will just be printed as unreliable.
*/ */
unwind_next_frame(&state); unwind_next_frame(&state);
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
if (regs)
__show_regs(regs, 0);
} }
if (str_end) if (stack_name)
printk("%s <%s> ", log_lvl, str_end); printk("%s </%s>\n", log_lvl, stack_name);
} }
} }
...@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp) ...@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
if (!sp && task == current) if (!sp && task == current)
sp = get_stack_pointer(current, NULL); sp = get_stack_pointer(current, NULL);
show_stack_log_lvl(task, NULL, sp, ""); show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
} }
void show_stack_regs(struct pt_regs *regs) void show_stack_regs(struct pt_regs *regs)
{ {
show_stack_log_lvl(current, regs, NULL, ""); show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
} }
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
...@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err) ...@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
sp = kernel_stack_pointer(regs); sp = kernel_stack_pointer(regs);
savesegment(ss, ss); savesegment(ss, ss);
} }
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
print_symbol("%s", regs->ip); (void *)regs->ip, ss, sp);
printk(" SS:ESP %04x:%08lx\n", ss, sp);
#else #else
/* Executive summary in case the oops scrolled away */ /* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP "); printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
printk_address(regs->ip);
printk(" RSP <%016lx>\n", regs->sp);
#endif #endif
return 0; return 0;
} }
...@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err) ...@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig); oops_end(flags, regs, sig);
} }
/*
 * Handler for the "kstack" early parameter: parse the argument as an
 * unsigned number (base auto-detected) and store it in
 * kstack_depth_to_print.
 *
 * Returns 0 on success, -EINVAL when no argument string was supplied, or
 * the error reported by kstrtoul() when parsing fails.
 */
static int __init kstack_setup(char *s)
{
	unsigned long depth;
	ssize_t err;

	if (s == NULL)
		return -EINVAL;

	err = kstrtoul(s, 0, &depth);
	if (err != 0)
		return err;

	kstack_depth_to_print = depth;
	return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s) static int __init code_bytes_setup(char *s)
{ {
ssize_t ret; ssize_t ret;
......
...@@ -16,18 +16,15 @@ ...@@ -16,18 +16,15 @@
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
void stack_type_str(enum stack_type type, const char **begin, const char **end) const char *stack_type_name(enum stack_type type)
{ {
switch (type) { if (type == STACK_TYPE_IRQ)
case STACK_TYPE_IRQ: return "IRQ";
case STACK_TYPE_SOFTIRQ:
*begin = "IRQ"; if (type == STACK_TYPE_SOFTIRQ)
*end = "EOI"; return "SOFTIRQ";
break;
default: return NULL;
*begin = NULL;
*end = NULL;
}
} }
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
...@@ -109,8 +106,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, ...@@ -109,8 +106,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
* just break out and report an unknown stack type. * just break out and report an unknown stack type.
*/ */
if (visit_mask) { if (visit_mask) {
if (*visit_mask & (1UL << info->type)) if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown; goto unknown;
}
*visit_mask |= 1UL << info->type; *visit_mask |= 1UL << info->type;
} }
...@@ -121,36 +120,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, ...@@ -121,36 +120,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
return -EINVAL; return -EINVAL;
} }
/*
 * Dump raw words from a task's stack, then print its call trace.
 *
 * @task:    task whose stack is dumped
 * @regs:    passed to get_stack_pointer() and show_trace_log_lvl()
 * @sp:      address to start dumping from; when NULL, the start address is
 *           obtained from get_stack_pointer(task, regs)
 * @log_lvl: log-level prefix string printed at the start of each new line
 *
 * At most kstack_depth_to_print words are printed, STACKSLOTS_PER_LINE per
 * line; the dump ends early once kstack_end() reports the end of the stack.
 */
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *stack;
int i;
/* Bail out silently if a reference to the task's stack cannot be taken. */
if (!try_get_task_stack(task))
return;
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
/* First word of a row: end the previous row, then start a new prefixed line. */
if ((i % STACKSLOTS_PER_LINE) == 0) {
if (i != 0)
pr_cont("\n");
printk("%s %08lx", log_lvl, *stack++);
} else
pr_cont(" %08lx", *stack++);
/* Runs once per printed word, regardless of which branch was taken. */
touch_nmi_watchdog();
}
pr_cont("\n");
/* The trace starts from the original sp, not from where the raw dump ended. */
show_trace_log_lvl(task, regs, sp, log_lvl);
/* Drop the reference taken by try_get_task_stack() above. */
put_task_stack(task);
}
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
{ {
int i; int i;
...@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs) ...@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c; unsigned char c;
u8 *ip; u8 *ip;
pr_emerg("Stack:\n"); show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
pr_emerg("Code:"); pr_emerg("Code:");
......
...@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { ...@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
[DEBUG_STACK - 1] = DEBUG_STKSZ [DEBUG_STACK - 1] = DEBUG_STKSZ
}; };
void stack_type_str(enum stack_type type, const char **begin, const char **end) const char *stack_type_name(enum stack_type type)
{ {
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
switch (type) { if (type == STACK_TYPE_IRQ)
case STACK_TYPE_IRQ: return "IRQ";
*begin = "IRQ";
*end = "EOI"; if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
break; return exception_stack_names[type - STACK_TYPE_EXCEPTION];
case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION]; return NULL;
*end = "EOE";
break;
default:
*begin = NULL;
*end = NULL;
}
} }
static bool in_exception_stack(unsigned long *stack, struct stack_info *info) static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
...@@ -128,8 +122,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, ...@@ -128,8 +122,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
* just break out and report an unknown stack type. * just break out and report an unknown stack type.
*/ */
if (visit_mask) { if (visit_mask) {
if (*visit_mask & (1UL << info->type)) if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown; goto unknown;
}
*visit_mask |= 1UL << info->type; *visit_mask |= 1UL << info->type;
} }
...@@ -140,56 +136,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, ...@@ -140,56 +136,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
return -EINVAL; return -EINVAL;
} }
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
unsigned long *stack;
int i;
if (!try_get_task_stack(task))
return;
irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
unsigned long word;
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
pr_cont(" <EOI> ");
}
} else {
if (kstack_end(stack))
break;
}
if (probe_kernel_address(stack, word))
break;
if ((i % STACKSLOTS_PER_LINE) == 0) {
if (i != 0)
pr_cont("\n");
printk("%s %016lx", log_lvl, word);
} else
pr_cont(" %016lx", word);
stack++;
touch_nmi_watchdog();
}
pr_cont("\n");
show_trace_log_lvl(task, regs, sp, log_lvl);
put_task_stack(task);
}
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
{ {
int i; int i;
...@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs) ...@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c; unsigned char c;
u8 *ip; u8 *ip;
printk(KERN_DEFAULT "Stack:\n"); show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
printk(KERN_DEFAULT "Code: "); printk(KERN_DEFAULT "Code: ");
......
...@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void) ...@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2); setup_clear_cpu_cap(X86_FEATURE_AVX2);
setup_clear_cpu_cap(X86_FEATURE_AVX512F); setup_clear_cpu_cap(X86_FEATURE_AVX512F);
setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD); setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
...@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void) ...@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1); setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
setup_clear_cpu_cap(X86_FEATURE_PKU); setup_clear_cpu_cap(X86_FEATURE_PKU);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
......
...@@ -63,6 +63,8 @@ ...@@ -63,6 +63,8 @@
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif #endif
#define SIZEOF_PTREGS 17*4
/* /*
* Number of possible pages in the lowmem region. * Number of possible pages in the lowmem region.
* *
...@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20); ...@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
/* This can only trip for a broken bootloader... */ /* This can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version) cmpw $0x207, pa(boot_params + BP_version)
jb default_entry jb .Ldefault_entry
/* Paravirt-compatible boot parameters. Look to see what architecture /* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */ we're booting under. */
movl pa(boot_params + BP_hardware_subarch), %eax movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax cmpl $num_subarch_entries, %eax
jae bad_subarch jae .Lbad_subarch
movl pa(subarch_entries)(,%eax,4), %eax movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax subl $__PAGE_OFFSET, %eax
jmp *%eax jmp *%eax
bad_subarch: .Lbad_subarch:
WEAK(lguest_entry) WEAK(lguest_entry)
WEAK(xen_entry) WEAK(xen_entry)
/* Unknown implementation; there's really /* Unknown implementation; there's really
...@@ -270,14 +272,14 @@ WEAK(xen_entry) ...@@ -270,14 +272,14 @@ WEAK(xen_entry)
__INITDATA __INITDATA
subarch_entries: subarch_entries:
.long default_entry /* normal x86/PC */ .long .Ldefault_entry /* normal x86/PC */
.long lguest_entry /* lguest hypervisor */ .long lguest_entry /* lguest hypervisor */
.long xen_entry /* Xen hypervisor */ .long xen_entry /* Xen hypervisor */
.long default_entry /* Moorestown MID */ .long .Ldefault_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4 num_subarch_entries = (. - subarch_entries) / 4
.previous .previous
#else #else
jmp default_entry jmp .Ldefault_entry
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
...@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4 ...@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
ENTRY(start_cpu0) ENTRY(start_cpu0)
movl initial_stack, %ecx movl initial_stack, %ecx
movl %ecx, %esp movl %ecx, %esp
jmp *(initial_code) call *(initial_code)
1: jmp 1b
ENDPROC(start_cpu0) ENDPROC(start_cpu0)
#endif #endif
...@@ -317,7 +320,7 @@ ENTRY(startup_32_smp) ...@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
call load_ucode_ap call load_ucode_ap
#endif #endif
default_entry: .Ldefault_entry:
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
X86_CR0_PG) X86_CR0_PG)
...@@ -347,7 +350,7 @@ default_entry: ...@@ -347,7 +350,7 @@ default_entry:
pushfl pushfl
popl %eax # get EFLAGS popl %eax # get EFLAGS
testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set? testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set?
jz enable_paging # hw disallowed setting of ID bit jz .Lenable_paging # hw disallowed setting of ID bit
# which means no CPUID and no CR4 # which means no CPUID and no CR4
xorl %eax,%eax xorl %eax,%eax
...@@ -357,13 +360,13 @@ default_entry: ...@@ -357,13 +360,13 @@ default_entry:
movl $1,%eax movl $1,%eax
cpuid cpuid
andl $~1,%edx # Ignore CPUID.FPU andl $~1,%edx # Ignore CPUID.FPU
jz enable_paging # No flags or only CPUID.FPU = no CR4 jz .Lenable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax movl pa(mmu_cr4_features),%eax
movl %eax,%cr4 movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled testb $X86_CR4_PAE, %al # check if PAE is enabled
jz enable_paging jz .Lenable_paging
/* Check if extended functions are implemented */ /* Check if extended functions are implemented */
movl $0x80000000, %eax movl $0x80000000, %eax
...@@ -371,7 +374,7 @@ default_entry: ...@@ -371,7 +374,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */ /* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax cmpl $(0x8000ffff-0x80000001), %eax
ja enable_paging ja .Lenable_paging
/* Clear bogus XD_DISABLE bits */ /* Clear bogus XD_DISABLE bits */
call verify_cpu call verify_cpu
...@@ -380,7 +383,7 @@ default_entry: ...@@ -380,7 +383,7 @@ default_entry:
cpuid cpuid
/* Execute Disable bit supported? */ /* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx btl $(X86_FEATURE_NX & 31), %edx
jnc enable_paging jnc .Lenable_paging
/* Setup EFER (Extended Feature Enable Register) */ /* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx movl $MSR_EFER, %ecx
...@@ -390,7 +393,7 @@ default_entry: ...@@ -390,7 +393,7 @@ default_entry:
/* Make changes effective */ /* Make changes effective */
wrmsr wrmsr
enable_paging: .Lenable_paging:
/* /*
* Enable paging * Enable paging
...@@ -419,7 +422,7 @@ enable_paging: ...@@ -419,7 +422,7 @@ enable_paging:
*/ */
movb $4,X86 # at least 486 movb $4,X86 # at least 486
cmpl $-1,X86_CPUID cmpl $-1,X86_CPUID
je is486 je .Lis486
/* get vendor info */ /* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID xorl %eax,%eax # call CPUID with 0 -> return vendor ID
...@@ -430,7 +433,7 @@ enable_paging: ...@@ -430,7 +433,7 @@ enable_paging:
movl %ecx,X86_VENDOR_ID+8 # last 4 chars movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well? orl %eax,%eax # do we have processor info as well?
je is486 je .Lis486
movl $1,%eax # Use the CPUID instruction to get CPU type movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid cpuid
...@@ -444,7 +447,7 @@ enable_paging: ...@@ -444,7 +447,7 @@ enable_paging:
movb %cl,X86_MASK movb %cl,X86_MASK
movl %edx,X86_CAPABILITY movl %edx,X86_CAPABILITY
is486: .Lis486:
movl $0x50022,%ecx # set AM, WP, NE and MP movl $0x50022,%ecx # set AM, WP, NE and MP
movl %cr0,%eax movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET andl $0x80000011,%eax # Save PG,PE,ET
...@@ -470,8 +473,9 @@ is486: ...@@ -470,8 +473,9 @@ is486:
xorl %eax,%eax # Clear LDT xorl %eax,%eax # Clear LDT
lldt %ax lldt %ax
pushl $0 # fake return address for unwinder call *(initial_code)
jmp *(initial_code) 1: jmp 1b
ENDPROC(startup_32_smp)
#include "verify_cpu.S" #include "verify_cpu.S"
...@@ -709,7 +713,12 @@ ENTRY(initial_page_table) ...@@ -709,7 +713,12 @@ ENTRY(initial_page_table)
.data .data
.balign 4 .balign 4
ENTRY(initial_stack) ENTRY(initial_stack)
.long init_thread_union+THREAD_SIZE /*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
* unwinder reliably detect the end of the stack.
*/
.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
TOP_OF_KERNEL_STACK_PADDING;
__INITRODATA __INITRODATA
int_msg: int_msg:
......
...@@ -66,13 +66,8 @@ startup_64: ...@@ -66,13 +66,8 @@ startup_64:
* tables and then reload them. * tables and then reload them.
*/ */
/* /* Set up the stack for verify_cpu(), similar to initial_stack below */
* Setup stack for verify_cpu(). "-8" because initial_stack is defined leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
* this way, see below. Our best guess is a NULL ptr for stack
* termination heuristics and we don't want to break anything which
* might depend on it (kgdb, ...).
*/
leaq (__end_init_task - 8)(%rip), %rsp
/* Sanitize CPU configuration */ /* Sanitize CPU configuration */
call verify_cpu call verify_cpu
...@@ -117,20 +112,20 @@ startup_64: ...@@ -117,20 +112,20 @@ startup_64:
movq %rdi, %rax movq %rdi, %rax
shrq $PGDIR_SHIFT, %rax shrq $PGDIR_SHIFT, %rax
leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx leaq (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
movq %rdx, 0(%rbx,%rax,8) movq %rdx, 0(%rbx,%rax,8)
movq %rdx, 8(%rbx,%rax,8) movq %rdx, 8(%rbx,%rax,8)
addq $4096, %rdx addq $PAGE_SIZE, %rdx
movq %rdi, %rax movq %rdi, %rax
shrq $PUD_SHIFT, %rax shrq $PUD_SHIFT, %rax
andl $(PTRS_PER_PUD-1), %eax andl $(PTRS_PER_PUD-1), %eax
movq %rdx, 4096(%rbx,%rax,8) movq %rdx, PAGE_SIZE(%rbx,%rax,8)
incl %eax incl %eax
andl $(PTRS_PER_PUD-1), %eax andl $(PTRS_PER_PUD-1), %eax
movq %rdx, 4096(%rbx,%rax,8) movq %rdx, PAGE_SIZE(%rbx,%rax,8)
addq $8192, %rbx addq $PAGE_SIZE * 2, %rbx
movq %rdi, %rax movq %rdi, %rax
shrq $PMD_SHIFT, %rdi shrq $PMD_SHIFT, %rdi
addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
...@@ -270,8 +265,12 @@ ENTRY(secondary_startup_64) ...@@ -270,8 +265,12 @@ ENTRY(secondary_startup_64)
/* rsi is pointer to real mode structure with interesting info. /* rsi is pointer to real mode structure with interesting info.
pass it to C */ pass it to C */
movq %rsi, %rdi movq %rsi, %rdi
jmp start_cpu
ENDPROC(secondary_startup_64)
/* Finally jump to run C code and to be on real kernel address ENTRY(start_cpu)
/*
* Jump to run C code and to be on a real kernel address.
* Since we are running on identity-mapped space we have to jump * Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect * to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this * jump. In addition we need to ensure %cs is set so we make this
...@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64) ...@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64)
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64. * address given in m16:64.
*/ */
movq initial_code(%rip),%rax call 1f # put return address on stack for unwinder
pushq $0 # fake return address to stop unwinder 1: xorq %rbp, %rbp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space pushq %rax # target address in negative space
lretq lretq
ENDPROC(secondary_startup_64) ENDPROC(start_cpu)
#include "verify_cpu.S" #include "verify_cpu.S"
...@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64) ...@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64)
/* /*
* Boot CPU0 entry point. It's called from play_dead(). Everything has been set * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
* up already except stack. We just set up stack here. Then call * up already except stack. We just set up stack here. Then call
* start_secondary(). * start_secondary() via start_cpu().
*/ */
ENTRY(start_cpu0) ENTRY(start_cpu0)
movq initial_stack(%rip),%rsp movq initial_stack(%rip), %rsp
movq initial_code(%rip),%rax jmp start_cpu
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
ENDPROC(start_cpu0) ENDPROC(start_cpu0)
#endif #endif
...@@ -328,7 +324,11 @@ ENDPROC(start_cpu0) ...@@ -328,7 +324,11 @@ ENDPROC(start_cpu0)
GLOBAL(initial_gs) GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union) .quad INIT_PER_CPU_VAR(irq_stack_union)
GLOBAL(initial_stack) GLOBAL(initial_stack)
.quad init_thread_union+THREAD_SIZE-8 /*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
* unwinder reliably detect the end of the stack.
*/
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA __FINITDATA
bad_address: bad_address:
......
...@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs); savesegment(gs, gs);
} }
printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
(u16)regs->cs, regs->ip, regs->flags, printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
smp_processor_id()); smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx); regs->ax, regs->bx, regs->cx, regs->dx);
......
...@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex; unsigned int fsindex, gsindex;
unsigned int ds, cs, es; unsigned int ds, cs, es;
printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
printk_address(regs->ip); (void *)regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
regs->sp, regs->flags); regs->sp, regs->flags);
if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
pr_cont("\n");
printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx); regs->ax, regs->bx, regs->cx);
printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
......
...@@ -987,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) ...@@ -987,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0; int cpu0_nmi_registered = 0;
unsigned long timeout; unsigned long timeout;
idle->thread.sp = (unsigned long) (((struct pt_regs *) idle->thread.sp = (unsigned long)task_pt_regs(idle);
(THREAD_SIZE + task_stack_page(idle))) - 1);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary; initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp; initial_stack = idle->thread.sp;
......
...@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state) ...@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
if (unwind_done(state)) if (unwind_done(state))
return 0; return 0;
if (state->regs && user_mode(state->regs))
return 0;
addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
addr_p); addr_p);
return __kernel_text_address(addr) ? addr : 0; if (!__kernel_text_address(addr)) {
printk_deferred_once(KERN_WARNING
"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
(void *)addr, addr_p, state->task->comm,
state->task->pid);
return 0;
}
return addr;
} }
EXPORT_SYMBOL_GPL(unwind_get_return_address); EXPORT_SYMBOL_GPL(unwind_get_return_address);
static size_t regs_size(struct pt_regs *regs)
{
/* x86_32 regs from kernel mode are two words shorter: */
if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
return sizeof(*regs) - 2*sizeof(long);
return sizeof(*regs);
}
static bool is_last_task_frame(struct unwind_state *state)
{
unsigned long bp = (unsigned long)state->bp;
unsigned long regs = (unsigned long)task_pt_regs(state->task);
return bp == regs - FRAME_HEADER_SIZE;
}
/*
* This determines if the frame pointer actually contains an encoded pointer to
* pt_regs on the stack. See ENCODE_FRAME_POINTER.
*/
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
unsigned long regs = (unsigned long)bp;
if (!(regs & 0x1))
return NULL;
return (struct pt_regs *)(regs & ~0x1);
}
static bool update_stack_state(struct unwind_state *state, void *addr, static bool update_stack_state(struct unwind_state *state, void *addr,
size_t len) size_t len)
{ {
...@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr, ...@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
bool unwind_next_frame(struct unwind_state *state) bool unwind_next_frame(struct unwind_state *state)
{ {
unsigned long *next_bp; struct pt_regs *regs;
unsigned long *next_bp, *next_frame;
size_t next_len;
enum stack_type prev_type = state->stack_info.type;
if (unwind_done(state)) if (unwind_done(state))
return false; return false;
/* have we reached the end? */
if (state->regs && user_mode(state->regs))
goto the_end;
if (is_last_task_frame(state)) {
regs = task_pt_regs(state->task);
/*
* kthreads (other than the boot CPU's idle thread) have some
* partial regs at the end of their stack which were placed
* there by copy_thread_tls(). But the regs don't have any
* useful information, so we can skip them.
*
* This user_mode() check is slightly broader than a PF_KTHREAD
* check because it also catches the awkward situation where a
* newly forked kthread transitions into a user task by calling
* do_execve(), which eventually clears PF_KTHREAD.
*/
if (!user_mode(regs))
goto the_end;
/*
* We're almost at the end, but not quite: there's still the
* syscall regs frame. Entry code doesn't encode the regs
* pointer for syscalls, so we have to set it manually.
*/
state->regs = regs;
state->bp = NULL;
return true;
}
/* get the next frame pointer */
if (state->regs)
next_bp = (unsigned long *)state->regs->bp;
else
next_bp = (unsigned long *)*state->bp; next_bp = (unsigned long *)*state->bp;
/* is the next frame pointer an encoded pointer to pt_regs? */
regs = decode_frame_pointer(next_bp);
if (regs) {
next_frame = (unsigned long *)regs;
next_len = sizeof(*regs);
} else {
next_frame = next_bp;
next_len = FRAME_HEADER_SIZE;
}
/* make sure the next frame's data is accessible */ /* make sure the next frame's data is accessible */
if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE)) if (!update_stack_state(state, next_frame, next_len)) {
return false; /*
* Don't warn on bad regs->bp. An interrupt in entry code
* might cause a false positive warning.
*/
if (state->regs)
goto the_end;
goto bad_address;
}
/* Make sure it only unwinds up and doesn't overlap the last frame: */
if (state->stack_info.type == prev_type) {
if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
goto bad_address;
if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
goto bad_address;
}
/* move to the next frame */ /* move to the next frame */
if (regs) {
state->regs = regs;
state->bp = NULL;
} else {
state->bp = next_bp; state->bp = next_bp;
state->regs = NULL;
}
return true; return true;
bad_address:
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
state->regs, state->task->comm,
state->task->pid, next_frame);
} else {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
state->bp, state->task->comm,
state->task->pid, next_frame);
}
the_end:
state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
} }
EXPORT_SYMBOL_GPL(unwind_next_frame); EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task, void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame) struct pt_regs *regs, unsigned long *first_frame)
{ {
unsigned long *bp, *frame;
size_t len;
memset(state, 0, sizeof(*state)); memset(state, 0, sizeof(*state));
state->task = task; state->task = task;
...@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, ...@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
} }
/* set up the starting stack frame */ /* set up the starting stack frame */
state->bp = get_frame_pointer(task, regs); bp = get_frame_pointer(task, regs);
regs = decode_frame_pointer(bp);
if (regs) {
state->regs = regs;
frame = (unsigned long *)regs;
len = sizeof(*regs);
} else {
state->bp = bp;
frame = bp;
len = FRAME_HEADER_SIZE;
}
/* initialize stack info and make sure the frame data is accessible */ /* initialize stack info and make sure the frame data is accessible */
get_stack_info(state->bp, state->task, &state->stack_info, get_stack_info(frame, state->task, &state->stack_info,
&state->stack_mask); &state->stack_mask);
update_stack_state(state, state->bp, FRAME_HEADER_SIZE); update_stack_state(state, frame, len);
/* /*
* The caller can provide the address of the first frame directly * The caller can provide the address of the first frame directly
......
...@@ -91,10 +91,10 @@ SECTIONS ...@@ -91,10 +91,10 @@ SECTIONS
/* Text and read-only data */ /* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) { .text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .; _text = .;
_stext = .;
/* bootstrapping code */ /* bootstrapping code */
HEAD_TEXT HEAD_TEXT
. = ALIGN(8); . = ALIGN(8);
_stext = .;
TEXT_TEXT TEXT_TEXT
SCHED_TEXT SCHED_TEXT
CPUIDLE_TEXT CPUIDLE_TEXT
......
...@@ -16,53 +16,6 @@ ...@@ -16,53 +16,6 @@
#include <asm/smap.h> #include <asm/smap.h>
#include <asm/export.h> #include <asm/export.h>
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
mov PER_CPU_VAR(current_task), %rax
movq %rdi,%rcx
addq %rdx,%rcx
jc bad_to_user
cmpq TASK_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)
/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
mov PER_CPU_VAR(current_task), %rax
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq TASK_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)
.section .fixup,"ax"
/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
ENDPROC(bad_from_user)
.previous
/* /*
* copy_user_generic_unrolled - memory copy with exception handling. * copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro * This version is for CPUs like P4 that don't have efficient micro
......
...@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) ...@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(copy_from_user_nmi); EXPORT_SYMBOL_GPL(copy_from_user_nmi);
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from kernel space to user space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
EXPORT_SYMBOL(_copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
* @to: Destination address, in kernel space.
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from user space to kernel space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}
EXPORT_SYMBOL(_copy_from_user);
...@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr ...@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
return n; return n;
} }
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from kernel space to user space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
EXPORT_SYMBOL(_copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
* @to: Destination address, in kernel space.
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Copy data from user space to kernel space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}
EXPORT_SYMBOL(_copy_from_user);
...@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, ...@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "paging request"); printk(KERN_CONT "paging request");
printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:"); printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
printk_address(regs->ip);
dump_pagetable(address); dump_pagetable(address);
} }
......
...@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void) ...@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
/* Dump Instruction Pointer info */ /* Dump Instruction Pointer info */
static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs) static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
{ {
pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm); pr_info("UV: %4d %6d %-32.32s %pS",
printk_address(regs->ip); cpu, current->pid, current->comm, (void *)regs->ip);
} }
/* /*
......
...@@ -269,7 +269,8 @@ int main(int argc, char **argv) ...@@ -269,7 +269,8 @@ int main(int argc, char **argv)
insns++; insns++;
} }
fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", fprintf((errors) ? stderr : stdout,
"%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
prog, prog,
(errors) ? "Failure" : "Success", (errors) ? "Failure" : "Success",
insns, insns,
......
...@@ -167,7 +167,7 @@ int main(int argc, char **argv) ...@@ -167,7 +167,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Warning: decoded and checked %d" fprintf(stderr, "Warning: decoded and checked %d"
" instructions with %d warnings\n", insns, warnings); " instructions with %d warnings\n", insns, warnings);
else else
fprintf(stderr, "Succeed: decoded and checked %d" fprintf(stdout, "Success: decoded and checked %d"
" instructions\n", insns); " instructions\n", insns);
return 0; return 0;
} }
...@@ -982,13 +982,6 @@ static struct ctl_table kern_table[] = { ...@@ -982,13 +982,6 @@ static struct ctl_table kern_table[] = {
.mode = 0444, .mode = 0444,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "kstack_depth_to_print",
.data = &kstack_depth_to_print,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ {
.procname = "io_delay_type", .procname = "io_delay_type",
.data = &io_delay_type, .data = &io_delay_type,
......
...@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s) ...@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
} }
if (pages && s) if (pages && s)
pr_info("Freeing %s memory: %ldK (%p - %p)\n", pr_info("Freeing %s memory: %ldK\n",
s, pages << (PAGE_SHIFT - 10), start, end); s, pages << (PAGE_SHIFT - 10));
return pages; return pages;
} }
......
...@@ -139,7 +139,8 @@ handle_line() { ...@@ -139,7 +139,8 @@ handle_line() {
while read line; do while read line; do
# Let's see if we have an address in the line # Let's see if we have an address in the line
if [[ $line =~ \[\<([^]]+)\>\] ]]; then if [[ $line =~ \[\<([^]]+)\>\] ]] ||
[[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
# Translate address to line numbers # Translate address to line numbers
handle_line "$line" handle_line "$line"
# Is it a code line? # Is it a code line?
......
...@@ -105,9 +105,18 @@ __faddr2line() { ...@@ -105,9 +105,18 @@ __faddr2line() {
# In rare cases there might be duplicates. # In rare cases there might be duplicates.
while read symbol; do while read symbol; do
local fields=($symbol) local fields=($symbol)
local sym_base=0x${fields[1]} local sym_base=0x${fields[0]}
local sym_size=${fields[2]} local sym_type=${fields[1]}
local sym_type=${fields[3]} local sym_end=0x${fields[3]}
# calculate the size
local sym_size=$(($sym_end - $sym_base))
if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
warn "bad symbol size: base: $sym_base end: $sym_end"
DONE=1
return
fi
sym_size=0x$(printf %x $sym_size)
# calculate the address # calculate the address
local addr=$(($sym_base + $offset)) local addr=$(($sym_base + $offset))
...@@ -116,26 +125,26 @@ __faddr2line() { ...@@ -116,26 +125,26 @@ __faddr2line() {
DONE=1 DONE=1
return return
fi fi
local hexaddr=0x$(printf %x $addr) addr=0x$(printf %x $addr)
# weed out non-function symbols # weed out non-function symbols
if [[ $sym_type != "FUNC" ]]; then if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
[[ $print_warnings = 1 ]] && [[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to non-function symbol" echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
continue continue
fi fi
# if the user provided a size, make sure it matches the symbol's size # if the user provided a size, make sure it matches the symbol's size
if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
[[ $print_warnings = 1 ]] && [[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)" echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
continue; continue;
fi fi
# make sure the provided offset is within the symbol's range # make sure the provided offset is within the symbol's range
if [[ $offset -gt $sym_size ]]; then if [[ $offset -gt $sym_size ]]; then
[[ $print_warnings = 1 ]] && [[ $print_warnings = 1 ]] &&
echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)" echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
continue continue
fi fi
...@@ -143,12 +152,12 @@ __faddr2line() { ...@@ -143,12 +152,12 @@ __faddr2line() {
[[ $FIRST = 0 ]] && echo [[ $FIRST = 0 ]] && echo
FIRST=0 FIRST=0
local hexsize=0x$(printf %x $sym_size) # pass real address to addr2line
echo "$func+$offset/$hexsize:" echo "$func+$offset/$sym_size:"
addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;" addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
DONE=1 DONE=1
done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }')
} }
[[ $# -lt 2 ]] && usage [[ $# -lt 2 ]] && usage
......
...@@ -6,7 +6,7 @@ include ../lib.mk ...@@ -6,7 +6,7 @@ include ../lib.mk
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
check_initial_reg_state sigreturn ldt_gdt iopl \ check_initial_reg_state sigreturn ldt_gdt iopl \
protection_keys protection_keys test_vdso
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \ test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer vdso_restorer
......
/*
* test_vdso.c - Test cases for the vDSO and vsyscall getcpu()
* Copyright (c) 2011-2015 Andrew Lutomirski
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <dlfcn.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
/*
 * Fallback getcpu syscall numbers for libc headers that do not provide
 * SYS_getcpu: 309 when building for x86_64, 318 otherwise.
 */
#ifndef SYS_getcpu
# ifdef __x86_64__
# define SYS_getcpu 309
# else
# define SYS_getcpu 318
# endif
#endif
/* Number of failed checks; main() exits non-zero when this is not 0. */
int nerrs = 0;
/*
 * VSYS(x) evaluates to x on x86_64 and to 0 everywhere else, so pointers
 * built from it are NULL on non-x86_64 builds.
 */
#ifdef __x86_64__
# define VSYS(x) (x)
#else
# define VSYS(x) 0
#endif
/* Common signature of the getcpu entry points: (cpu, node, cache). */
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
/*
 * getcpu entry at a fixed address -- presumably the legacy vsyscall page;
 * confirm against the kernel's vsyscall layout.  NULL when VSYS() yields 0.
 */
const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
/*
 * vDSO getcpu entry, resolved by fill_function_pointers(); stays NULL when
 * the vDSO or the "__vdso_getcpu" symbol cannot be found.
 */
getcpu_t vdso_getcpu;
/*
 * Resolve the vDSO's getcpu entry point into vdso_getcpu.
 *
 * The vDSO is looked up under its two known sonames ("linux-vdso.so.1",
 * then "linux-gate.so.1") with RTLD_NOLOAD, so only an already-mapped
 * object is accepted.  A warning is printed and vdso_getcpu is left
 * untouched when the vDSO cannot be found; a warning is also printed when
 * the vDSO is found but does not export "__vdso_getcpu".
 */
void fill_function_pointers()
{
	const int flags = RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD;
	void *handle;

	handle = dlopen("linux-vdso.so.1", flags);
	if (handle == NULL)
		handle = dlopen("linux-gate.so.1", flags);

	if (handle == NULL) {
		printf("[WARN]\tfailed to find vDSO\n");
		return;
	}

	vdso_getcpu = (getcpu_t)dlsym(handle, "__vdso_getcpu");
	if (vdso_getcpu == NULL)
		printf("Warning: failed to find getcpu in vDSO\n");
}
/*
 * Invoke the getcpu system call directly, bypassing any libc, vDSO or
 * vsyscall fast path.
 *
 * @cpu:   receives the CPU number.
 * @node:  receives the node number.
 * @cache: passed through to the kernel unchanged.
 *
 * Returns whatever syscall() returns.
 *
 * SYS_getcpu is used rather than __NR_getcpu so that this file's own
 * fallback definition of SYS_getcpu applies when the libc headers do not
 * provide the syscall number.
 */
static long sys_getcpu(unsigned *cpu, unsigned *node, void *cache)
{
	return syscall(SYS_getcpu, cpu, node, cache);
}
/*
 * Pin the process to each CPU in turn and check every available getcpu()
 * flavour (raw syscall, vDSO, vsyscall) against it.
 *
 * For each CPU, every flavour that succeeds must report that CPU, and all
 * successful flavours must agree on the node.  The reference node is taken
 * from the first flavour that succeeded, in the order syscall, vDSO,
 * vsyscall.  One "[OK]" or "[FAIL]" line is printed per CPU and nerrs is
 * incremented for every failing CPU.
 *
 * Iteration stops at the first CPU the process cannot be pinned to.
 */
static void test_getcpu(void)
{
	printf("[RUN]\tTesting getcpu...\n");

	/*
	 * The index is unsigned so that it matches the values getcpu() hands
	 * back and the %u conversion below.  CPU_SETSIZE bounds the loop
	 * explicitly: a cpu_set_t cannot describe a CPU at or beyond it.
	 */
	for (unsigned cpu = 0; cpu < CPU_SETSIZE; cpu++) {
		cpu_set_t cpuset;

		CPU_ZERO(&cpuset);
		CPU_SET(cpu, &cpuset);
		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
			return;

		/*
		 * Everything is initialised up front.  The outputs are only
		 * read when the matching call returned 0, but explicit
		 * initialisers keep that obvious to readers and compilers.
		 */
		unsigned cpu_sys = 0, cpu_vdso = 0, cpu_vsys = 0;
		unsigned node_sys = 0, node_vdso = 0, node_vsys = 0;
		unsigned node = 0;
		/* 1 means "not attempted / failed" for the optional flavours. */
		long ret_sys, ret_vdso = 1, ret_vsys = 1;

		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
		if (vdso_getcpu)
			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
		if (vgetcpu)
			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);

		/* Pick the reference node from the first successful flavour. */
		if (!ret_sys)
			node = node_sys;
		else if (!ret_vdso)
			node = node_vdso;
		else if (!ret_vsys)
			node = node_vsys;

		bool ok = true;

		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
			ok = false;
		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
			ok = false;
		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
			ok = false;

		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
		if (!ret_sys)
			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
		if (!ret_vdso)
			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
		if (!ret_vsys)
			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
			       node_vsys);
		printf("\n");

		if (!ok)
			nerrs++;
	}
}
/*
 * Entry point: resolve the vDSO getcpu pointer, run the getcpu checks and
 * exit with status 1 if any check failed, 0 otherwise.  The command-line
 * arguments are not used.
 */
int main(int argc, char **argv)
{
	fill_function_pointers();
	test_getcpu();

	if (nerrs != 0)
		return 1;

	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment