Commit b556b35e authored by Jan Beulich, committed by Linus Torvalds

[PATCH] x86_64: Move int 3 handler to debug stack and allow to increase it.

This
- switches the INT3 handler to run on an IST stack (to cope with
  breakpoints set by a kernel debugger on places where the kernel's
  %gs base hasn't been set up, yet); the IST stack used is shared with
  the INT1 handler's
[AK: this also allows setting a kprobe on the interrupt/exception entry
points]
- allows nesting of INT1/INT3 handlers so that one can, with a kernel
  debugger, debug (at least) the user-mode portions of the INT1/INT3
  handling; the nesting isn't actively enabled here since a kernel-
  debugger-free kernel doesn't need it
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ed8388a5
...@@ -64,5 +64,9 @@ int main(void) ...@@ -64,5 +64,9 @@ int main(void)
DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
DEFINE(pbe_next, offsetof(struct pbe, next)); DEFINE(pbe_next, offsetof(struct pbe, next));
BLANK();
#if DEBUG_STKSZ > EXCEPTION_STKSZ
DEFINE(DEBUG_IST, DEBUG_STACK);
#endif
return 0; return 0;
} }
...@@ -673,7 +673,10 @@ ENTRY(spurious_interrupt) ...@@ -673,7 +673,10 @@ ENTRY(spurious_interrupt)
/* error code is on the stack already */ /* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */ /* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym #ifndef DEBUG_IST
# define DEBUG_IST 0
#endif
.macro paranoidentry sym, ist=0
SAVE_ALL SAVE_ALL
cld cld
movl $1,%ebx movl $1,%ebx
...@@ -683,10 +686,20 @@ ENTRY(spurious_interrupt) ...@@ -683,10 +686,20 @@ ENTRY(spurious_interrupt)
js 1f js 1f
swapgs swapgs
xorl %ebx,%ebx xorl %ebx,%ebx
1: movq %rsp,%rdi 1:
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp) movq $-1,ORIG_RAX(%rsp)
.if \ist
subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym call \sym
.if \ist
addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
cli cli
.endm .endm
...@@ -904,7 +917,7 @@ KPROBE_ENTRY(debug) ...@@ -904,7 +917,7 @@ KPROBE_ENTRY(debug)
INTR_FRAME INTR_FRAME
pushq $0 pushq $0
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug paranoidentry do_debug, DEBUG_IST
jmp paranoid_exit jmp paranoid_exit
CFI_ENDPROC CFI_ENDPROC
.previous .text .previous .text
...@@ -959,7 +972,12 @@ paranoid_schedule: ...@@ -959,7 +972,12 @@ paranoid_schedule:
CFI_ENDPROC CFI_ENDPROC
KPROBE_ENTRY(int3) KPROBE_ENTRY(int3)
zeroentry do_int3 INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_int3, DEBUG_IST
jmp paranoid_exit
CFI_ENDPROC
.previous .text .previous .text
ENTRY(overflow) ENTRY(overflow)
......
...@@ -145,7 +145,7 @@ void pda_init(int cpu) ...@@ -145,7 +145,7 @@ void pda_init(int cpu)
pda->irqstackptr += IRQSTACKSIZE-64; pda->irqstackptr += IRQSTACKSIZE-64;
} }
char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ]
__attribute__((section(".bss.page_aligned"))); __attribute__((section(".bss.page_aligned")));
/* May not be marked __init: used by software suspend */ /* May not be marked __init: used by software suspend */
...@@ -236,13 +236,27 @@ void __cpuinit cpu_init (void) ...@@ -236,13 +236,27 @@ void __cpuinit cpu_init (void)
*/ */
for (v = 0; v < N_EXCEPTION_STACKS; v++) { for (v = 0; v < N_EXCEPTION_STACKS; v++) {
if (cpu) { if (cpu) {
estacks = (char *)__get_free_pages(GFP_ATOMIC, static const unsigned int order[N_EXCEPTION_STACKS] = {
EXCEPTION_STACK_ORDER); [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
[DEBUG_STACK - 1] = DEBUG_STACK_ORDER
};
estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
if (!estacks) if (!estacks)
panic("Cannot allocate exception stack %ld %d\n", panic("Cannot allocate exception stack %ld %d\n",
v, cpu); v, cpu);
} }
switch (v + 1) {
#if DEBUG_STKSZ > EXCEPTION_STKSZ
case DEBUG_STACK:
cpu_pda[cpu].debugstack = (unsigned long)estacks;
estacks += DEBUG_STKSZ;
break;
#endif
default:
estacks += EXCEPTION_STKSZ; estacks += EXCEPTION_STKSZ;
break;
}
t->ist[v] = (unsigned long)estacks; t->ist[v] = (unsigned long)estacks;
} }
......
...@@ -121,19 +121,31 @@ int printk_address(unsigned long address) ...@@ -121,19 +121,31 @@ int printk_address(unsigned long address)
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, const char **idp) unsigned *usedp, const char **idp)
{ {
static const char ids[N_EXCEPTION_STACKS][8] = { static char ids[][8] = {
[DEBUG_STACK - 1] = "#DB", [DEBUG_STACK - 1] = "#DB",
[NMI_STACK - 1] = "NMI", [NMI_STACK - 1] = "NMI",
[DOUBLEFAULT_STACK - 1] = "#DF", [DOUBLEFAULT_STACK - 1] = "#DF",
[STACKFAULT_STACK - 1] = "#SS", [STACKFAULT_STACK - 1] = "#SS",
[MCE_STACK - 1] = "#MC", [MCE_STACK - 1] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
}; };
unsigned k; unsigned k;
for (k = 0; k < N_EXCEPTION_STACKS; k++) { for (k = 0; k < N_EXCEPTION_STACKS; k++) {
unsigned long end; unsigned long end;
switch (k + 1) {
#if DEBUG_STKSZ > EXCEPTION_STKSZ
case DEBUG_STACK:
end = cpu_pda[cpu].debugstack + DEBUG_STKSZ;
break;
#endif
default:
end = per_cpu(init_tss, cpu).ist[k]; end = per_cpu(init_tss, cpu).ist[k];
break;
}
if (stack >= end) if (stack >= end)
continue; continue;
if (stack >= end - EXCEPTION_STKSZ) { if (stack >= end - EXCEPTION_STKSZ) {
...@@ -143,6 +155,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, ...@@ -143,6 +155,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
*idp = ids[k]; *idp = ids[k];
return (unsigned long *)end; return (unsigned long *)end;
} }
#if DEBUG_STKSZ > EXCEPTION_STKSZ
if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
unsigned j = N_EXCEPTION_STACKS - 1;
do {
++j;
end -= EXCEPTION_STKSZ;
ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
} while (stack < end - EXCEPTION_STKSZ);
if (*usedp & (1U << j))
break;
*usedp |= 1U << j;
*idp = ids[j];
return (unsigned long *)end;
}
#endif
} }
return NULL; return NULL;
} }
...@@ -613,6 +641,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) ...@@ -613,6 +641,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
io_check_error(reason, regs); io_check_error(reason, regs);
} }
/* runs on IST stack. */
asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
{ {
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
...@@ -894,7 +923,7 @@ void __init trap_init(void) ...@@ -894,7 +923,7 @@ void __init trap_init(void)
set_intr_gate(0,&divide_error); set_intr_gate(0,&divide_error);
set_intr_gate_ist(1,&debug,DEBUG_STACK); set_intr_gate_ist(1,&debug,DEBUG_STACK);
set_intr_gate_ist(2,&nmi,NMI_STACK); set_intr_gate_ist(2,&nmi,NMI_STACK);
set_system_gate(3,&int3); set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
set_system_gate(4,&overflow); /* int4 can be called from all */ set_system_gate(4,&overflow); /* int4 can be called from all */
set_intr_gate(5,&bounds); set_intr_gate(5,&bounds);
set_intr_gate(6,&invalid_op); set_intr_gate(6,&invalid_op);
......
...@@ -114,6 +114,11 @@ static inline void set_system_gate(int nr, void *func) ...@@ -114,6 +114,11 @@ static inline void set_system_gate(int nr, void *func)
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
} }
/*
 * Install an IDT interrupt gate with DPL 3 (callable from user mode,
 * like set_system_gate()) that additionally selects the given IST
 * (Interrupt Stack Table) slot, so the handler runs on a dedicated
 * per-CPU exception stack.  Added so int3 can share the DEBUG_STACK
 * IST entry with the #DB handler (see trap_init()).
 */
static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
{
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
}
static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
unsigned size) unsigned size)
{ {
......
...@@ -14,13 +14,18 @@ ...@@ -14,13 +14,18 @@
#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
#define THREAD_ORDER 1 #define THREAD_ORDER 1
#ifdef __ASSEMBLY__ #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
#define THREAD_SIZE (1 << (PAGE_SHIFT + THREAD_ORDER))
#else
#define THREAD_SIZE (1UL << (PAGE_SHIFT + THREAD_ORDER))
#endif
#define CURRENT_MASK (~(THREAD_SIZE-1)) #define CURRENT_MASK (~(THREAD_SIZE-1))
#define EXCEPTION_STACK_ORDER 0
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define DEBUG_STACK_ORDER EXCEPTION_STACK_ORDER
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
#define IRQSTACK_ORDER 2
#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/stddef.h> #include <linux/stddef.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <asm/page.h>
/* Per processor datastructure. %gs points to it while the kernel runs */ /* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda { struct x8664_pda {
...@@ -12,6 +13,9 @@ struct x8664_pda { ...@@ -12,6 +13,9 @@ struct x8664_pda {
unsigned long data_offset; /* Per cpu data offset from linker address */ unsigned long data_offset; /* Per cpu data offset from linker address */
unsigned long kernelstack; /* top of kernel stack for current */ unsigned long kernelstack; /* top of kernel stack for current */
unsigned long oldrsp; /* user rsp for system call */ unsigned long oldrsp; /* user rsp for system call */
#if DEBUG_STKSZ > EXCEPTION_STKSZ
unsigned long debugstack; /* #DB/#BP stack. */
#endif
int irqcount; /* Irq nesting counter. Starts with -1 */ int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */ int cpunumber; /* Logical CPU number */
char *irqstackptr; /* top of irqstack */ char *irqstackptr; /* top of irqstack */
...@@ -23,10 +27,6 @@ struct x8664_pda { ...@@ -23,10 +27,6 @@ struct x8664_pda {
unsigned apic_timer_irqs; unsigned apic_timer_irqs;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
#define IRQSTACK_ORDER 2
#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
extern struct x8664_pda cpu_pda[]; extern struct x8664_pda cpu_pda[];
/* /*
......
...@@ -273,8 +273,6 @@ struct thread_struct { ...@@ -273,8 +273,6 @@ struct thread_struct {
#define DEBUG_STACK 4 #define DEBUG_STACK 4
#define MCE_STACK 5 #define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define EXCEPTION_STACK_ORDER 0
#define start_thread(regs,new_rip,new_rsp) do { \ #define start_thread(regs,new_rip,new_rsp) do { \
asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment