Commit e90e147c, authored by Denys Vlasenko, committed by Ingo Molnar

x86/asm/entry/64: Fix comments

 - Misleading and slightly incorrect comments in "struct pt_regs" are
   fixed (four instances).

 - Fix incorrect comment atop EMPTY_FRAME macro.

 - Explain in more detail what we do with stack layout during hw interrupt.

 - Correct comments about "partial stack frame" which are no longer
   true.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-3-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/e1f4429c491fe6ceeddb879dea2786e0f8920f9c.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 76f5df43
...@@ -31,13 +31,17 @@ struct pt_regs { ...@@ -31,13 +31,17 @@ struct pt_regs {
#else /* __i386__ */ #else /* __i386__ */
struct pt_regs { struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15; unsigned long r15;
unsigned long r14; unsigned long r14;
unsigned long r13; unsigned long r13;
unsigned long r12; unsigned long r12;
unsigned long bp; unsigned long bp;
unsigned long bx; unsigned long bx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11; unsigned long r11;
unsigned long r10; unsigned long r10;
unsigned long r9; unsigned long r9;
...@@ -47,9 +51,12 @@ struct pt_regs { ...@@ -47,9 +51,12 @@ struct pt_regs {
unsigned long dx; unsigned long dx;
unsigned long si; unsigned long si;
unsigned long di; unsigned long di;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_ax; unsigned long orig_ax;
/* end of arguments */ /* Return frame for iretq */
/* cpu exception frame or undefined */
unsigned long ip; unsigned long ip;
unsigned long cs; unsigned long cs;
unsigned long flags; unsigned long flags;
......
...@@ -25,13 +25,17 @@ ...@@ -25,13 +25,17 @@
#else /* __i386__ */ #else /* __i386__ */
#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
#define R15 0 #define R15 0
#define R14 8 #define R14 8
#define R13 16 #define R13 16
#define R12 24 #define R12 24
#define RBP 32 #define RBP 32
#define RBX 40 #define RBX 40
/* arguments: interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 48 #define R11 48
#define R10 56 #define R10 56
#define R9 64 #define R9 64
...@@ -41,9 +45,12 @@ ...@@ -41,9 +45,12 @@
#define RDX 96 #define RDX 96
#define RSI 104 #define RSI 104
#define RDI 112 #define RDI 112
#define ORIG_RAX 120 /* = ERROR */ /*
/* end of arguments */ * On syscall entry, this is syscall#. On CPU exception, this is error code.
/* cpu exception frame or undefined in case of fast syscall. */ * On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 120
/* Return frame for iretq */
#define RIP 128 #define RIP 128
#define CS 136 #define CS 136
#define EFLAGS 144 #define EFLAGS 144
......
...@@ -41,13 +41,17 @@ struct pt_regs { ...@@ -41,13 +41,17 @@ struct pt_regs {
#ifndef __KERNEL__ #ifndef __KERNEL__
struct pt_regs { struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15; unsigned long r15;
unsigned long r14; unsigned long r14;
unsigned long r13; unsigned long r13;
unsigned long r12; unsigned long r12;
unsigned long rbp; unsigned long rbp;
unsigned long rbx; unsigned long rbx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11; unsigned long r11;
unsigned long r10; unsigned long r10;
unsigned long r9; unsigned long r9;
...@@ -57,9 +61,12 @@ struct pt_regs { ...@@ -57,9 +61,12 @@ struct pt_regs {
unsigned long rdx; unsigned long rdx;
unsigned long rsi; unsigned long rsi;
unsigned long rdi; unsigned long rdi;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_rax; unsigned long orig_rax;
/* end of arguments */ /* Return frame for iretq */
/* cpu exception frame or undefined */
unsigned long rip; unsigned long rip;
unsigned long cs; unsigned long cs;
unsigned long eflags; unsigned long eflags;
......
...@@ -14,9 +14,6 @@ ...@@ -14,9 +14,6 @@
* NOTE: This code handles signal-recognition, which happens every time * NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call. * after an interrupt and after each system call.
* *
* Normal syscalls and interrupts don't save a full stack frame, this is
* only done for syscall tracing, signals or fork/exec et.al.
*
* A note on terminology: * A note on terminology:
* - top of stack: Architecture defined interrupt frame from SS to RIP * - top of stack: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack. * at the top of the kernel process stack.
...@@ -151,7 +148,7 @@ ENDPROC(native_usergs_sysret64) ...@@ -151,7 +148,7 @@ ENDPROC(native_usergs_sysret64)
.endm .endm
/* /*
* initial frame state for interrupts (and exceptions without error code) * empty frame
*/ */
.macro EMPTY_FRAME start=1 offset=0 .macro EMPTY_FRAME start=1 offset=0
.if \start .if \start
...@@ -379,7 +376,7 @@ tracesys_phase2: ...@@ -379,7 +376,7 @@ tracesys_phase2:
call syscall_trace_enter_phase2 call syscall_trace_enter_phase2
/* /*
* Reload arg registers from stack in case ptrace changed them. * Reload registers from stack in case ptrace changed them.
* We don't reload %rax because syscall_trace_entry_phase2() returned * We don't reload %rax because syscall_trace_entry_phase2() returned
* the value it wants us to use in the table lookup. * the value it wants us to use in the table lookup.
*/ */
...@@ -629,6 +626,13 @@ END(interrupt) ...@@ -629,6 +626,13 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */ /* 0(%rsp): ~(interrupt number) */
.macro interrupt func .macro interrupt func
cld cld
/*
* Since nothing in interrupt handling code touches r12...r15 members
* of "struct pt_regs", and since interrupts can nest, we can save
* four stack slots and simultaneously provide
* an unwind-friendly stack layout by saving "truncated" pt_regs
* exactly up to rbp slot, without these members.
*/
ALLOC_PT_GPREGS_ON_STACK -RBP ALLOC_PT_GPREGS_ON_STACK -RBP
SAVE_C_REGS -RBP SAVE_C_REGS -RBP
/* this goes to 0(%rsp) for unwinder, not for saving the value: */ /* this goes to 0(%rsp) for unwinder, not for saving the value: */
...@@ -641,6 +645,7 @@ END(interrupt) ...@@ -641,6 +645,7 @@ END(interrupt)
SWAPGS SWAPGS
1: 1:
/* /*
* Save previous stack pointer, optionally switch to interrupt stack.
* irq_count is used to check if a CPU is already on an interrupt stack * irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is * or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of * a little cheaper to use a separate counter in the PDA (short of
...@@ -681,6 +686,7 @@ ret_from_intr: ...@@ -681,6 +686,7 @@ ret_from_intr:
/* Restore saved previous stack */ /* Restore saved previous stack */
popq %rsi popq %rsi
CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
/* return code expects complete pt_regs - adjust rsp accordingly: */
leaq ARGOFFSET-RBP(%rsi), %rsp leaq ARGOFFSET-RBP(%rsi), %rsp
CFI_DEF_CFA_REGISTER rsp CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
...@@ -692,7 +698,7 @@ exit_intr: ...@@ -692,7 +698,7 @@ exit_intr:
/* Interrupt came from user space */ /* Interrupt came from user space */
/* /*
* Has a correct top of stack, but a partial stack frame * Has a correct top of stack.
* %rcx: thread info. Interrupts off. * %rcx: thread info. Interrupts off.
*/ */
retint_with_reschedule: retint_with_reschedule:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment