Commit 63648dd2 authored by Will Deacon

arm64: entry: use ldp/stp instead of push/pop when saving/restoring regs

The push/pop instructions can be suboptimal when saving/restoring large
amounts of data to/from the stack, for example on entry/exit from the
kernel. This is because:

  (1) They act on descending addresses (i.e. the newly decremented sp),
      which may defeat some hardware prefetchers

  (2) They introduce an implicit dependency between each instruction, as
      the sp has to be updated in order to resolve the address of the
      next access.

This patch removes the push/pop instructions from our kernel entry/exit
macros in favour of ldp/stp plus offset.
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent d54e81f9
...@@ -64,25 +64,26 @@ ...@@ -64,25 +64,26 @@
#define BAD_ERROR 3 #define BAD_ERROR 3
.macro kernel_entry, el, regsize = 64 .macro kernel_entry, el, regsize = 64
sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR sub sp, sp, #S_FRAME_SIZE
.if \regsize == 32 .if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0 mov w0, w0 // zero upper 32 bits of x0
.endif .endif
push x28, x29 stp x0, x1, [sp, #16 * 0]
push x26, x27 stp x2, x3, [sp, #16 * 1]
push x24, x25 stp x4, x5, [sp, #16 * 2]
push x22, x23 stp x6, x7, [sp, #16 * 3]
push x20, x21 stp x8, x9, [sp, #16 * 4]
push x18, x19 stp x10, x11, [sp, #16 * 5]
push x16, x17 stp x12, x13, [sp, #16 * 6]
push x14, x15 stp x14, x15, [sp, #16 * 7]
push x12, x13 stp x16, x17, [sp, #16 * 8]
push x10, x11 stp x18, x19, [sp, #16 * 9]
push x8, x9 stp x20, x21, [sp, #16 * 10]
push x6, x7 stp x22, x23, [sp, #16 * 11]
push x4, x5 stp x24, x25, [sp, #16 * 12]
push x2, x3 stp x26, x27, [sp, #16 * 13]
push x0, x1 stp x28, x29, [sp, #16 * 14]
.if \el == 0 .if \el == 0
mrs x21, sp_el0 mrs x21, sp_el0
get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
...@@ -118,33 +119,31 @@ ...@@ -118,33 +119,31 @@
.if \el == 0 .if \el == 0
ct_user_enter ct_user_enter
ldr x23, [sp, #S_SP] // load return stack pointer ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
.endif .endif
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
.if \ret .if \ret
ldr x1, [sp, #S_X1] // preserve x0 (syscall return) ldr x1, [sp, #S_X1] // preserve x0 (syscall return)
add sp, sp, S_X2
.else .else
pop x0, x1 ldp x0, x1, [sp, #16 * 0]
.endif
pop x2, x3 // load the rest of the registers
pop x4, x5
pop x6, x7
pop x8, x9
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
.if \el == 0
msr sp_el0, x23
.endif .endif
pop x10, x11 ldp x2, x3, [sp, #16 * 1]
pop x12, x13 ldp x4, x5, [sp, #16 * 2]
pop x14, x15 ldp x6, x7, [sp, #16 * 3]
pop x16, x17 ldp x8, x9, [sp, #16 * 4]
pop x18, x19 ldp x10, x11, [sp, #16 * 5]
pop x20, x21 ldp x12, x13, [sp, #16 * 6]
pop x22, x23 ldp x14, x15, [sp, #16 * 7]
pop x24, x25 ldp x16, x17, [sp, #16 * 8]
pop x26, x27 ldp x18, x19, [sp, #16 * 9]
pop x28, x29 ldp x20, x21, [sp, #16 * 10]
ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP ldp x22, x23, [sp, #16 * 11]
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
eret // return to kernel eret // return to kernel
.endm .endm
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment