Commit 0138ba57 authored by Nicholas Piggin's avatar Nicholas Piggin Committed by Michael Ellerman

powerpc/64/signal: Balance return predictor stack in signal trampoline

Returning from an interrupt or syscall to a signal handler currently
begins execution directly at the handler's entry point, with LR set to
the address of the sigreturn trampoline. When the signal handler
function returns, it runs the trampoline. It looks like this:

    # interrupt at user address xyz
    # kernel stuff... signal is raised
    rfid
    # void handler(int sig)
    addis 2,12,.TOC.-.LCF0@ha
    addi 2,2,.TOC.-.LCF0@l
    mflr 0
    std 0,16(1)
    stdu 1,-96(1)
    # handler stuff
    ld 0,16(1)
    mtlr 0
    blr
    # __kernel_sigtramp_rt64
    addi    r1,r1,__SIGNAL_FRAMESIZE
    li      r0,__NR_rt_sigreturn
    sc
    # kernel executes rt_sigreturn
    rfid
    # back to user address xyz

Note the blr with no matching bl. This can corrupt the return
predictor.

Solve this by instead resuming execution at the signal trampoline
which then calls the signal handler. qtrace-tools link_stack checker
confirms the entire user/kernel/vdso cycle is balanced after this
patch, whereas it's not upstream.

Alan confirms the dwarf unwind info still looks good. gdb still
recognises the signal frame and can step into parent frames if it
break inside a signal handler.

Performance is pretty noisy, not a very significant change on a POWER9
here, but branch misses are consistently a lot lower on a
microbenchmark:

 Performance counter stats for './signal':

       13,085.72 msec task-clock                #    1.000 CPUs utilized
  45,024,760,101      cycles                    #    3.441 GHz
  65,102,895,542      instructions              #    1.45  insn per cycle
  11,271,673,787      branches                  #  861.372 M/sec
      59,468,979      branch-misses             #    0.53% of all branches

       12,989.09 msec task-clock                #    1.000 CPUs utilized
  44,692,719,559      cycles                    #    3.441 GHz
  65,109,984,964      instructions              #    1.46  insn per cycle
  11,282,136,057      branches                  #  868.585 M/sec
      39,786,942      branch-misses             #    0.35% of all branches
Signed-off-by: default avatarNicholas Piggin <npiggin@gmail.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200511101952.1463138-1-npiggin@gmail.com
parent b648a513
...@@ -332,6 +332,7 @@ ...@@ -332,6 +332,7 @@
#define PPC_INST_BLR 0x4e800020 #define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021 #define PPC_INST_BLRL 0x4e800021
#define PPC_INST_BCTR 0x4e800420 #define PPC_INST_BCTR 0x4e800420
#define PPC_INST_BCTRL 0x4e800421
#define PPC_INST_MULLD 0x7c0001d2 #define PPC_INST_MULLD 0x7c0001d2
#define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016 #define PPC_INST_MULHWU 0x7c000016
......
...@@ -39,8 +39,8 @@ ...@@ -39,8 +39,8 @@
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
#define FP_REGS_SIZE sizeof(elf_fpregset_t) #define FP_REGS_SIZE sizeof(elf_fpregset_t)
#define TRAMP_TRACEBACK 3 #define TRAMP_TRACEBACK 4
#define TRAMP_SIZE 6 #define TRAMP_SIZE 7
/* /*
* When we have signals to deliver, we set up on the user stack, * When we have signals to deliver, we set up on the user stack,
...@@ -600,13 +600,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) ...@@ -600,13 +600,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
int i; int i;
long err = 0; long err = 0;
/* bctrl # call the handler */
err |= __put_user(PPC_INST_BCTRL, &tramp[0]);
/* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
(__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]);
/* li r0, __NR_[rt_]sigreturn| */ /* li r0, __NR_[rt_]sigreturn| */
err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]); err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]);
/* sc */ /* sc */
err |= __put_user(PPC_INST_SC, &tramp[2]); err |= __put_user(PPC_INST_SC, &tramp[3]);
/* Minimal traceback info */ /* Minimal traceback info */
for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
...@@ -864,12 +866,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, ...@@ -864,12 +866,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
/* Set up to return from userspace. */ /* Set up to return from userspace. */
if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
} else { } else {
err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
if (err) if (err)
goto badframe; goto badframe;
regs->link = (unsigned long) &frame->tramp[0]; regs->nip = (unsigned long) &frame->tramp[0];
} }
/* Allocate a dummy caller frame for the signal handler. */ /* Allocate a dummy caller frame for the signal handler. */
...@@ -878,8 +880,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, ...@@ -878,8 +880,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
/* Set up "regs" so we "return" to the signal handler. */ /* Set up "regs" so we "return" to the signal handler. */
if (is_elf2_task()) { if (is_elf2_task()) {
regs->nip = (unsigned long) ksig->ka.sa.sa_handler; regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
regs->gpr[12] = regs->nip; regs->gpr[12] = regs->ctr;
} else { } else {
/* Handler is *really* a pointer to the function descriptor for /* Handler is *really* a pointer to the function descriptor for
* the signal routine. The first entry in the function * the signal routine. The first entry in the function
...@@ -889,7 +891,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, ...@@ -889,7 +891,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
func_descr_t __user *funct_desc_ptr = func_descr_t __user *funct_desc_ptr =
(func_descr_t __user *) ksig->ka.sa.sa_handler; (func_descr_t __user *) ksig->ka.sa.sa_handler;
err |= get_user(regs->nip, &funct_desc_ptr->entry); err |= get_user(regs->ctr, &funct_desc_ptr->entry);
err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
*/ */
#include <asm/cache.h> /* IFETCH_ALIGN_BYTES */
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/ppc_asm.h> #include <asm/ppc_asm.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -14,21 +15,17 @@ ...@@ -14,21 +15,17 @@
.text .text
/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
the return address to get an address in the middle of the presumed
call instruction. Since we don't have a call here, we artificially
extend the range covered by the unwind info by padding before the
real start. */
nop
.balign 8 .balign 8
.balign IFETCH_ALIGN_BYTES
V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
.Lsigrt_start = . - 4 .Lsigrt_start:
bctrl /* call the handler */
addi r1, r1, __SIGNAL_FRAMESIZE addi r1, r1, __SIGNAL_FRAMESIZE
li r0,__NR_rt_sigreturn li r0,__NR_rt_sigreturn
sc sc
.Lsigrt_end: .Lsigrt_end:
V_FUNCTION_END(__kernel_sigtramp_rt64) V_FUNCTION_END(__kernel_sigtramp_rt64)
/* The ".balign 8" above and the following zeros mimic the old stack /* The .balign 8 above and the following zeros mimic the old stack
trampoline layout. The last magic value is the ucontext pointer, trampoline layout. The last magic value is the ucontext pointer,
chosen in such a way that older libgcc unwind code returns a zero chosen in such a way that older libgcc unwind code returns a zero
for a sigcontext pointer. */ for a sigcontext pointer. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment