Commit 10c1031f authored by Martin Schwidefsky's avatar Martin Schwidefsky

[S390] Minor fault path optimization.

The minor fault path has grown a lot in terms of cycles. In particular
the kprobes hook is very costly. Optimize the path to save a couple of
cycles. If kprobes is enabled more than 300 cycles can be avoided if
kprobes_running() is false.
Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
parent c0007f1a
...@@ -63,21 +63,25 @@ int unregister_page_fault_notifier(struct notifier_block *nb) ...@@ -63,21 +63,25 @@ int unregister_page_fault_notifier(struct notifier_block *nb)
return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
} }
static inline int notify_page_fault(enum die_val val, const char *str, static int __kprobes __notify_page_fault(struct pt_regs *regs, long err)
struct pt_regs *regs, long err, int trap, int sig)
{ {
struct die_args args = { struct die_args args = { .str = "page fault",
.regs = regs, .trapnr = 14,
.str = str, .signr = SIGSEGV };
.err = err, args.regs = regs;
.trapnr = trap, args.err = err;
.signr = sig return atomic_notifier_call_chain(&notify_page_fault_chain,
}; DIE_PAGE_FAULT, &args);
return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); }
static inline int notify_page_fault(struct pt_regs *regs, long err)
{
if (unlikely(kprobe_running()))
return __notify_page_fault(regs, err);
return NOTIFY_DONE;
} }
#else #else
static inline int notify_page_fault(enum die_val val, const char *str, static inline int notify_page_fault(struct pt_regs *regs, long err)
struct pt_regs *regs, long err, int trap, int sig)
{ {
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -170,6 +174,89 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, ...@@ -170,6 +174,89 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
force_sig_info(SIGSEGV, &si, current); force_sig_info(SIGSEGV, &si, current);
} }
static void do_no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
const struct exception_table_entry *fixup;
/* Are we prepared to handle this kernel fault? */
fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
if (fixup) {
regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
return;
}
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
if (check_space(current) == 0)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" at virtual kernel address %p\n", (void *)address);
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" at virtual user address %p\n", (void *)address);
die("Oops", regs, error_code);
do_exit(SIGKILL);
}
static void do_low_address(struct pt_regs *regs, unsigned long error_code)
{
/* Low-address protection hit in kernel mode means
NULL pointer write access in kernel mode. */
if (regs->psw.mask & PSW_MASK_PSTATE) {
/* Low-address protection hit in user mode 'cannot happen'. */
die ("Low-address protection", regs, error_code);
do_exit(SIGKILL);
}
do_no_context(regs, error_code, 0);
}
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
up_read(&mm->mmap_sem);
if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
return 1;
}
printk("VM: killing process %s\n", tsk->comm);
if (regs->psw.mask & PSW_MASK_PSTATE)
do_exit(SIGKILL);
do_no_context(regs, error_code, address);
return 0;
}
static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
tsk->thread.prot_addr = address;
tsk->thread.trap_no = error_code;
force_sig(SIGBUS, tsk);
/* Kernel mode? Handle exceptions or die */
if (!(regs->psw.mask & PSW_MASK_PSTATE))
do_no_context(regs, error_code, address);
}
#ifdef CONFIG_S390_EXEC_PROTECT #ifdef CONFIG_S390_EXEC_PROTECT
extern long sys_sigreturn(struct pt_regs *regs); extern long sys_sigreturn(struct pt_regs *regs);
extern long sys_rt_sigreturn(struct pt_regs *regs); extern long sys_rt_sigreturn(struct pt_regs *regs);
...@@ -253,48 +340,22 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs, ...@@ -253,48 +340,22 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
* 3b Region third trans. -> Not present (nullification) * 3b Region third trans. -> Not present (nullification)
*/ */
static inline void static inline void
do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) do_exception(struct pt_regs *regs, unsigned long error_code, int write)
{ {
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct * vma; struct vm_area_struct *vma;
unsigned long address; unsigned long address;
const struct exception_table_entry *fixup;
int si_code;
int space; int space;
int si_code;
tsk = current; if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
mm = tsk->mm;
if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return; return;
/* tsk = current;
* Check for low-address protection. This needs to be treated mm = tsk->mm;
* as a special case because the translation exception code
* field is not guaranteed to contain valid data in this case.
*/
if (is_protection && !(S390_lowcore.trans_exc_code & 4)) {
/* Low-address protection hit in kernel mode means
NULL pointer write access in kernel mode. */
if (!(regs->psw.mask & PSW_MASK_PSTATE)) {
address = 0;
space = 0;
goto no_context;
}
/* Low-address protection hit in user mode 'cannot happen'. */
die ("Low-address protection", regs, error_code);
do_exit(SIGKILL);
}
/* /* get the failing address and the affected space */
* get the failing address
* more specific the segment and page table portion of
* the address
*/
address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
space = check_space(tsk); space = check_space(tsk);
...@@ -342,7 +403,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) ...@@ -342,7 +403,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
*/ */
good_area: good_area:
si_code = SEGV_ACCERR; si_code = SEGV_ACCERR;
if (!is_protection) { if (!write) {
/* page not present, check vm flags */ /* page not present, check vm flags */
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area; goto bad_area;
...@@ -357,7 +418,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) ...@@ -357,7 +418,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
* make sure we exit gracefully rather than endlessly redo * make sure we exit gracefully rather than endlessly redo
* the fault. * the fault.
*/ */
switch (handle_mm_fault(mm, vma, address, is_protection)) { switch (handle_mm_fault(mm, vma, address, write)) {
case VM_FAULT_MINOR: case VM_FAULT_MINOR:
tsk->min_flt++; tsk->min_flt++;
break; break;
...@@ -365,9 +426,12 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) ...@@ -365,9 +426,12 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
tsk->maj_flt++; tsk->maj_flt++;
break; break;
case VM_FAULT_SIGBUS: case VM_FAULT_SIGBUS:
goto do_sigbus; do_sigbus(regs, error_code, address);
return;
case VM_FAULT_OOM: case VM_FAULT_OOM:
goto out_of_memory; if (do_out_of_memory(regs, error_code, address))
goto survive;
return;
default: default:
BUG(); BUG();
} }
...@@ -396,64 +460,23 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) ...@@ -396,64 +460,23 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
} }
no_context: no_context:
/* Are we prepared to handle this kernel fault? */ do_no_context(regs, error_code, address);
fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
if (fixup) {
regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
return;
}
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
if (space == 0)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" at virtual kernel address %p\n", (void *)address);
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" at virtual user address %p\n", (void *)address);
die("Oops", regs, error_code);
do_exit(SIGKILL);
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", tsk->comm);
if (regs->psw.mask & PSW_MASK_PSTATE)
do_exit(SIGKILL);
goto no_context;
do_sigbus:
up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
tsk->thread.prot_addr = address;
tsk->thread.trap_no = error_code;
force_sig(SIGBUS, tsk);
/* Kernel mode? Handle exceptions or die */
if (!(regs->psw.mask & PSW_MASK_PSTATE))
goto no_context;
} }
void __kprobes do_protection_exception(struct pt_regs *regs, void __kprobes do_protection_exception(struct pt_regs *regs,
unsigned long error_code) unsigned long error_code)
{ {
/* Protection exception is supressing, decrement psw address. */
regs->psw.addr -= (error_code >> 16); regs->psw.addr -= (error_code >> 16);
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
* field is not guaranteed to contain valid data in this case.
*/
if (unlikely(!(S390_lowcore.trans_exc_code & 4))) {
do_low_address(regs, error_code);
return;
}
do_exception(regs, 4, 1); do_exception(regs, 4, 1);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment