Commit 6f7d4d64 authored by Kirill Smelkov's avatar Kirill Smelkov

Low-level pagefault handler

We hook into SIGSEGV and handle read/write pagefaults this way.

This patch adds stub code that only detects faults and determines (in an
arch-specific way) whether the fault was caused by a read or by a write;
passing that information to a higher level is left as a TODO.

It also comes with tests which verify that we still crash if we access
something incorrectly, so that people can get coredumps and investigate them.
parent 45af76e6
...@@ -60,7 +60,7 @@ CFLAGS := -g -Wall -D_GNU_SOURCE -std=gnu99 -fplan9-extensions ...@@ -60,7 +60,7 @@ CFLAGS := -g -Wall -D_GNU_SOURCE -std=gnu99 -fplan9-extensions
# XXX hack ugly # XXX hack ugly
LOADLIBES=lib/bug.c lib/utils.c 3rdparty/ccan/ccan/tap/tap.c LOADLIBES=lib/bug.c lib/utils.c 3rdparty/ccan/ccan/tap/tap.c
TESTS := $(patsubst %.c,%,$(wildcard bigfile/tests/test_*.c)) TESTS := $(patsubst %.c,%,$(wildcard bigfile/tests/test_*.c))
test : test.t test.asan test.tsan test.vgmem test.vghel test.vgdrd test : test.t test.fault test.asan test.tsan test.vgmem test.vghel test.vgdrd
# extract what goes after RUNWITH: marker from command source, or empty if no marker # extract what goes after RUNWITH: marker from command source, or empty if no marker
...@@ -144,3 +144,13 @@ test.vghel: $(TESTS:%=%.vghelrun) ...@@ -144,3 +144,13 @@ test.vghel: $(TESTS:%=%.vghelrun)
test.vgdrd: $(TESTS:%=%.vgdrdrun) test.vgdrd: $(TESTS:%=%.vgdrdrun)
%.vgdrdrun: %.t %.vgdrdrun: %.t
$(call vgxrun,--tool=drd, $<) $(call vgxrun,--tool=drd, $<)
# test pagefault for double/real faults - it should crash
#
# Every fault test is run through t/tfault-run, which is given the test
# program, the test name and the name of the function the test must die in.
tfault := bigfile/tests/tfault
# XXX FAULTS extraction fragile
# FAULTS = names of fault tests, taken from the test table in $(tfault).c:
# select table lines, turn quotes into spaces and print the 2nd field (the name).
FAULTS := $(shell grep '{"fault.*"' $(tfault).c | sed 's/"/ /g' |awk '{print $$2}')
test.fault : $(FAULTS:%=%.tfault)
# <name>.tfault: run fault test <name>; the last word on its table line in
# $(tfault).c (a trailing comment there) is passed as the must-die function.
%.tfault : $(tfault).t
t/tfault-run $< $* $(shell grep '{"$*"' $(tfault).c | awk '{print $$NF}')
/* Wendelin.bigfile | Low-level pagefault handler
* Copyright (C) 2014-2015 Nexedi SA and Contributors.
* Kirill Smelkov <kirr@nexedi.com>
*
* This program is free software: you can Use, Study, Modify and Redistribute
* it under the terms of the GNU General Public License version 3, or (at your
* option) any later version, as published by the Free Software Foundation.
*
* You can also Link and Combine this program with other software covered by
* the terms of any of the Open Source Initiative approved licenses and Convey
* the resulting work. Corresponding source of such a combination shall include
* the source code for all other software used.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See COPYING file for full licensing terms.
*
* ~~~~~~~~
*
* low-level pagefault handler entry from OS
*
* its job is to look up the vma which is being accessed, determine whether the
* access is a read or a write, and tail to vma_on_pagefault().
*/
#include <wendelin/bug.h>
#include <signal.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>
/* "before us" previously installed SIGSEGV sigaction.
 * Filled in by pagefault_init(); on_pagefault() tails to it for faults it
 * does not handle. */
static struct sigaction prev_segv_act;

/* !0 after pagefault_init() has installed on_pagefault() for SIGSEGV */
static int segv_act_installed;

/* NOTE the context type is spelled ucontext_t, not `struct ucontext`: the
 * typedef is what <signal.h> is specified to provide, while the struct tag was
 * removed in glibc 2.26, where the old spelling no longer compiles. */
static int faulted_by(const ucontext_t *uc);


/* SIGSEGV handler
 *
 * @sig     signal number; must be SIGSEGV
 * @si      kernel-provided details about the signal
 * @_uc     ucontext_t * with machine context at the time of the fault
 *
 * Currently a stub: the fault is classified as read or write, but no fault is
 * actually served yet - every SIGSEGV ends up in the "don't handle" path, which
 * tails to the previously installed handler, or crashes if there was none.
 */
static void on_pagefault(int sig, siginfo_t *si, void *_uc)
{
    ucontext_t *uc = _uc;
    unsigned write;

    BUG_ON(sig != SIGSEGV);
    BUG_ON(si->si_signo != SIGSEGV);

    /* determine what client wants - read or write */
    write = faulted_by(uc);

    /* we'll try to only handle "invalid permissions" faults (= read of page
     * with PROT_NONE | write to page with PROT_READ only).
     *
     * "address not mapped" (SEGV_MAPERR) and possibly anything else (e.g.
     * SI_USER for signals sent by kill - not by kernel) could not result from
     * valid access to prepared file address space, so we don't handle those. */
    if (si->si_code != SEGV_ACCERR)
        goto dont_handle;

    // XXX locking

    /* (1) addr -> vma  ;lookup VMA covering faulting memory address */
    // TODO
    goto dont_handle;

    /* NOTE everything from here down to `dont_handle` is not reachable until
     * the VMA lookup above is implemented. */

    /* now, since we found faulting address in registered memory areas, we know
     * we should serve this pagefault. */
    // TODO protect against different threads

    /* save/restore errno       XXX & the like ? */
    int save_errno = errno;

    // TODO handle pagefault at si->si_addr / write

    errno = save_errno;
    return;


dont_handle:
    /* pagefault not resulted from correct access to file memory.
     * Crash if no previous SIGSEGV handler was set, or tail to that. */
    if (prev_segv_act.sa_flags & SA_SIGINFO) {
        prev_segv_act.sa_sigaction(sig, si, _uc);
        return;
    }

    if (prev_segv_act.sa_handler != SIG_DFL &&
        prev_segv_act.sa_handler != SIG_IGN /* yes, SIGSEGV can't be ignored */) {
        prev_segv_act.sa_handler(sig);
        return;
    }

    /* no previous SIGSEGV handler was set - re-trigger to die
     *
     * NOTE here SIGSEGV was set blocked in thread sigmask by kernel
     * when invoking signal handler (we explicitly did not specify
     * SA_NODEFER when setting it up).
     *
     * Re-access original memory location, and it will fault with
     * coredump directly without calling signal handler again.
     *
     * The re-access is only done for faults raised by the kernel. For SIGSEGV
     * sent from userspace (kill, sigqueue, tgkill, ... - on Linux those carry
     * si_code <= 0) si_addr does not describe a faulting access, so no memory
     * is touched and we go straight to BUG().
     *
     * NOTE the re-access is deliberately kept inline in this function: the
     * fault tests check that the crash happens in on_pagefault. */
    if (si->si_code > 0) {
        // XXX how to know access size? we just proceed here with 1byte ...
        volatile uint8_t *p = (uint8_t *)si->si_addr;

        if (write)
            *p = *p;
        else
            *p;
    }

    /* could get here because ex. other thread remapped something in place
     * of old page, or because the signal did not come from a real fault.
     * Die unconditionally */
    BUG();
}


/* ensures pagefault handler for SIGSEGV is installed
 *
 * It is ok to call this more than once - the handler is installed only on the
 * first successful call, and the sigaction which was active before it is
 * remembered in prev_segv_act.
 *
 * @return  0 - ok      !0 - sigemptyset() or sigaction() failed (their return
 *                           value is passed through as is)
 */
int pagefault_init(void)
{
    struct sigaction act;
    int err;

    /* protect from double sigaction installing. It is ok to be called twice. */
    if (segv_act_installed)
        return 0;

    act.sa_sigaction = on_pagefault;

    // |SA_RESTART(but does not work for read/write vs SIGSEGV?)

    /* NOTE no SA_ONSTACK - continue executing on the same stack
     * TODO stack overflow protection
     */

    /* NOTE we do not set SA_NODEFER. This means upon entry to signal handler,
     * SIGSEGV will be automatically blocked by kernel for faulting thread.
     *
     * This in particular means we'll get automatic protection from double
     * faults - in case handler or any other code it calls accesses memory
     * without appropriate protection prior set, the kernel will coredump.
     */
    act.sa_flags = SA_SIGINFO;

    /* do not want to block any other signals */
    err = sigemptyset(&act.sa_mask);
    if (err)
        return err;

    err = sigaction(SIGSEGV, &act, &prev_segv_act);
    if (err)
        return err;

    segv_act_installed = 1;
    return 0;
}


/* determine what client faulted by - read or write
 *
 * @uc      machine context passed by the kernel to the SIGSEGV handler
 * @return  0 - read    !0 - write
 */
static int faulted_by(const ucontext_t *uc)
{
    int write;

#if defined(__x86_64__) || defined(__i386__)
    /*
     * http://stackoverflow.com/questions/17671869/how-to-identify-read-or-write-operations-of-page-fault-when-using-sigaction-hand
     * http://wiki.osdev.org/Exceptions#Page_Fault
     *
     * bit 1 (0x2) of the page-fault error code is set when the faulting
     * access was a write.
     */
    write = (int)(uc->uc_mcontext.gregs[REG_ERR] & 0x2);
#else
# error TODO: implement read/write detection for pagefaults for your arch
#endif

    return write;
}
/* Wendelin.bigfile | tests for real faults leading to crash
* Copyright (C) 2014-2015 Nexedi SA and Contributors.
* Kirill Smelkov <kirr@nexedi.com>
*
* This program is free software: you can Use, Study, Modify and Redistribute
* it under the terms of the GNU General Public License version 3, or (at your
* option) any later version, as published by the Free Software Foundation.
*
* You can also Link and Combine this program with other software covered by
* the terms of any of the Open Source Initiative approved licenses and Convey
* the resulting work. Corresponding source of such a combination shall include
* the source code for all other software used.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See COPYING file for full licensing terms.
*
* ~~~~
*
* All tests here end up crashing via segmentation violation. The calling
* driver verifies test output prior to crash and that the crash happened in
* the right place.
*
* See t/tfault-run and `test.fault` in Makefile for driver details.
*/
// XXX better link with it
#include "../pagefault.c"
#include <ccan/tap/tap.h>
#include <ccan/array_size/array_size.h>
#include <stdio.h>
#include <string.h>
/* Announce that the fault is about to happen and push out everything buffered
 * on stdout and stderr, so that no output is lost when the process crashes.
 * (t/tfault-run looks for the announcement in the captured output.) */
static void prefault()
{
    FILE *const pending[] = { stdout, stderr };
    unsigned k;

    diag("going to fault...");

    /* flush in fixed order: stdout first, then stderr */
    for (k = 0; k < sizeof(pending) / sizeof(pending[0]); k++)
        fflush(pending[k]);
}
/* Fault test: install the pagefault handler and then read through a NULL
 * pointer. The process is expected to crash - this function must not return. */
void fault_read()
{
    /* volatile - so that the access is really performed */
    volatile int *nowhere = NULL;

    diag("Testing pagefault v.s. incorrect read");

    // XXX save/restore sigaction ?
    ok1(!pagefault_init());

    prefault();
    *nowhere;
}
/* Fault test: install the pagefault handler and then write through a NULL
 * pointer. The process is expected to crash - this function must not return. */
void fault_write()
{
    /* volatile - so that the access is really performed */
    volatile int *nowhere = NULL;

    diag("Testing pagefault v.s. incorrect write");

    // XXX save/restore sigaction ?
    ok1(!pagefault_init());

    prefault();
    *nowhere = 0;
}
/* Table of available fault tests: main() selects an entry by name (given on the
 * command line) and runs its function.
 *
 * NOTE the entries below are also parsed as plain text by the Makefile: the
 * first quoted word is taken as the test name and the last word on the line
 * (the trailing comment) as the function the test is expected to die in.
 * Keep one entry per line and keep the trailing comments in place. */
static const struct {
const char *name;
void (*test)(void);
} tests[] = {
// XXX fragile - test names must start exactly with `{"fault` - Makefile extracts them this way
// name func-where-it-dies
{"faultr", fault_read}, // on_pagefault
{"faultw", fault_write}, // on_pagefault
};
/* Run the fault test whose name is given as the only command-line argument.
 *
 * A selected test is expected to crash the process. Getting past it, or not
 * finding a test with the requested name, is reported as a test failure, and
 * every failure aborts the program right away. */
int main(int argc, char *argv[])
{
    int idx = 0;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <test>\n", argv[0]);
        exit(1);
    }

    tap_fail_callback = abort;  // XXX to catch failure immediately

    while (idx < ARRAY_SIZE(tests)) {
        if (strcmp(argv[1], tests[idx].name) == 0) {
            tests[idx].test();
            fail("should not get here");
        }
        idx++;
    }

    fail("unknown test '%s'", argv[1]);
    return 1;
}
...@@ -27,6 +27,7 @@ import os ...@@ -27,6 +27,7 @@ import os
_bigfile = Extension('wendelin.bigfile._bigfile', _bigfile = Extension('wendelin.bigfile._bigfile',
sources = [ sources = [
'bigfile/_bigfile.c', 'bigfile/_bigfile.c',
'bigfile/pagefault.c',
'bigfile/pagemap.c', 'bigfile/pagemap.c',
'lib/bug.c', 'lib/bug.c',
'lib/utils.c', 'lib/utils.c',
......
#!/bin/sh -e
# tfault-run <tfault> <arg> <mustdie>
# run `<tfault> <arg>` and verify that it produces correct coredump, dying from
# SIGSEGV in function <mustdie>.
#
# On success the work directory is removed; on failure it is left in place
# (with run.out, core and core.info) for investigation.

# die <msg> ...
# print error message to stderr and exit with failure
die() {
    echo "E: $*" 1>&2
    exit 1
}

test $# -eq 3 || die "Usage: $0 <tfault> <arg> <mustdie>"

tfault=$(realpath "$1")
arg=$2
mustdie=$3

# XXX ok to hardcode t/ ?
workdir=t/tfault-run.$arg
cwd=$(pwd)

# a previous failed run leaves its work directory behind - start from scratch
rm -rf "$workdir"
mkdir "$workdir"
cd "$workdir"

ulimit -c unlimited
# NOTE the test is expected to crash; the pipeline status is that of tee
"$tfault" "$arg" 2>&1 |tee run.out

grep -q "^# going to fault" run.out || die "test didn't run to faulting point"
test -e core || die "no core after run"

gdb -q -batch "$tfault" core >core.info || die "can't gdb(core)"
grep -q "Program terminated with signal SIGSEGV, Segmentation fault." core.info || die "not SIGSEGV"

# the function we died in is the 4th word of frame #0 line, e.g.:
# #0 0x00000000004031ae in doublefault_loadblk (file=0x7fff0f25d9c0, blk=0, buf=0x7ff85a553000) at t/tfault.c:93
diefunc=$(grep '^#0' core.info | awk '{print $4}')
test -n "$diefunc" || die "can't extract diefunc"
# NOTE `=`, not `==`: the latter is not understood by every /bin/sh
test "$diefunc" = "$mustdie" || die "must die in $mustdie, died in $diefunc"

# run ok - cleanup
cd "$cwd"
rm -rf "$workdir"

echo "ok - crashed OK (in $diefunc)"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment