Commit 9adb8f17 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Crawl the stacks of all threads for GC roots

I think threading now "works" ie doesn't crash Pyston,
though we don't release the GIL until the thread exits.
parent 7fa92f6c
......@@ -61,13 +61,14 @@ There seem to be some lingering issues with the LLVM build that haven't been ide
```
cd ~/pyston_deps
wget http://download.savannah.gnu.org/releases/libunwind/libunwind-1.1.tar.gz
tar xvf libunwind-1.1.tar.gz
mkdir libunwind-1.1-install
cd libunwind-1.1
sudo apt-get install texlive-extra-utils autoconf
git clone git://git.sv.gnu.org/libunwind.git libunwind-trunk
mkdir libunwind-trunk-install
cd libunwind-trunk
git checkout 65ac867416
# disable shared libraries because we'll be installing this in a place that the loader can't find it:
./configure --prefix=$HOME/pyston_deps/libunwind-1.1-install --enable-shared=0
patch -p1 <~/pyston/libunwind_patches/0001-Change-the-RBP-validation-heuristic-to-allow-size-0-.patch
autoreconf -i
./configure --prefix=$HOME/pyston_deps/libunwind-trunk-install --enable-shared=0
make -j4
make install
```
......@@ -119,10 +120,10 @@ Assuming you've already built the normal version above:
```
cd ~/pyston_deps
cp -rv libunwind-1.1 libunwind-1.1-debug
mkdir libunwind-1.1-debug-install
cd libunwind-1.1-debug
CFLAGS="-g -O0" CXXFLAGS="-g -O0" ./configure --prefix=$HOME/pyston_deps/libunwind-1.1-debug-install --enable-shared=0 --enable-debug --enable-debug-frame
cp -rv libunwind-trunk libunwind-trunk-debug
mkdir libunwind-trunk-debug-install
cd libunwind-trunk-debug
CFLAGS="-g -O0" CXXFLAGS="-g -O0" ./configure --prefix=$HOME/pyston_deps/libunwind-trunk-debug-install --enable-shared=0 --enable-debug --enable-debug-frame
make -j4
make install
echo "USE_DEBUG_LIBUNWIND := 1" >> ~/pyston/src/Makefile.local
......
......@@ -48,6 +48,9 @@ TESTS_DIR := ../test/tests
GPP := $(GCC_DIR)/bin/g++
ifeq ($(V),1)
VERBOSE := 1
endif
ifeq ($(VERBOSE),1)
VERB :=
ECHO := @\#
......@@ -127,20 +130,9 @@ else
VALGRIND := VALGRIND_LIB=$(DEPS_DIR)/valgrind-3.9.0-install/lib/valgrind $(DEPS_DIR)/valgrind-3.9.0-install/bin/valgrind
endif
# libunwind's include files warn on -Wextern-c-compat, so turn that off;
# ideally would just turn it off for header files in libunwind, maybe by
# having an internal libunwind.h that pushed/popped the diagnostic state,
# but it doesn't seem like that important a warning so just turn it off.
COMMON_CXXFLAGS += -I$(DEPS_DIR)/libunwind-1.1-install/include -Wno-extern-c-compat
COMMON_CXXFLAGS += -DGITREV=$(shell git rev-parse HEAD | head -c 12) -DLLVMREV=$(LLVM_REVISION)
COMMON_CXXFLAGS += -DDEFAULT_PYTHON_MAJOR_VERSION=$(PYTHON_MAJOR_VERSION) -DDEFAULT_PYTHON_MINOR_VERSION=$(PYTHON_MINOR_VERSION) -DDEFAULT_PYTHON_MICRO_VERSION=$(PYTHON_MICRO_VERSION)
EXTRA_CXXFLAGS ?=
CXXFLAGS := $(LLVM_CXXFLAGS) $(COMMON_CXXFLAGS) -O0 -DBINARY_SUFFIX= -DBINARY_STRIPPED_SUFFIX=_stripped $(EXTRA_CXXFLAGS)
CXXFLAGS_PROFILE = $(LLVM_PROFILE_CXXFLAGS) $(COMMON_CXXFLAGS) -pg -O3 -DNDEBUG -DNVALGRIND -DBINARY_SUFFIX=_release -DBINARY_STRIPPED_SUFFIX= -fno-function-sections $(EXTRA_CXXFLAGS)
CXXFLAGS_RELEASE := $(LLVM_RELEASE_CXXFLAGS) $(COMMON_CXXFLAGS) -O3 -fstrict-aliasing -enable-tbaa -DNDEBUG -DNVALGRIND -DBINARY_SUFFIX=_release -DBINARY_STRIPPED_SUFFIX= $(EXTRA_CXXFLAGS)
# Use our "custom linker" that calls gold if available
COMMON_LDFLAGS := -B../tools/build_system -L/usr/local/lib -lpthread -ldl -lcurses -lm -lunwind -lz -llzma -L$(DEPS_DIR)/gcc-4.8.2-install/lib64
# Make sure that we put all symbols in the dynamic symbol table so that MCJIT can load them;
......@@ -160,6 +152,25 @@ COMMON_LDFLAGS += -Wl,-E
# end up linking to the system libstdc++ instead.)
COMMON_LDFLAGS += -static-libstdc++
ifneq ($(USE_DEBUG_LIBUNWIND),0)
COMMON_LDFLAGS += -L$(DEPS_DIR)/libunwind-trunk-debug-install/lib
# libunwind's include files warn on -Wextern-c-compat, so turn that off;
# ideally would just turn it off for header files in libunwind, maybe by
# having an internal libunwind.h that pushed/popped the diagnostic state,
# but it doesn't seem like that important a warning so just turn it off.
COMMON_CXXFLAGS += -I$(DEPS_DIR)/libunwind-trunk-debug-install/include -Wno-extern-c-compat
else
COMMON_LDFLAGS += -L$(DEPS_DIR)/libunwind-trunk-install/lib
COMMON_CXXFLAGS += -I$(DEPS_DIR)/libunwind-trunk-install/include -Wno-extern-c-compat
endif
EXTRA_CXXFLAGS ?=
CXXFLAGS := $(LLVM_CXXFLAGS) $(COMMON_CXXFLAGS) -O0 -DBINARY_SUFFIX= -DBINARY_STRIPPED_SUFFIX=_stripped $(EXTRA_CXXFLAGS)
CXXFLAGS_PROFILE = $(LLVM_PROFILE_CXXFLAGS) $(COMMON_CXXFLAGS) -pg -O3 -DNDEBUG -DNVALGRIND -DBINARY_SUFFIX=_release -DBINARY_STRIPPED_SUFFIX= -fno-function-sections $(EXTRA_CXXFLAGS)
CXXFLAGS_RELEASE := $(LLVM_RELEASE_CXXFLAGS) $(COMMON_CXXFLAGS) -O3 -fstrict-aliasing -enable-tbaa -DNDEBUG -DNVALGRIND -DBINARY_SUFFIX=_release -DBINARY_STRIPPED_SUFFIX= $(EXTRA_CXXFLAGS)
LDFLAGS := $(LLVM_LDFLAGS) $(COMMON_LDFLAGS)
LDFLAGS_DEBUG := $(LLVM_DEBUG_LDFLAGS) $(COMMON_LDFLAGS)
LDFLAGS_PROFILE = $(LLVM_PROFILE_LDFLAGS) -pg $(COMMON_LDFLAGS)
......@@ -168,16 +179,6 @@ LDFLAGS_RELEASE := $(LLVM_RELEASE_LDFLAGS) $(COMMON_LDFLAGS)
# Can't add this, because there are functions in the compiler that look unused but are hooked back from the runtime:
# LDFLAGS_RELEASE += -Wl,--gc-sections
ifneq ($(USE_DEBUG_LIBUNWIND),0)
LDFLAGS += -L$(DEPS_DIR)/libunwind-1.1-debug-install/lib
LDFLAGS_DEBUG += -L$(DEPS_DIR)/libunwind-1.1-debug-install/lib
else
LDFLAGS += -L$(DEPS_DIR)/libunwind-1.1-install/lib
LDFLAGS_DEBUG += -L$(DEPS_DIR)/libunwind-1.1-install/lib
endif
LDFLAGS_RELEASE += -L$(DEPS_DIR)/libunwind-1.1-install/lib
LDFLAGS_PROFILE += -L$(DEPS_DIR)/libunwind-1.1-install/lib
BUILD_SYSTEM_DEPS := Makefile Makefile.local $(wildcard build_system/*)
CLANG_DEPS := $(CLANG_EXE) $(abspath $(dir $(CLANG_EXE))/../../built_release)
......
......@@ -27,12 +27,6 @@
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#ifndef LIBUNWIND_PYSTON_PATCH_VERSION
#error "Please use a patched version of libunwind; see docs/INSTALLING.md"
#elif LIBUNWIND_PYSTON_PATCH_VERSION != 0x01
#error "Please repatch your version of libunwind; see docs/INSTALLING.md"
#endif
// Definition from libunwind, but standardized I suppose by the format of the .eh_frame_hdr section:
struct uw_table_entry {
int32_t start_ip_offset;
......
......@@ -24,6 +24,8 @@
#include "core/common.h"
#include "core/options.h"
extern "C" int start_thread(void* arg);
namespace pyston {
namespace threading {
......@@ -60,7 +62,7 @@ struct ThreadStartArgs {
};
static pthread_mutex_t threading_lock = PTHREAD_MUTEX_INITIALIZER;
static std::vector<pid_t> current_threads;
static std::unordered_set<pid_t> current_threads;
static std::atomic<int> signals_waiting(0);
static std::vector<ThreadState> thread_states;
......@@ -110,32 +112,17 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) {
ucontext_t* context = static_cast<ucontext_t*>(_context);
if (VERBOSITY()) {
pid_t tid = gettid();
pid_t tid = gettid();
if (VERBOSITY() >= 2) {
printf("in thread_context_dump, tid=%d\n", tid);
printf("%p %p %p\n", context, &context, context->uc_mcontext.fpregs);
printf("old rip: 0x%lx\n", context->uc_mcontext.gregs[REG_RIP]);
}
thread_states.push_back(ThreadState(context->uc_mcontext.gregs));
thread_states.push_back(ThreadState(tid, context));
signals_waiting--; // atomic on std::atomic
}
void registerMainThread() {
LockedRegion _lock(&threading_lock);
current_threads.push_back(gettid());
struct sigaction act;
act.sa_flags = SA_SIGINFO;
act.sa_sigaction = _thread_context_dump;
struct sigaction oldact;
int code = sigaction(SIGUSR2, &act, &oldact);
if (code)
err(1, NULL);
}
static void* _thread_start(void* _arg) {
pid_t tid = gettid();
ThreadStartArgs* arg = static_cast<ThreadStartArgs*>(_arg);
......@@ -148,7 +135,7 @@ static void* _thread_start(void* _arg) {
{
LockedRegion _lock(&threading_lock);
current_threads.push_back(tid);
current_threads.insert(tid);
num_starting_threads--;
if (VERBOSITY() >= 2)
......@@ -157,7 +144,17 @@ static void* _thread_start(void* _arg) {
threading::GLReadRegion _glock;
return start_func(arg1, arg2, arg3);
void* rtn = start_func(arg1, arg2, arg3);
{
LockedRegion _lock(&threading_lock);
current_threads.erase(tid);
if (VERBOSITY() >= 2)
printf("thread tid=%d exited\n", tid);
}
return rtn;
}
intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg2, Box* arg3) {
......@@ -178,6 +175,27 @@ intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg
return thread_id;
}
intptr_t call_frame_base;
void registerMainThread() {
LockedRegion _lock(&threading_lock);
// Would be nice if we could set this to the pthread start_thread,
// since _thread_start doesn't always show up in the traceback.
// call_frame_base = (intptr_t)::start_thread;
call_frame_base = (intptr_t)_thread_start;
current_threads.insert(gettid());
struct sigaction act;
act.sa_flags = SA_SIGINFO;
act.sa_sigaction = _thread_context_dump;
struct sigaction oldact;
int code = sigaction(SIGUSR2, &act, &oldact);
if (code)
err(1, NULL);
}
#if THREADING_USE_GIL
static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER;
......
......@@ -27,12 +27,20 @@ namespace threading {
intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg2, Box* arg3);
// The base of the call frame stack for a thread;
// useful for knowing when to stop unwinding.
extern intptr_t call_frame_base;
void registerMainThread();
struct ThreadState {
gregset_t gregs;
pid_t tid; // useful mostly for debugging
ucontext_t ucontext;
ThreadState(gregset_t gregs) { memcpy(this->gregs, gregs, sizeof(gregset_t)); }
ThreadState(pid_t tid, ucontext_t* ucontext) : tid(tid) {
memcpy(&this->ucontext, ucontext, sizeof(ucontext_t));
this->ucontext.uc_mcontext.fpregs = &this->ucontext.__fpregs_mem;
}
};
// Gets a ThreadState per thread, not including the thread calling this function.
// For this call to make sense, the threads all should be blocked;
......
......@@ -161,9 +161,6 @@ void runCollection() {
threading::GLPromoteRegion _lock;
std::vector<threading::ThreadState> threads = threading::getAllThreadStates();
assert(threads.size() == 0);
if (VERBOSITY("gc") >= 2)
printf("Collection #%d\n", ++ncollections);
......
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gc/root_finder.h"
#define UNW_LOCAL_ONLY
#include <libunwind.h>
......@@ -22,25 +24,17 @@
#include <cassert>
#include <vector>
#include "core/common.h"
#include "codegen/codegen.h"
#include "codegen/llvm_interpreter.h"
#include "core/common.h"
#include "core/threading.h"
#include "gc/collector.h"
#include "gc/heap.h"
#include "gc/root_finder.h"
#ifndef NVALGRIND
#include "valgrind.h"
#endif
#ifndef LIBUNWIND_PYSTON_PATCH_VERSION
#error "Please use a patched version of libunwind; see docs/INSTALLING.md"
#elif LIBUNWIND_PYSTON_PATCH_VERSION != 0x01
#error "Please repatch your version of libunwind; see docs/INSTALLING.md"
#endif
extern "C" void __libc_start_main();
namespace pyston {
......@@ -58,58 +52,41 @@ void collectRoots(void* start, void* end, TraceStack* stack) {
}
}
void collectStackRoots(TraceStack* stack) {
unw_cursor_t cursor;
unw_context_t uc;
unw_word_t ip, sp, bp;
// force callee-save registers onto the stack:
// Actually, I feel like this is pretty brittle:
// collectStackRoots itself is allowed to save the callee-save registers
// on its own stack.
jmp_buf registers __attribute__((aligned(sizeof(void*))));
static void _unwindStack(unw_cursor_t* cursor, TraceStack* stack) {
TraceStackGCVisitor visitor(stack);
unw_word_t ip, sp, bp;
#ifndef NVALGRIND
if (RUNNING_ON_VALGRIND) {
memset(&registers, 0, sizeof(registers));
memset(&cursor, 0, sizeof(cursor));
memset(&uc, 0, sizeof(uc));
memset(&ip, 0, sizeof(ip));
memset(&sp, 0, sizeof(sp));
memset(&bp, 0, sizeof(bp));
}
#endif
setjmp(registers);
assert(sizeof(registers) % 8 == 0);
// void* stack_bottom = __builtin_frame_address(0);
collectRoots(&registers, &registers + 1, stack);
unw_getcontext(&uc);
unw_init_local(&cursor, &uc);
TraceStackGCVisitor visitor(stack);
int code;
while (true) {
int code = unw_step(&cursor);
int code = unw_step(cursor);
// Negative codes are errors, zero means that there isn't a new frame.
ASSERT(code >= 0 && "something broke unwinding!", "%d '%s'", code, unw_strerror(code));
assert(code != 0 && "didn't get to the top of the stack!");
RELEASE_ASSERT(code >= 0 && "something broke unwinding!", "%d '%s'", code, unw_strerror(code));
RELEASE_ASSERT(code != 0, "didn't get to the top of the stack!");
unw_get_reg(&cursor, UNW_REG_IP, &ip);
unw_get_reg(&cursor, UNW_REG_SP, &sp);
unw_get_reg(&cursor, UNW_TDEP_BP, &bp);
unw_get_reg(cursor, UNW_REG_IP, &ip);
unw_get_reg(cursor, UNW_REG_SP, &sp);
unw_get_reg(cursor, UNW_TDEP_BP, &bp);
void* cur_sp = (void*)sp;
void* cur_bp = (void*)bp;
// std::string name = g.func_addr_registry.getFuncNameAtAddress((void*)ip, true);
// if (VERBOSITY()) printf("ip = %lx (%s), stack = [%p, %p)\n", (long) ip, name.c_str(), cur_sp, cur_bp);
unw_proc_info_t pip;
unw_get_proc_info(&cursor, &pip);
unw_get_proc_info(cursor, &pip);
// if (VERBOSITY()) printf("ip = 0x%lx (start_ip = 0x%lx), stack = [%p, %p)\n", (long) ip, pip.start_ip, cur_sp,
// cur_bp);
if (pip.start_ip == (uintptr_t)&__libc_start_main) {
break;
......@@ -121,7 +98,74 @@ void collectStackRoots(TraceStack* stack) {
}
collectRoots(cur_sp, (char*)cur_bp, stack);
if (pip.start_ip == threading::call_frame_base) {
break;
}
if (cur_bp == NULL) {
// TODO I think this indicates an unwind mistake by libunwind? Not sure.
// But if it returns cur_bp=NULL, this is probably just a thread where libunwind
// didn't reconstruct the call stack exactly the way we thought.
// TODO we probably don't need to do any unwinding here at all; we can just track
// the stack min and max for every thread.
break;
}
}
}
void collectOtherThreadsStacks(TraceStack* stack) {
std::vector<threading::ThreadState> threads = threading::getAllThreadStates();
// unw_addr_space_t as = getOtherAddrSpace();
for (threading::ThreadState& tstate : threads) {
unw_cursor_t cursor;
// int code = unw_init_remote(&cursor, as, &tstate);
int code = unw_init_local(&cursor, (ucontext_t*)&tstate.ucontext);
assert(code == 0);
// printf("Collecting thread %d\n", tstate.tid);
collectRoots(&tstate.ucontext, (&tstate.ucontext) + 1, stack);
_unwindStack(&cursor, stack);
}
}
static void collectLocalStack(TraceStack* stack) {
unw_cursor_t cursor;
unw_context_t uc;
// force callee-save registers onto the stack:
// Actually, I feel like this is pretty brittle:
// collectLocalStack itself is allowed to save the callee-save registers
// on its own stack.
jmp_buf registers __attribute__((aligned(sizeof(void*))));
#ifndef NVALGRIND
if (RUNNING_ON_VALGRIND) {
memset(&registers, 0, sizeof(registers));
memset(&cursor, 0, sizeof(cursor));
memset(&uc, 0, sizeof(uc));
}
#endif
setjmp(registers);
assert(sizeof(registers) % 8 == 0);
// void* stack_bottom = __builtin_frame_address(0);
collectRoots(&registers, &registers + 1, stack);
unw_getcontext(&uc);
unw_init_local(&cursor, &uc);
_unwindStack(&cursor, stack);
}
void collectStackRoots(TraceStack* stack) {
collectLocalStack(stack);
collectOtherThreadsStacks(stack);
}
}
}
......@@ -27,13 +27,6 @@
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#ifndef LIBUNWIND_PYSTON_PATCH_VERSION
#error "Please use a patched version of libunwind; see docs/INSTALLING.md"
#elif LIBUNWIND_PYSTON_PATCH_VERSION != 0x01
#error "Please repatch your version of libunwind; see docs/INSTALLING.md"
#endif
namespace pyston {
// from http://www.nongnu.org/libunwind/man/libunwind(3).html
......
# skip-if: True
# - Pyston is not yet thread safe. this test sometimes works and sometimes doesn't.
# - threads also seem to cause much greater memory usage for both CPython and Pyston and
# blow out the memory limits set by the tester.
from thread import start_new_thread
import time
......@@ -25,4 +20,5 @@ for i in xrange(nthreads):
while len(done) < nthreads:
time.sleep(0)
print work, sum(work)
# print work
assert sum(work) == 0
# skip-if: True
# - Pyston is not yet thread safe. this test sometimes works and sometimes doesn't.
# - threads also seem to cause much greater memory usage for both CPython and Pyston and
# blow out the memory limits set by the tester.
from thread import start_new_thread
import time
......
......@@ -50,7 +50,7 @@ def set_ulimits():
resource.setrlimit(resource.RLIMIT_CPU, (TIME_LIMIT + 1, TIME_LIMIT + 1))
MAX_MEM_MB = 100
resource.setrlimit(resource.RLIMIT_AS, (MAX_MEM_MB * 1024 * 1024, MAX_MEM_MB * 1024 * 1024))
resource.setrlimit(resource.RLIMIT_RSS, (MAX_MEM_MB * 1024 * 1024, MAX_MEM_MB * 1024 * 1024))
def get_expected_output(fn):
sys.stdout.flush()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment