Commit 6446c7d5 authored by Kevin Modzelewski

Restructure the way we collect generator stacks

Previously this was pretty implicit: we relied on the fact that the generator
object would exist somewhere on the previous stack, and once we noticed the
generator, its GC handler would crawl the generator's stack.

I think we were also missing the generator's registers, but only for the top generator
and only if it wasn't on the current thread (I was unable to reproduce this).

Now, we keep explicit track of all the previous stacks that a thread has been on.
This cleans things up, and since the tracking is explicit, we can also record
where we stopped executing on each stack.  We were doing this hackily for the main
stack before; now we can do it for generator stacks as well, which will let
us make those auto-growing too.

My confidence in this change: low.
parent 8fbdb77a
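The core of the new bookkeeping is a per-thread list of StackInfo records that gets pushed and popped as the thread switches onto and off of generator stacks. A stripped-down sketch of the idea (the StackInfo fields and the push/pop methods mirror the ThreadStateInternal changes in the diff below; everything else is elided):

// Simplified sketch of the bookkeeping this commit adds; BoxedGenerator is
// left opaque here, and the rest of ThreadStateInternal is omitted.
#include <cassert>
#include <vector>

struct BoxedGenerator;

struct StackInfo {
    BoxedGenerator* next_generator; // generator we switched onto from this stack
    void* stack_start;              // start ("bottom") of the stack we suspended
    void* stack_limit;              // where execution stopped on that stack
};

struct ThreadStacks {
    void* stack_start;                      // start of the stack we are currently running on
    std::vector<StackInfo> previous_stacks; // every stack this thread has suspended

    void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit) {
        previous_stacks.push_back(StackInfo{ g, stack_start, old_stack_limit });
        stack_start = new_stack_start;
    }

    void popGenerator() {
        assert(!previous_stacks.empty());
        stack_start = previous_stacks.back().stack_start;
        previous_stacks.pop_back();
    }
};

At collection time the GC walks previous_stacks, visits each next_generator, and conservatively scans the range between stack_limit and stack_start of every suspended stack, instead of hoping to find the generator object somewhere on another stack.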
@@ -18,6 +18,7 @@
#include <cstdio>
#include <cstdlib>
#include <err.h>
#include <setjmp.h>
#include <sys/syscall.h>
#include <unistd.h>
@@ -28,6 +29,7 @@
#include "core/stats.h"
#include "core/thread_utils.h"
#include "core/util.h"
#include "gc/collector.h"
namespace pyston {
namespace threading {
@@ -50,21 +52,37 @@ private:
bool saved;
ucontext_t ucontext;
ucontext_t* context_from_generator;
int generator_depth;
public:
void* stack_bottom;
void* stack_start;
struct StackInfo {
BoxedGenerator* next_generator;
void* stack_start;
void* stack_limit;
StackInfo(BoxedGenerator* next_generator, void* stack_start, void* stack_limit)
: next_generator(next_generator), stack_start(stack_start), stack_limit(stack_limit) {
#if STACK_GROWS_DOWN
assert(stack_start > stack_limit);
assert((char*)stack_start - (char*)stack_limit < (1L << 30));
#else
assert(stack_start < stack_limit);
assert((char*)stack_limit - (char*)stack_start < (1L << 30));
#endif
}
};
std::vector<StackInfo> previous_stacks;
pthread_t pthread_id;
ThreadStateInternal(void* stack_bottom, pthread_t pthread_id)
: saved(false), generator_depth(0), stack_bottom(stack_bottom), pthread_id(pthread_id) {}
ThreadState* public_thread_state;
ThreadStateInternal(void* stack_start, pthread_t pthread_id, ThreadState* public_thread_state)
: saved(false), stack_start(stack_start), pthread_id(pthread_id), public_thread_state(public_thread_state) {}
void saveCurrent() {
assert(!saved);
if (generator_depth == 0) {
getcontext(&ucontext);
}
saved = true;
}
@@ -73,74 +91,118 @@ public:
saved = false;
}
bool isValid() { return saved || generator_depth; }
bool isValid() { return saved; }
ucontext_t* getContext() {
if (generator_depth)
return context_from_generator;
return &ucontext;
}
ucontext_t* getContext() { return &ucontext; }
void pushGenerator(ucontext_t* prev_context) {
if (generator_depth == 0)
context_from_generator = prev_context;
generator_depth++;
void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit) {
previous_stacks.emplace_back(g, this->stack_start, old_stack_limit);
this->stack_start = new_stack_start;
}
void popGenerator() {
generator_depth--;
assert(generator_depth >= 0);
assert(previous_stacks.size());
StackInfo& stack = previous_stacks.back();
stack_start = stack.stack_start;
previous_stacks.pop_back();
}
void assertNoGenerators() { assert(generator_depth == 0); }
void assertNoGenerators() { assert(previous_stacks.size() == 0); }
void accept(gc::GCVisitor* v) {
auto pub_state = public_thread_state;
if (pub_state->exc_type)
v->visit(pub_state->exc_type);
if (pub_state->exc_value)
v->visit(pub_state->exc_value);
if (pub_state->exc_traceback)
v->visit(pub_state->exc_traceback);
friend void* getStackTop();
for (auto& stack_info : previous_stacks) {
v->visit(stack_info.next_generator);
#if STACK_GROWS_DOWN
v->visitPotentialRange((void**)stack_info.stack_limit, (void**)stack_info.stack_start);
#else
v->visitPotentialRange((void**)stack_info.stack_start, (void**)stack_info.stack_limit);
#endif
}
}
};
static std::unordered_map<pthread_t, ThreadStateInternal*> current_threads;
// TODO could optimize these by keeping a __thread local reference to current_threads[pthread_self()]
void* getStackBottom() {
return current_threads[pthread_self()]->stack_bottom;
}
void* getStackTop() {
ThreadStateInternal* state = current_threads[pthread_self()];
int depth = state->generator_depth;
if (depth == 0) {
return __builtin_frame_address(0);
}
return (void*)state->context_from_generator->uc_mcontext.gregs[REG_RSP];
void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit) {
current_threads[pthread_self()]->pushGenerator(g, new_stack_start, old_stack_limit);
}
void pushGenerator(ucontext_t* prev_context) {
current_threads[pthread_self()]->pushGenerator(prev_context);
}
void popGenerator() {
current_threads[pthread_self()]->popGenerator();
}
// These are guarded by threading_lock
static int signals_waiting(0);
static std::vector<ThreadGCState> thread_states;
static gc::GCVisitor* cur_visitor = NULL;
// This function should only be called with the threading_lock held:
static void pushThreadState(ThreadStateInternal* thread_state, ucontext_t* context) {
assert(cur_visitor);
cur_visitor->visitPotentialRange((void**)context, (void**)(context + 1));
#if STACK_GROWS_DOWN
void* stack_low = (void*)context->uc_mcontext.gregs[REG_RSP];
void* stack_high = thread_state->stack_start;
#else
void* stack_low = thread_state->stack_start;
void* stack_high = (void*)context->uc_mcontext.gregs[REG_RSP];
#endif
assert(stack_low < stack_high);
cur_visitor->visitPotentialRange((void**)stack_low, (void**)stack_high);
thread_state->accept(cur_visitor);
}
// This better not get inlined:
void* getCurrentStackLimit() __attribute__((noinline));
void* getCurrentStackLimit() {
return __builtin_frame_address(0);
}
static void visitLocalStack(gc::GCVisitor* v) {
// force callee-save registers onto the stack:
jmp_buf registers __attribute__((aligned(sizeof(void*))));
setjmp(registers);
assert(sizeof(registers) % 8 == 0);
v->visitPotentialRange((void**)&registers, (void**)((&registers) + 1));
ThreadStateInternal* thread_state = current_threads[pthread_self()];
static void pushThreadState(pthread_t tid, ucontext_t* context) {
#if STACK_GROWS_DOWN
void* stack_start = (void*)context->uc_mcontext.gregs[REG_RSP];
void* stack_end = current_threads[tid]->stack_bottom;
void* stack_low = getCurrentStackLimit();
void* stack_high = thread_state->stack_start;
#else
void* stack_start = current_threads[tid]->stack_bottom;
void* stack_end = (void*)(context->uc_mcontext.gregs[REG_RSP] + sizeof(void*));
void* stack_low = thread_state->stack_start;
void* stack_high = getCurrentStackLimit();
#endif
assert(stack_start < stack_end);
thread_states.push_back(ThreadGCState(tid, context, stack_start, stack_end, &cur_thread_state));
assert(stack_low < stack_high);
v->visitPotentialRange((void**)stack_low, (void**)stack_high);
thread_state->accept(v);
}
std::vector<ThreadGCState> getAllThreadStates() {
void visitAllStacks(gc::GCVisitor* v) {
visitLocalStack(v);
// TODO need to prevent new threads from starting,
// though I suppose that will have been taken care of
// by the caller of this function.
LOCK_REGION(&threading_lock);
assert(cur_visitor == NULL);
cur_visitor = v;
while (true) {
// TODO shouldn't busy-wait:
if (num_starting_threads) {
@@ -153,7 +215,6 @@ std::vector<ThreadGCState> getAllThreadStates() {
}
signals_waiting = (current_threads.size() - 1);
thread_states.clear();
// Current strategy:
// Let the other threads decide whether they want to cooperate and save their state before we get here.
@@ -163,18 +224,13 @@ std::vector<ThreadGCState> getAllThreadStates() {
pthread_t mytid = pthread_self();
for (auto& pair : current_threads) {
pthread_t tid = pair.first;
ThreadStateInternal* state = pair.second;
if (tid == mytid)
continue;
// TODO I'm pretty skeptical about this... are we really guaranteed that this is still valid?
// (in the non-generator case where the thread saved its own state)
// ex what if an object pointer got pushed onto the stack, below where we thought the stack
// ended. We might be able to handle that case by examining the entire stack region, but are
// there other issues as well?
ThreadStateInternal* state = pair.second;
if (state->isValid()) {
pushThreadState(tid, state->getContext());
pushThreadState(state, state->getContext());
signals_waiting--;
continue;
}
@@ -192,7 +248,7 @@ std::vector<ThreadGCState> getAllThreadStates() {
assert(num_starting_threads == 0);
return std::move(thread_states);
cur_visitor = NULL;
}
static void _thread_context_dump(int signum, siginfo_t* info, void* _context) {
@@ -207,7 +263,7 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) {
printf("old rip: 0x%lx\n", (intptr_t)context->uc_mcontext.gregs[REG_RIP]);
}
pushThreadState(tid, context);
pushThreadState(current_threads[tid], context);
signals_waiting--;
}
@@ -246,7 +302,7 @@ static void* _thread_start(void* _arg) {
#else
void* stack_bottom = stack_start;
#endif
current_threads[current_thread] = new ThreadStateInternal(stack_bottom, current_thread);
current_threads[current_thread] = new ThreadStateInternal(stack_bottom, current_thread, &cur_thread_state);
num_starting_threads--;
@@ -343,7 +399,7 @@ static void* find_stack() {
void registerMainThread() {
LOCK_REGION(&threading_lock);
current_threads[pthread_self()] = new ThreadStateInternal(find_stack(), pthread_self());
current_threads[pthread_self()] = new ThreadStateInternal(find_stack(), pthread_self(), &cur_thread_state);
struct sigaction act;
memset(&act, 0, sizeof(act));
......
@@ -25,6 +25,11 @@
namespace pyston {
class Box;
class BoxedGenerator;
namespace gc {
class GCVisitor;
}
namespace threading {
@@ -39,40 +44,19 @@ extern __thread ThreadState cur_thread_state;
// returns a thread id (currently, the pthread_t id)
intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg2, Box* arg3);
// Hooks to tell the threading machinery about the main thread:
void registerMainThread();
void finishMainThread();
struct ThreadGCState {
pthread_t tid; // useful mostly for debugging
ucontext_t* ucontext;
// start and end (start < end) of the thread's main stack.
// The thread may not be actually executing on that stack, since it may be
// in a generator, but those generators will be tracked separately.
void* stack_start, *stack_end;
// Hook for the GC; will visit all the threads (including the current one), visiting their
// stacks and thread-local ThreadState objects
void visitAllStacks(gc::GCVisitor* v);
ThreadState* thread_state;
ThreadGCState(pthread_t tid, ucontext_t* ucontext, void* stack_start, void* stack_end, ThreadState* thread_state)
: tid(tid), ucontext(ucontext), stack_start(stack_start), stack_end(stack_end), thread_state(thread_state) {}
};
// Gets a ThreadGCState per thread, not including the thread calling this function.
// For this call to make sense, all the threads should be blocked;
// as a corollary, this call is very much not thread safe.
std::vector<ThreadGCState> getAllThreadStates();
// Get the stack "bottom" (ie first pushed data. For stacks that grow down, this
// will be the highest address).
void* getStackBottom();
void* getStackTop();
// We need to track the state of the thread's main stack. This can get complicated when
// generators are involved, so we add some hooks for the generator code to notify the threading
// code that it has switched onto or off of a generator.
// A generator should call pushGenerator() when it gets switched to, with a pointer to the context
// that it will return to (ie the context of the thing that called the generator).
// The generator should call popGenerator() when it is about to switch back to the caller.
void pushGenerator(ucontext_t* prev_context);
// Some hooks to keep track of the list of stacks that this thread has been using.
// Every time we switch to a new generator, we need to pass a reference to the generator
// itself (so we can access the registers it is saving), the location of the new stack, and
// where we stopped executing on the old stack.
void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit);
void popGenerator();
......
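For a concrete usage example, this is how the yield() path in generator.cpp (further down in this diff) brackets the switch back to the caller; condensed here with the generator simply named g:

// Condensed from the yield() changes below: the generator drops its StackInfo
// record before switching back to the caller, and re-registers its stack (plus
// where the caller stopped executing) once it gets resumed again.
threading::popGenerator();                   // leaving the generator's stack
swapcontext(&g->context, &g->returnContext); // switch back to the caller
// ... the caller eventually resumes us here ...
threading::pushGenerator(g, g->stack + BoxedGenerator::STACK_SIZE,
                         (void*)g->returnContext.uc_mcontext.gregs[REG_RSP]);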
@@ -18,13 +18,13 @@
#include <cstdio>
#include <cstdlib>
#include "codegen/ast_interpreter.h"
#include "codegen/codegen.h"
#include "core/common.h"
#include "core/threading.h"
#include "core/types.h"
#include "core/util.h"
#include "gc/heap.h"
#include "gc/root_finder.h"
#include "runtime/types.h"
#ifndef NVALGRIND
@@ -138,6 +138,12 @@ void GCVisitor::visit(void* p) {
}
void GCVisitor::visitRange(void* const* start, void* const* end) {
ASSERT((char*)end - (char*)start <= 1000000000, "Asked to scan %.1fGB -- a bug?",
((char*)end - (char*)start) * 1.0 / (1 << 30));
assert((uintptr_t)start % sizeof(void*) == 0);
assert((uintptr_t)end % sizeof(void*) == 0);
while (start < end) {
visit(*start);
start++;
@@ -152,6 +158,12 @@ void GCVisitor::visitPotential(void* p) {
}
void GCVisitor::visitPotentialRange(void* const* start, void* const* end) {
ASSERT((char*)end - (char*)start <= 1000000000, "Asked to scan %.1fGB -- a bug?",
((char*)end - (char*)start) * 1.0 / (1 << 30));
assert((uintptr_t)start % sizeof(void*) == 0);
assert((uintptr_t)end % sizeof(void*) == 0);
while (start < end) {
visitPotential(*start);
start++;
@@ -166,10 +178,11 @@ static void markPhase() {
#endif
TraceStack stack(roots);
collectStackRoots(&stack);
GCVisitor visitor(&stack);
threading::visitAllStacks(&visitor);
gatherInterpreterRoots(&visitor);
for (void* p : nonheap_roots) {
Box* b = reinterpret_cast<Box*>(p);
BoxedClass* cls = b->cls;
......
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gc/root_finder.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <setjmp.h>
#include <vector>
#include "codegen/ast_interpreter.h"
#include "codegen/codegen.h"
#include "core/common.h"
#include "core/threading.h"
#include "gc/collector.h"
#include "gc/heap.h"
#ifndef NVALGRIND
#include "valgrind.h"
#endif
extern "C" void __libc_start_main();
namespace pyston {
namespace gc {
void collectRoots(void* start, void* end, TraceStack* stack) {
assert(start <= end);
ASSERT((char*)end - (char*)start <= 1000000000, "Asked to scan %.1fGB -- a bug?",
((char*)end - (char*)start) * 1.0 / (1 << 30));
GCVisitor(stack).visitPotentialRange((void**)start, (void**)end);
}
void collectOtherThreadsStacks(TraceStack* stack) {
GCVisitor v(stack);
std::vector<threading::ThreadGCState> threads = threading::getAllThreadStates();
for (threading::ThreadGCState& tstate : threads) {
collectRoots(tstate.stack_start, tstate.stack_end, stack);
collectRoots(tstate.ucontext, tstate.ucontext + 1, stack);
if (tstate.thread_state->exc_type)
v.visit(tstate.thread_state->exc_type);
if (tstate.thread_state->exc_value)
v.visit(tstate.thread_state->exc_value);
if (tstate.thread_state->exc_traceback)
v.visit(tstate.thread_state->exc_traceback);
}
}
static void collectLocalStack(TraceStack* stack) {
// force callee-save registers onto the stack:
// Actually, I feel like this is pretty brittle:
// collectLocalStack itself is allowed to save the callee-save registers
// on its own stack.
jmp_buf registers __attribute__((aligned(sizeof(void*))));
setjmp(registers);
assert(sizeof(registers) % 8 == 0);
// void* stack_bottom = __builtin_frame_address(0);
collectRoots(&registers, (&registers) + 1, stack);
void* stack_bottom = threading::getStackBottom();
void* stack_top = threading::getStackTop();
#if STACK_GROWS_DOWN
collectRoots(stack_top, stack_bottom, stack);
#else
collectRoots(stack_bottom, stack_top, stack);
#endif
GCVisitor v(stack);
if (threading::cur_thread_state.exc_type)
v.visit(threading::cur_thread_state.exc_type);
if (threading::cur_thread_state.exc_value)
v.visit(threading::cur_thread_state.exc_value);
if (threading::cur_thread_state.exc_traceback)
v.visit(threading::cur_thread_state.exc_traceback);
}
void collectStackRoots(TraceStack* stack) {
collectLocalStack(stack);
collectOtherThreadsStacks(stack);
GCVisitor visitor(stack);
gatherInterpreterRoots(&visitor);
}
}
}
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_GC_ROOTFINDER_H
#define PYSTON_GC_ROOTFINDER_H
namespace pyston {
namespace gc {
class TraceStack;
void collectStackRoots(TraceStack*);
}
}
#endif
@@ -34,7 +34,8 @@ namespace pyston {
static void generatorEntry(BoxedGenerator* g) {
assert(g->cls == generator_cls);
assert(g->function->cls == function_cls);
threading::pushGenerator(&g->returnContext);
threading::pushGenerator(g, g->stack + BoxedGenerator::STACK_SIZE,
(void*)g->returnContext.uc_mcontext.gregs[REG_RSP]);
try {
// call body of the generator
@@ -114,7 +115,8 @@ extern "C" Box* yield(BoxedGenerator* obj, Box* value) {
threading::popGenerator();
swapcontext(&self->context, &self->returnContext);
threading::pushGenerator(&self->returnContext);
threading::pushGenerator(obj, obj->stack + BoxedGenerator::STACK_SIZE,
(void*)obj->returnContext.uc_mcontext.gregs[REG_RSP]);
// if the generator receives a exception from the caller we have to throw it
if (self->exception) {
@@ -174,10 +176,16 @@ extern "C" void generatorGCHandler(GCVisitor* v, Box* b) {
if (g->exception)
v->visit(g->exception);
v->visitPotentialRange((void**)&g->context, ((void**)&g->context) + sizeof(g->context) / sizeof(void*));
if (g->running) {
v->visitPotentialRange((void**)&g->returnContext,
((void**)&g->returnContext) + sizeof(g->returnContext) / sizeof(void*));
v->visitPotentialRange((void**)&g->stack[0], (void**)&g->stack[BoxedGenerator::STACK_SIZE]);
} else {
v->visitPotentialRange((void**)&g->context, ((void**)&g->context) + sizeof(g->context) / sizeof(void*));
#if STACK_GROWS_DOWN
v->visitPotentialRange((void**)g->context.uc_mcontext.gregs[REG_RSP], (void**)g->stack + BoxedGenerator::STACK_SIZE);
#endif
}
}
......
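# Regression test for the generator stack scanning above: advance the generator
# once, then churn list allocations (presumably enough to trigger a collection
# while g is suspended on its own stack), and check that the generator's locals
# l1..l13 survive and print correctly after the second next().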
def g():
l1 = [1]
l2 = [2]
l3 = [3]
l4 = [4]
l5 = [5]
l6 = [6]
l7 = [7]
l8 = [8]
l9 = [9]
l10 = [10]
l11 = [11]
l12 = [12]
l13 = [13]
yield 1
print l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13
yield 2
g = g()
print g.next()
l = [None] * 10
for i in xrange(1000):
l * 1000
print g.next()