Commit cc91d9b5 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Proactively save the thread registers when calling AllowThreads

And then use that register info rather than sending a signal to the
thread; this lets the thread that called AllowThreads avoid receiving
signals (e.g. during a syscall).

I'm not sure if this is valid though; are we really guaranteed that
the thread can't invalidate the saved state?
parent 28b1e352
...@@ -46,11 +46,6 @@ int tgkill(int tgid, int tid, int sig) { ...@@ -46,11 +46,6 @@ int tgkill(int tgid, int tid, int sig) {
// and wait until they start up. // and wait until they start up.
int num_starting_threads(0); int num_starting_threads(0);
// Arguments handed to _thread_start for a newly spawned thread: the user
// entry point plus the three Box* arguments it will be called with.
struct ThreadStartArgs {
void* (*start_func)(Box*, Box*, Box*);
Box* arg1, *arg2, *arg3;
};
static pthread_mutex_t threading_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t threading_lock = PTHREAD_MUTEX_INITIALIZER;
struct ThreadInfo { struct ThreadInfo {
// "bottom" in the sense of a stack, which in a down-growing stack is the highest address: // "bottom" in the sense of a stack, which in a down-growing stack is the highest address:
...@@ -59,12 +54,33 @@ struct ThreadInfo { ...@@ -59,12 +54,33 @@ struct ThreadInfo {
}; };
static std::unordered_map<pid_t, ThreadInfo> current_threads; static std::unordered_map<pid_t, ThreadInfo> current_threads;
// Register checkpoint a thread saves about itself before entering an
// AllowThreads region (see GLAllowThreadsReadRegion).  `valid` is true only
// while the owning thread is inside such a region; the ucontext contents are
// meaningful only when `valid` is set.
struct ThreadStateInternal {
    bool valid = false;
    ucontext_t ucontext;
};

// Per-thread saved state, keyed by kernel tid.  operator[] lazily creates an
// entry with valid == false, which getAllThreadStates() treats as "no
// checkpoint; fall back to signalling the thread".
static std::unordered_map<pid_t, ThreadStateInternal> saved_thread_states;
void* getStackBottom() { void* getStackBottom() {
return current_threads[gettid()].stack_bottom; return current_threads[gettid()].stack_bottom;
} }
static int signals_waiting(0); static int signals_waiting(0);
static std::vector<ThreadState> thread_states; static std::vector<ThreadState> thread_states;
// Record `tid`'s execution context together with its live stack extent so the
// collector can scan it.  The extent runs between the saved stack pointer
// (REG_RSP in `context`) and the stack bottom recorded for the thread at
// registration time; which one is the low address depends on the stack
// growth direction.
static void pushThreadState(pid_t tid, ucontext_t* context) {
#if STACK_GROWS_DOWN
    void* stack_low = (void*)context->uc_mcontext.gregs[REG_RSP];
    void* stack_high = current_threads[tid].stack_bottom;
#else
    void* stack_low = current_threads[tid].stack_bottom;
    // +sizeof(void*) so the word currently at the stack pointer is included.
    void* stack_high = (void*)(context->uc_mcontext.gregs[REG_RSP] + sizeof(void*));
#endif
    assert(stack_low < stack_high);
    thread_states.emplace_back(tid, context, stack_low, stack_high);
}
std::vector<ThreadState> getAllThreadStates() { std::vector<ThreadState> getAllThreadStates() {
// TODO need to prevent new threads from starting, // TODO need to prevent new threads from starting,
// though I suppose that will have been taken care of // though I suppose that will have been taken care of
...@@ -86,10 +102,26 @@ std::vector<ThreadState> getAllThreadStates() { ...@@ -86,10 +102,26 @@ std::vector<ThreadState> getAllThreadStates() {
signals_waiting = (current_threads.size() - 1); signals_waiting = (current_threads.size() - 1);
thread_states.clear(); thread_states.clear();
// Current strategy:
// Let the other threads decide whether they want to cooperate and save their state before we get here.
// If they did save their state (as indicated by saved_thread_states[tid].valid), then we use that.
// Otherwise, we send them a signal and use the signal handler to look at their thread state.
pid_t tgid = getpid(); pid_t tgid = getpid();
pid_t mytid = gettid(); pid_t mytid = gettid();
for (auto& pair : current_threads) { for (auto& pair : current_threads) {
pid_t tid = pair.first; pid_t tid = pair.first;
// TODO I'm pretty skeptical about this... are we really guaranteed that this is still valid?
// ex what if an object pointer got pushed onto the stack, below where we thought the stack
// ended. We might be able to handle that case by examining the entire stack region, but are
// there other issues as well?
if (saved_thread_states[tid].valid) {
pushThreadState(tid, &saved_thread_states[tid].ucontext);
signals_waiting--;
continue;
}
if (tid == mytid) if (tid == mytid)
continue; continue;
tgkill(tgid, tid, SIGUSR2); tgkill(tgid, tid, SIGUSR2);
...@@ -119,18 +151,15 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) { ...@@ -119,18 +151,15 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) {
printf("old rip: 0x%lx\n", context->uc_mcontext.gregs[REG_RIP]); printf("old rip: 0x%lx\n", context->uc_mcontext.gregs[REG_RIP]);
} }
#if STACK_GROWS_DOWN pushThreadState(tid, context);
void* stack_start = (void*)context->uc_mcontext.gregs[REG_RSP];
void* stack_end = current_threads[tid].stack_bottom;
#else
void* stack_start = current_threads[tid].stack_bottom;
void* stack_end = (void*)(context->uc_mcontext.gregs[REG_RSP] + sizeof(void*));
#endif
assert(stack_start < stack_end);
thread_states.push_back(ThreadState(tid, context, stack_start, stack_end));
signals_waiting--; signals_waiting--;
} }
// Arguments handed to _thread_start for a newly spawned thread: the user
// entry point plus the three Box* arguments it will be called with.
struct ThreadStartArgs {
    void* (*start_func)(Box*, Box*, Box*);
    Box* arg1;
    Box* arg2;
    Box* arg3;
};
static void* _thread_start(void* _arg) { static void* _thread_start(void* _arg) {
ThreadStartArgs* arg = static_cast<ThreadStartArgs*>(_arg); ThreadStartArgs* arg = static_cast<ThreadStartArgs*>(_arg);
auto start_func = arg->start_func; auto start_func = arg->start_func;
...@@ -165,6 +194,7 @@ static void* _thread_start(void* _arg) { ...@@ -165,6 +194,7 @@ static void* _thread_start(void* _arg) {
#endif #endif
.pthread_id = current_thread, .pthread_id = current_thread,
}; };
saved_thread_states[tid] = ThreadStateInternal();
num_starting_threads--; num_starting_threads--;
...@@ -180,6 +210,7 @@ static void* _thread_start(void* _arg) { ...@@ -180,6 +210,7 @@ static void* _thread_start(void* _arg) {
LockedRegion _lock(&threading_lock); LockedRegion _lock(&threading_lock);
current_threads.erase(gettid()); current_threads.erase(gettid());
saved_thread_states.erase(gettid());
if (VERBOSITY() >= 2) if (VERBOSITY() >= 2)
printf("thread tid=%d exited\n", gettid()); printf("thread tid=%d exited\n", gettid());
} }
...@@ -273,6 +304,38 @@ void registerMainThread() { ...@@ -273,6 +304,38 @@ void registerMainThread() {
} }
// For the "AllowThreads" regions, let's save the thread state at the beginning of the region.
// This means that the thread won't get interrupted by the signals we would otherwise need to
// send to get the GC roots.
// It adds some perf overhead I suppose, though I haven't measured it.
// It also means that you're not allowed to do that much inside an AllowThreads region...
// TODO maybe we should let the client decide which way to handle it
//
// Constructor: release the GL for the duration of the region and checkpoint
// this thread's registers so getAllThreadStates() can use them instead of
// interrupting us with SIGUSR2.
GLAllowThreadsReadRegion::GLAllowThreadsReadRegion() {
// I don't think it matters whether the GL release happens before or after the state
// saving; do it before, then, to reduce the amount we hold the GL:
releaseGLRead();
{
LockedRegion _lock(&threading_lock);
// Take (or lazily create) this thread's slot; a fresh entry starts with
// valid == false.
ThreadStateInternal& state = saved_thread_states[gettid()];
// Regions must not nest: the destructor clears `valid`, so it has to be
// false whenever we enter a new region.
assert(!state.valid);
// NOTE(review): getcontext() snapshots the registers (including RSP) right
// here, and the GC later scans from that RSP to the recorded stack bottom
// (see pushThreadState).  This assumes the snapshot stays representative
// for the whole region -- the commit message itself flags this as
// unverified.  TODO confirm the thread can't invalidate the saved state.
getcontext(&state.ucontext);
state.valid = true;
}
}
// Destructor: invalidate the register checkpoint taken in the constructor,
// then retake the GL.
GLAllowThreadsReadRegion::~GLAllowThreadsReadRegion() {
{
// Clear `valid` under threading_lock so getAllThreadStates() (which reads
// it under the same lock) doesn't consume a checkpoint that is about to go
// stale once this thread resumes running arbitrary code.
LockedRegion _lock(&threading_lock);
saved_thread_states[gettid()].valid = false;
}
// Reacquired after the inner scope ends -- presumably so we never block on
// the GL while still holding threading_lock; confirm intent.
acquireGLRead();
}
#if THREADING_USE_GIL #if THREADING_USE_GIL
static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER;
......
...@@ -69,6 +69,26 @@ void demoteGL(); ...@@ -69,6 +69,26 @@ void demoteGL();
// Expands to a small RAII guard class: constructing a `name` calls start(),
// and leaving its scope calls end().
#define MAKE_REGION(name, start, end) \
class name { \
public: \
name() { start(); } \
~name() { end(); } \
};
MAKE_REGION(GLReadRegion, acquireGLRead, releaseGLRead);
MAKE_REGION(GLPromoteRegion, promoteGL, demoteGL);
// The release-the-GL-for-a-scope variants are left disabled;
// GLAllowThreadsReadRegion (declared below) covers the read case.
// MAKE_REGION(GLReadReleaseRegion, releaseGLRead, acquireGLRead);
// MAKE_REGION(GLWriteReleaseRegion, releaseGLWrite, acquireGLWrite);
#undef MAKE_REGION
// RAII region that releases the GL for its scope, but additionally saves this
// thread's register state on entry so getAllThreadStates() can collect this
// thread's GC roots without sending it a signal.  Constructor and destructor
// are defined out of line, next to the saved-state bookkeeping.
class GLAllowThreadsReadRegion {
public:
GLAllowThreadsReadRegion();
~GLAllowThreadsReadRegion();
};
#if THREADING_USE_GIL #if THREADING_USE_GIL
inline void acquireGLRead() { inline void acquireGLRead() {
acquireGLWrite(); acquireGLWrite();
...@@ -82,18 +102,6 @@ inline void demoteGL() { ...@@ -82,18 +102,6 @@ inline void demoteGL() {
} }
#endif #endif
#define MAKE_REGION(name, start, end) \
class name { \
public: \
name() { start(); } \
~name() { end(); } \
};
MAKE_REGION(GLReadRegion, acquireGLRead, releaseGLRead);
MAKE_REGION(GLPromoteRegion, promoteGL, demoteGL);
MAKE_REGION(GLReadReleaseRegion, releaseGLRead, acquireGLRead);
MAKE_REGION(GLWriteReleaseRegion, releaseGLWrite, acquireGLWrite);
#undef MAKE_REGION
} // namespace threading } // namespace threading
} // namespace pyston } // namespace pyston
......
...@@ -52,15 +52,13 @@ Box* timeSleep(Box* arg) { ...@@ -52,15 +52,13 @@ Box* timeSleep(Box* arg) {
req.tv_sec = (int)(fullsecs + 0.01); req.tv_sec = (int)(fullsecs + 0.01);
req.tv_nsec = (int)(nanosecs * 1000000000); req.tv_nsec = (int)(nanosecs * 1000000000);
int code;
{ {
threading::GLReadReleaseRegion _allow_threads; threading::GLAllowThreadsReadRegion _allow_threads;
code = nanosleep(&req, NULL); int code = nanosleep(&req, NULL);
if (code) if (code)
err(1, NULL); err(1, NULL);
} }
RELEASE_ASSERT(code == 0, "%d", code);
return None; return None;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment