Commit 6eea2b47 authored by Kevin Modzelewski

Move our ThreadState handling closer to CPython's

Our underlying implementation still looks pretty different, but
rather than implement some newly-needed APIs completely from scratch,
I copied in some of CPython's implementation.

The result is a bit messy (multiple ways of doing similar things),
but I think it's a step in the right direction.

Regardless, this commit adds "clean up thread-local storage when the
local object dies" functionality, as well as better cleanup when
there are multiple threads.  I think this should help with the fork
issues as well.
parent 1a223d56
......@@ -12,7 +12,6 @@ extern "C" {
/* State shared between threads */
// Pyston change: this is not our format
#if 0
struct _ts; /* Forward */
struct _is; /* Forward */
......@@ -21,6 +20,9 @@ typedef struct _is {
struct _is *next;
struct _ts *tstate_head;
// Pyston change
// Note: any changes here need to show up in PyInterpreterState_Clear as well
#if 0
PyObject *modules;
PyObject *sysdict;
PyObject *builtins;
......@@ -37,10 +39,8 @@ typedef struct _is {
int tscdump;
#endif
} PyInterpreterState;
#endif
struct _PyInterpreterState;
typedef struct _PyInterpreterState PyInterpreterState;
} PyInterpreterState;
/* State unique per thread */
......@@ -59,16 +59,32 @@ typedef int (*Py_tracefunc)(PyObject *, struct _frame *, int, PyObject *);
#define PyTrace_C_EXCEPTION 5
#define PyTrace_C_RETURN 6
// Pyston change: this is not our format
#if 0
typedef struct _ts {
/* See Python/ceval.c for comments explaining most fields */
struct _ts *next;
PyInterpreterState *interp;
struct _frame *frame;
// Pyston change:
//struct _frame *frame;
void* frame_info;
int recursion_depth;
int gilstate_counter;
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
PyObject *dict; /* Stores per-thread state */
int trash_delete_nesting;
PyObject *trash_delete_later;
// Pyston change:
// Pyston note: additions in here need to be mirrored in PyThreadState_Clear
#if 0
/* 'tracing' keeps track of the execution depth when tracing/profiling.
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
......@@ -80,16 +96,10 @@ typedef struct _ts {
PyObject *c_profileobj;
PyObject *c_traceobj;
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
PyObject *exc_type;
PyObject *exc_value;
PyObject *exc_traceback;
PyObject *dict; /* Stores per-thread state */
/* tick_counter is incremented whenever the check_interval ticker
* reaches zero. The purpose is to give a useful measure of the number
* of interpreted bytecode instructions in a given thread. This
......@@ -98,37 +108,13 @@ typedef struct _ts {
*/
int tick_counter;
int gilstate_counter;
PyObject *async_exc; /* Asynchronous exception to raise */
long thread_id; /* Thread id where this tstate was created */
int trash_delete_nesting;
PyObject *trash_delete_later;
/* XXX signal handlers should also be here */
} PyThreadState;
#endif
typedef struct _ts {
void* frame_info; // This points to top python FrameInfo object
int recursion_depth;
int gilstate_counter;
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
PyObject *dict; /* Stores per-thread state */
int trash_delete_nesting;
PyObject *trash_delete_later;
// Pyston note: additions in here need to be mirrored in ThreadStateInternal::accept
} PyThreadState;
PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *) PYSTON_NOEXCEPT;
......
......@@ -398,7 +398,6 @@ local_clear(localobject *self)
Py_CLEAR(self->dummies);
Py_CLEAR(self->wr_callback);
/* Remove all strong references to dummies from the thread states */
/*
if (self->key
&& (tstate = PyThreadState_Get())
&& tstate->interp) {
......@@ -409,8 +408,6 @@ local_clear(localobject *self)
PyDict_GetItem(tstate->dict, self->key))
PyDict_DelItem(tstate->dict, self->key);
}
*/
printf("threadmodule.c:413: can't really free up the thread-specific dummy storage\n");
return 0;
}
......
......@@ -36,11 +36,33 @@
namespace pyston {
namespace threading {
// Locking helpers for the interpreter's linked list of thread states.
// With WITH_THREAD defined they wrap a single lazily-allocated lock; without
// it all three macros expand to nothing.
#ifdef WITH_THREAD
#include "pythread.h"
static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */
// Allocates head_mutex the first time it is invoked; later invocations see a
// non-NULL head_mutex and do nothing.  The check-and-assign is not guarded by
// any lock inside the macro itself.
#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
// Acquire / release head_mutex.  HEAD_INIT() must have run first, since these
// pass head_mutex straight through to the PyThread lock functions.
// NOTE(review): WAIT_LOCK presumably requests a blocking acquire -- confirm in pythread.h.
#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
#define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
/* The single PyInterpreterState used by this process'
GILState implementation
*/
// Pyston change: these CPython globals are kept only as commented-out reference.
// static PyInterpreterState *autoInterpreterState = NULL;
// static int autoTLSkey = 0;
#else
// Non-threaded build: the head-lock macros are no-ops.
#define HEAD_INIT() /* Nothing */
#define HEAD_LOCK() /* Nothing */
#define HEAD_UNLOCK() /* Nothing */
#endif
PyInterpreterState interpreter_state;
std::unordered_set<PerThreadSetBase*> PerThreadSetBase::all_instances;
extern "C" {
__thread PyThreadState cur_thread_state
= { NULL, 0, 1, NULL, NULL, NULL, NULL, 0, NULL }; // not sure if we need to explicitly request zero-initialization
__thread PyThreadState cur_thread_state = { NULL, &interpreter_state, NULL, 0, 1, NULL, NULL, NULL, NULL, 0,
NULL }; // not sure if we need to explicitly request zero-initialization
}
PthreadFastMutex threading_lock;
......@@ -83,8 +105,13 @@ public:
PyThreadState* public_thread_state;
ThreadStateInternal(void* stack_start, pthread_t pthread_id, PyThreadState* public_thread_state)
: saved(false), stack_start(stack_start), pthread_id(pthread_id), public_thread_state(public_thread_state) {}
// Records the thread's stack start, pthread id and public PyThreadState, and
// links that PyThreadState onto the front of interpreter_state's tstate list.
ThreadStateInternal(void* stack_start, pthread_t pthread_id, PyThreadState* tstate)
: saved(false), stack_start(stack_start), pthread_id(pthread_id), public_thread_state(tstate) {
// The list head is only modified while holding the head lock.
HEAD_LOCK();
// Push-front: the new state points at the previous head, then becomes the head.
tstate->next = interpreter_state.tstate_head;
interpreter_state.tstate_head = tstate;
HEAD_UNLOCK();
}
void saveCurrent() {
assert(!saved);
......@@ -181,8 +208,46 @@ static void registerThread(bool is_starting_thread) {
printf("child initialized; tid=%ld\n", current_thread);
}
/* Shared unlinking logic for PyThreadState_Delete() and PyThreadState_DeleteCurrent().
 *
 * Removes `tstate` from the singly linked tstate list of its interpreter while
 * holding the head lock.  Calls Py_FatalError if `tstate` is NULL, if it has no
 * interpreter, if it is not on the list, or if the list is found to be
 * circular.  The storage behind `tstate` is not released here. */
static void tstate_delete_common(PyThreadState* tstate) {
    if (tstate == NULL)
        Py_FatalError("PyThreadState_Delete: NULL tstate");

    PyInterpreterState* owner = tstate->interp;
    if (owner == NULL)
        Py_FatalError("PyThreadState_Delete: NULL interp");

    HEAD_LOCK();

    // `link` always addresses the pointer that refers to the node being
    // examined, so the final unlink is a single store through it.
    PyThreadState** link = &owner->tstate_head;
    PyThreadState* last_seen = NULL;
    while (true) {
        PyThreadState* node = *link;

        if (node == NULL)
            Py_FatalError("PyThreadState_Delete: invalid tstate");
        if (node == tstate)
            break;

        /* Sanity check.  These states should never happen but if
         * they do we must abort.  Otherwise we'll end up spinning
         * in a tight loop with the lock held.  A similar check is done
         * in thread.c find_key(). */
        if (node == last_seen)
            Py_FatalError("PyThreadState_Delete: small circular list(!)"
                          " and tstate not found.");
        last_seen = node;

        if (node->next == owner->tstate_head)
            Py_FatalError("PyThreadState_Delete: circular list(!) and"
                          " tstate not found.");

        link = &node->next;
    }

    // Splice the node out of the list.
    *link = tstate->next;

    HEAD_UNLOCK();

    // Pyston change:
    // free(tstate);
}
static void unregisterThread() {
current_internal_thread_state->assertNoGenerators();
tstate_delete_common(current_internal_thread_state->public_thread_state);
PyThreadState_Clear(current_internal_thread_state->public_thread_state);
{
pthread_t current_thread = pthread_self();
LOCK_REGION(&threading_lock);
......@@ -191,6 +256,7 @@ static void unregisterThread() {
if (VERBOSITY() >= 2)
printf("thread tid=%ld exited\n", current_thread);
}
delete current_internal_thread_state;
current_internal_thread_state = 0;
}
......@@ -228,6 +294,7 @@ extern "C" void PyGILState_Release(PyGILState_STATE oldstate) noexcept {
if (cur_thread_state.gilstate_counter == 0) {
assert(oldstate == PyGILState_UNLOCKED);
RELEASE_ASSERT(0, "this is currently untested");
// Pyston change:
unregisterThread();
}
}
......@@ -336,8 +403,11 @@ static long main_thread_id;
void registerMainThread() {
LOCK_REGION(&threading_lock);
HEAD_INIT();
main_thread_id = pthread_self();
assert(!interpreter_state.tstate_head);
assert(!current_internal_thread_state);
current_internal_thread_state = new ThreadStateInternal(find_stack(), pthread_self(), &cur_thread_state);
current_threads[pthread_self()] = current_internal_thread_state;
......@@ -428,6 +498,9 @@ extern "C" void PyEval_ReInitThreads() noexcept {
if (it->second->pthread_id == current_thread) {
++it;
} else {
PyThreadState_Clear(it->second->public_thread_state);
tstate_delete_common(it->second->public_thread_state);
delete it->second;
it = current_threads.erase(it);
}
}
......@@ -615,6 +688,43 @@ extern "C" PyObject* _PyThread_CurrentFrames(void) noexcept {
}
}
// Clears every thread state linked to `interp` by calling PyThreadState_Clear
// on each one.  The whole walk happens with the head lock held.  The
// interpreter-level fields that CPython clears here are listed below but left
// commented out.
extern "C" void PyInterpreterState_Clear(PyInterpreterState* interp) noexcept {
    HEAD_LOCK();
    PyThreadState* cursor = interp->tstate_head;
    while (cursor != NULL) {
        PyThreadState_Clear(cursor);
        cursor = cursor->next;
    }
    HEAD_UNLOCK();

    // Py_CLEAR(interp->codec_search_path);
    // Py_CLEAR(interp->codec_search_cache);
    // Py_CLEAR(interp->codec_error_registry);
    // Py_CLEAR(interp->modules);
    // Py_CLEAR(interp->modules_reloading);
    // Py_CLEAR(interp->sysdict);
    // Py_CLEAR(interp->builtins);
}
// Releases the object references held by `tstate`: its per-thread dict and the
// current exception type/value/traceback.  Each field is reset through
// Py_CLEAR.  `tstate` must be non-NULL; frame_info is left untouched.
extern "C" void PyThreadState_Clear(PyThreadState* tstate) noexcept {
assert(tstate);
// NOTE(review): presumably any deferred trashcan deallocations must already
// have been processed by the time a thread state is cleared -- confirm.
assert(!tstate->trash_delete_later);
// TODO: should we try to clean this up at all?
// CPython decrefs the frame object:
// assert(!tstate->frame_info);
Py_CLEAR(tstate->dict);
Py_CLEAR(tstate->curexc_type);
Py_CLEAR(tstate->curexc_value);
Py_CLEAR(tstate->curexc_traceback);
}
// Returns the thread state at the head of `interp`'s tstate list.
// The head pointer is read without taking the head lock.
extern "C" PyThreadState* PyInterpreterState_ThreadHead(PyInterpreterState* interp) noexcept {
    PyThreadState* const first = interp->tstate_head;
    return first;
}
// Returns the thread state that follows `tstate` on its list, i.e. tstate->next.
// The link is read without taking the head lock.
extern "C" PyThreadState* PyThreadState_Next(PyThreadState* tstate) noexcept {
    PyThreadState* const successor = tstate->next;
    return successor;
}
} // namespace threading
} // namespace pyston
......@@ -1305,15 +1305,6 @@ extern "C" BORROWED(PyObject*) PyThreadState_GetDict(void) noexcept {
return dict;
}
extern "C" void PyThreadState_Clear(PyThreadState* tstate) noexcept {
assert(tstate == NULL);
Py_CLEAR(cur_thread_state.dict);
Py_CLEAR(cur_thread_state.curexc_type);
Py_CLEAR(cur_thread_state.curexc_value);
Py_CLEAR(cur_thread_state.curexc_traceback);
}
extern "C" int _PyOS_URandom(void* buffer, Py_ssize_t size) noexcept {
if (size < 0) {
PyErr_Format(PyExc_ValueError, "negative argument not allowed");
......
......@@ -4834,7 +4834,7 @@ extern "C" void Py_Finalize() noexcept {
_PyCodecRegistry_Deinit();
// TODO: we might have to do this in a loop:
_PyUnicode_Fini();
PyThreadState_Clear(NULL);
PyInterpreterState_Clear(PyThreadState_GET()->interp);
for (auto b : late_constants) {
Py_DECREF(b);
......@@ -4865,6 +4865,9 @@ extern "C" void Py_Finalize() noexcept {
}
Py_DECREF(garbage);
auto ts = PyThreadState_GET();
bool other_threads = ((bool)ts->next) || (ts != ts->interp->tstate_head);
#endif
// PyGC_Collect());
......@@ -4927,15 +4930,19 @@ extern "C" void Py_Finalize() noexcept {
if (VERBOSITY())
PRINT_TOTAL_REFS();
if (num_garbage_objects == 0) {
if (num_garbage_objects) {
if (VERBOSITY())
fprintf(stderr, "[%d garbage objects]\n", num_garbage_objects);
} else if (other_threads) {
if (VERBOSITY())
fprintf(stderr, "[Other threads alive, can't free their refs]\n");
} else {
#ifdef Py_TRACE_REFS
if (_Py_RefTotal != 0)
_Py_PrintReferenceAddressesCapped(stderr, 10);
#endif
RELEASE_ASSERT(_Py_RefTotal == 0, "%ld refs remaining!", _Py_RefTotal);
} else if (VERBOSITY()) {
fprintf(stderr, "[%d garbage objects]\n", num_garbage_objects);
}
#endif
}
......
# expected: reffail
from decimal import Decimal
for d in (Decimal("0.5"), Decimal("0"), Decimal(0), Decimal(1.0)):
......
# expected: reffail
import gc
import threading
......
import thread
import _weakref
def f():
global r
l = thread._local()
class C(object):
pass
o = C()
r = _weakref.ref(o)
l.o = o
del o
print type(r())
del l
f()
print type(r())
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment