Commit 0c0a7da1 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Check destructors and weakrefs for extension objects

Add a new allocation type CONSERVATIVE_PYTHON for extensions objects, for which
we don't have heap maps, but should still have Python finalization semantics
(ie destructors and weakrefs).  Previously we were just marking them as
CONSERVATIVE and skipping them during the sweep phase, and not running destructors
or handling weakrefs.

It's a bit tricky to figure out when to mark an allocation as conservative vs
conservative-python; the approach in this commit is to mark all capi-originated
allocations as conservative, and then when we call PyObject_Init or PyObject_InitVar,
switch them from conservative to conservative-python.  I think this is more expensive
but safer than assuming that certain apis will always/never be used as object
memory.

Unfortunately there are quite a few extension classes that request a custom tp_dealloc,
and this commit just keeps the old (bad) behavior of ignoring those.  I tried to verify
as many as I could and they all seem benign, but it will be nice to have
real destructor support :)
parent 393ee878
......@@ -390,6 +390,7 @@ enum class GCKind : uint8_t {
PRECISE = 3,
UNTRACKED = 4,
HIDDEN_CLASS = 5,
CONSERVATIVE_PYTHON = 6,
};
extern "C" void* gc_alloc(size_t nbytes, GCKind kind);
......
......@@ -183,6 +183,16 @@ bool isValidGCObject(void* p) {
return isNonheapRoot(p) || (global_heap.getAllocationFromInteriorPointer(p)->user_data == p);
}
void setIsPythonObject(Box* b) {
auto al = global_heap.getAllocationFromInteriorPointer(b);
assert(al->user_data == (char*)b);
if (al->kind_id == GCKind::CONSERVATIVE) {
al->kind_id = GCKind::CONSERVATIVE_PYTHON;
} else {
assert(al->kind_id == GCKind::PYTHON);
}
}
static std::unordered_set<GCRootHandle*>* getRootHandles() {
static std::unordered_set<GCRootHandle*> root_handles;
return &root_handles;
......@@ -289,7 +299,7 @@ void markPhase() {
GCKind kind_id = al->kind_id;
if (kind_id == GCKind::UNTRACKED) {
continue;
} else if (kind_id == GCKind::CONSERVATIVE) {
} else if (kind_id == GCKind::CONSERVATIVE || kind_id == GCKind::CONSERVATIVE_PYTHON) {
uint32_t bytes = al->kind_data;
if (DEBUG >= 2) {
if (global_heap.small_arena.contains(p)) {
......
......@@ -61,6 +61,7 @@ void enableGC();
// These are mostly for debugging:
bool isValidGCObject(void* p);
bool isNonheapRoot(void* p);
void setIsPythonObject(Box* b);
// Debugging/validation helpers: if a GC should not happen in certain sections (ex during unwinding),
// use these functions to mark that. This is different from disableGC/enableGC, since it causes an
......
......@@ -152,7 +152,7 @@ bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
if (alloc_kind == GCKind::PYTHON) {
if (alloc_kind == GCKind::PYTHON || alloc_kind == GCKind::CONSERVATIVE_PYTHON) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
......@@ -170,7 +170,11 @@ bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
}
}
ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
// XXX: we are currently ignoring destructors (tp_dealloc) for extension objects, since we have
// historically done that (whoops) and there are too many to be worth changing for now as long
// as we can get real destructor support soon.
ASSERT(b->cls->tp_dealloc == NULL || alloc_kind == GCKind::CONSERVATIVE_PYTHON, "%s", getTypeName(b));
if (b->cls->simple_destructor)
b->cls->simple_destructor(b);
}
......@@ -208,7 +212,7 @@ struct HeapStatistics {
int num_hcls_by_attrs[HCLS_ATTRS_STAT_MAX + 1];
int num_hcls_by_attrs_exceed;
TypeStats python, conservative, untracked, hcls, precise;
TypeStats python, conservative, conservative_python, untracked, hcls, precise;
TypeStats total;
HeapStatistics(bool collect_cls_stats, bool collect_hcls_stats)
......@@ -247,6 +251,17 @@ void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
} else if (al->kind_id == GCKind::CONSERVATIVE) {
stats->conservative.nallocs++;
stats->conservative.nbytes += nbytes;
} else if (al->kind_id == GCKind::CONSERVATIVE_PYTHON) {
stats->conservative_python.nallocs++;
stats->conservative_python.nbytes += nbytes;
if (stats->collect_cls_stats) {
Box* b = (Box*)al->user_data;
auto& t = stats->by_cls[b->cls];
t.nallocs++;
t.nbytes += nbytes;
}
} else if (al->kind_id == GCKind::UNTRACKED) {
stats->untracked.nallocs++;
stats->untracked.nbytes += nbytes;
......@@ -288,6 +303,7 @@ void Heap::dumpHeapStatistics(int level) {
stats.python.print("python");
stats.conservative.print("conservative");
stats.conservative_python.print("conservative_python");
stats.untracked.print("untracked");
stats.hcls.print("hcls");
stats.precise.print("precise");
......
......@@ -91,8 +91,12 @@ extern "C" PyVarObject* PyObject_InitVar(PyVarObject* op, PyTypeObject* tp, Py_s
RELEASE_ASSERT(op, "");
RELEASE_ASSERT(tp, "");
gc::setIsPythonObject(op);
Py_TYPE(op) = tp;
op->ob_size = size;
return op;
}
......
......@@ -144,8 +144,8 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
extern "C" PyObject* PyType_GenericAlloc(PyTypeObject* type, Py_ssize_t nitems) noexcept {
PyObject* obj;
const size_t size = _PyObject_VAR_SIZE(type, nitems);
/* note that we need to add one, for the sentinel */
const size_t size = _PyObject_VAR_SIZE(type, nitems + 1);
/* note that we need to add one, for the sentinel [CPython comment] */
if (PyType_IS_GC(type))
obj = _PyObject_GC_Malloc(size);
......@@ -2216,6 +2216,8 @@ extern "C" PyObject* PyObject_Init(PyObject* op, PyTypeObject* tp) noexcept {
assert(gc::isValidGCObject(op));
assert(gc::isValidGCObject(tp));
gc::setIsPythonObject(op);
Py_TYPE(op) = tp;
if (PyType_SUPPORTS_WEAKREFS(tp)) {
......
# Make sure we can support weakrefs on extension objects.
# The _sre.SRE_Pattern type is one of the few builtin types that supports weakrefs natively.
import _sre
def f(n):
if n:
return f(n-1)
p = _sre.compile('', 0, [1])
print type(p)
return weakref.ref(p)
import weakref
r = f(20)
import gc
gc.collect()
print r()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment