Commit ce6e1531 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Add as many refcounting annotations as I can find to str/int/dict.cpp

parent 3b9634c9
# Refcounting
Pyston for the most part handles refcounts in the same way that CPython does.
`pyston_dbg` automatically turns on CPython's `Py_REF_DEBUG`, which adds some extra refcount checks. This checks for some simple forms of mistakes such as decref'ing an object to a refcount below 0. We also turn on `WITH_PYMALLOC` and `PYMALLOC_DEBUG`, which means that at as soon as an object gets freed it has all of its memory wiped, so any use-after-free bugs will also get caught.
In addition, on exit we do some more serious checking than CPython does. We go to decent lengths to completely tear down the runtime and then check that there are zero objects alive.
Also of note is that in the debug build, there is the `_Py_RefTotal` global variable that contains the sum of all objects' refcounts. It can be handy to watch this variable (useful gdb command: `display _Py_RefTotal`), and this is how we assert that zero objects are alive at the end of the program.
## Refcounting discipline
Unless specified, arguments are borrowed and return values are owned. This can be specified by saying an argument is `STOLEN(Box*)` or that a return value is `BORROWED(Box*)`. STOLEN and BORROWED are no-ops that are just for documentation (and eventually linting).
Exception safety
Helpers:
AUTO_DECREF, autoDecref, incref
If you get a use-after-free (typically a segfault where we tried to access dead memory that looks like 0xdbdbdbdb): `watch -l op->ob_refcnt`. I find it helpful to open up a text file and write down what each of the ref operations means, and then figure out which ones were extra or which ones were missing.
If you get extra refs at the end, try to bisect down the program until you find the point that the ref was leaked. Unfortunately there is not a good way to enumerate all live objects to figure out what they are.
## Refcounting in the rewriter, baseline jit, and llvm jit
......@@ -655,7 +655,7 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void) PYSTON_NOEXCEPT;
*/
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyAPI_FUNC(BORROWED(PyObject *)) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *) PYSTON_NOEXCEPT;
/* Returns the currently active default encoding.
......
......@@ -253,8 +253,6 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
}
}
assert(!rewrite_args && "check refcounting");
STAT_TIMER(t0, "us_timer_boxedmethoddescriptor__call__", 10);
assert(_self->cls == method_cls);
......@@ -339,7 +337,8 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
if (rewrite_args)
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->rewriter->loadConst(0, Location::forArg(1)));
rewrite_args->rewriter->loadConst(0, Location::forArg(1)))
->setType(RefType::OWNED);
} else if (call_flags == METH_VARARGS) {
{
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_builtins");
......@@ -347,15 +346,16 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
}
if (rewrite_args)
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2);
rewrite_args->arg2)->setType(RefType::OWNED);
} else if (call_flags == (METH_VARARGS | METH_KEYWORDS)) {
{
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_builtins");
rtn = (Box*)((PyCFunctionWithKeywords)self->method->ml_meth)(arg1, arg2, arg3);
}
if (rewrite_args)
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2, rewrite_args->arg3);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2, rewrite_args->arg3)->setType(RefType::OWNED);
} else if (call_flags == METH_O) {
{
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_builtins");
......@@ -363,7 +363,7 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
}
if (rewrite_args)
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2);
rewrite_args->arg2)->setType(RefType::OWNED);
} else if ((call_flags & ~(METH_O3 | METH_D3)) == 0) {
{
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_builtins");
......@@ -371,15 +371,18 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
}
if (rewrite_args) {
if (paramspec.totalReceived() == 2)
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)self->method->ml_meth,
rewrite_args->arg1, rewrite_args->arg2);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2)->setType(RefType::OWNED);
else if (paramspec.totalReceived() == 3)
rewrite_args->out_rtn = rewrite_args->rewriter->call(
true, (void*)self->method->ml_meth, rewrite_args->arg1, rewrite_args->arg2, rewrite_args->arg3);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2, rewrite_args->arg3)->setType(RefType::OWNED);
else if (paramspec.totalReceived() > 3)
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)self->method->ml_meth, rewrite_args->arg1,
rewrite_args->arg2, rewrite_args->arg3, rewrite_args->args);
rewrite_args->arg2, rewrite_args->arg3,
rewrite_args->args)->setType(RefType::OWNED);
else
abort();
}
......
......@@ -58,7 +58,9 @@ Box* dictRepr(BoxedDict* self) {
}
first = false;
BoxedString* k = static_cast<BoxedString*>(repr(p.first));
AUTO_DECREF(k);
BoxedString* v = static_cast<BoxedString*>(repr(p.second));
AUTO_DECREF(v);
chars.insert(chars.end(), k->s().begin(), k->s().end());
chars.push_back(':');
chars.push_back(' ');
......@@ -73,19 +75,12 @@ Box* dictRepr(BoxedDict* self) {
return boxString(llvm::StringRef(&chars[0], chars.size()));
}
Box* dictClear(BoxedDict* self) {
if (!PyDict_Check(self))
raiseExcHelper(TypeError, "descriptor 'clear' requires a 'dict' object but received a '%s'", getTypeName(self));
self->d.clear();
return None;
}
Box* dictCopy(BoxedDict* self) {
if (!PyDict_Check(self))
raiseExcHelper(TypeError, "descriptor 'copy' requires a 'dict' object but received a '%s'", getTypeName(self));
BoxedDict* r = new BoxedDict();
assert(0 && "check refcounting");
r->d = self->d;
return r;
}
......@@ -207,6 +202,14 @@ extern "C" void PyDict_Clear(PyObject* op) noexcept {
static_cast<BoxedDict*>(op)->d.freeAllMemory();
}
Box* dictClear(BoxedDict* self) {
if (!PyDict_Check(self))
raiseExcHelper(TypeError, "descriptor 'clear' requires a 'dict' object but received a '%s'", getTypeName(self));
PyDict_Clear(self);
Py_RETURN_NONE;
}
extern "C" PyObject* PyDict_Copy(PyObject* o) noexcept {
RELEASE_ASSERT(PyDict_Check(o), "");
try {
......@@ -251,6 +254,7 @@ template <enum ExceptionStyle S> Box* dictGetitem(BoxedDict* self, Box* k) noexc
// Special-case defaultdict, assuming that it's the main time we will actually hit this.
// We could just use a single runtime IC here, or have a small cache that maps type->runtimeic.
// Or use a polymorphic runtime ic.
assert(0 && "check refcountinG");
static BoxedClass* defaultdict_cls = NULL;
static CallattrIC defaultdict_ic;
if (defaultdict_cls == NULL && strcmp(self->cls->tp_name, "collections.defaultdict") == 0) {
......@@ -283,7 +287,7 @@ template <enum ExceptionStyle S> Box* dictGetitem(BoxedDict* self, Box* k) noexc
} else
raiseExcHelper(KeyError, k);
}
return it->second;
return incref(it->second);
}
extern "C" PyObject* PyDict_New() noexcept {
......@@ -439,7 +443,9 @@ Box* dictDelitem(BoxedDict* self, Box* k) {
raiseExcHelper(KeyError, k);
}
Box* v = it->second;
self->d.erase(it);
Py_DECREF(v);
Py_RETURN_NONE;
}
......@@ -491,7 +497,7 @@ Box* dictPop(BoxedDict* self, Box* k, Box* d) {
auto it = self->d.find(k);
if (it == self->d.end()) {
if (d)
return d;
return incref(d);
raiseExcHelper(KeyError, k);
}
......@@ -516,6 +522,8 @@ Box* dictPopitem(BoxedDict* self) {
self->d.erase(it);
auto rtn = BoxedTuple::create({ key, value });
Py_DECREF(key);
Py_DECREF(value);
return rtn;
}
......@@ -525,9 +533,9 @@ Box* dictGet(BoxedDict* self, Box* k, Box* d) {
auto it = self->d.find(k);
if (it == self->d.end())
return d;
return incref(d);
return it->second;
return incref(it->second);
}
Box* dictSetdefault(BoxedDict* self, Box* k, Box* v) {
......@@ -538,10 +546,13 @@ Box* dictSetdefault(BoxedDict* self, Box* k, Box* v) {
BoxAndHash k_hash(k);
auto it = self->d.find(k_hash);
if (it != self->d.end())
return it->second;
return incref(it->second);
Py_INCREF(k);
Py_INCREF(v);
self->d.insert(std::make_pair(k_hash, v));
return v;
return incref(v);
}
Box* dictContains(BoxedDict* self, Box* k) {
......@@ -554,7 +565,6 @@ Box* dictContains(BoxedDict* self, Box* k) {
/* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
extern "C" int PyDict_Contains(PyObject* op, PyObject* key) noexcept {
try {
if (op->cls == attrwrapper_cls) {
if (key->cls == str_cls) {
......@@ -566,7 +576,7 @@ extern "C" int PyDict_Contains(PyObject* op, PyObject* key) noexcept {
Box* rtn = PyObject_CallMethod(op, "__contains__", "O", key);
if (!rtn)
return -1;
return rtn == True;
return autoDecref(rtn) == True;
}
BoxedDict* mp = (BoxedDict*)op;
......@@ -587,6 +597,8 @@ Box* dictFromkeys(Box* cls, Box* iterable, Box* default_value) {
auto rtn = new BoxedDict();
if (PyAnySet_Check(iterable)) {
for (auto&& elt : ((BoxedSet*)iterable)->s) {
Py_INCREF(elt.value);
Py_INCREF(default_value);
rtn->d.insert(std::make_pair(elt, default_value));
}
} else {
......@@ -603,8 +615,12 @@ Box* dictEq(BoxedDict* self, Box* _rhs) {
raiseExcHelper(TypeError, "descriptor '__eq__' requires a 'dict' object but received a '%s'",
getTypeName(self));
if (_rhs->cls == attrwrapper_cls)
if (_rhs->cls == attrwrapper_cls) {
_rhs = attrwrapperToDict(_rhs);
} else {
Py_INCREF(_rhs);
}
AUTO_DECREF(_rhs);
if (!PyDict_Check(_rhs))
return incref(NotImplemented);
......@@ -663,6 +679,7 @@ void dictMerge(BoxedDict* self, Box* other) {
keys = callattr(other, keys_str, callattr_flags, NULL, NULL, NULL, NULL, NULL);
}
assert(keys);
AUTO_DECREF(keys);
for (Box* k : keys->pyElements()) {
self->d[k] = getitemInternal<CXX>(other, k);
......@@ -682,14 +699,16 @@ void dictMergeFromSeq2(BoxedDict* self, Box* other) {
raiseExcHelper(ValueError, "dictionary update sequence element #%d has length %ld; 2 is required", idx,
list->size);
self->d[list->elts->elts[0]] = list->elts->elts[1];
assert(0 && "bad refcounting if key already exists");
self->d[incref(list->elts->elts[0])] = incref(list->elts->elts[1]);
} else if (element->cls == tuple_cls) {
BoxedTuple* tuple = static_cast<BoxedTuple*>(element);
if (tuple->size() != 2)
raiseExcHelper(ValueError, "dictionary update sequence element #%d has length %ld; 2 is required", idx,
tuple->size());
self->d[tuple->elts[0]] = tuple->elts[1];
assert(0 && "bad refcounting if key already exists");
self->d[incref(tuple->elts[0])] = incref(tuple->elts[1]);
} else
raiseExcHelper(TypeError, "cannot convert dictionary update sequence element #%d to a sequence", idx);
......@@ -733,7 +752,7 @@ Box* dictUpdate(BoxedDict* self, BoxedTuple* args, BoxedDict* kwargs) {
if (kwargs && kwargs->d.size())
dictMerge(self, kwargs);
return None;
Py_RETURN_NONE;
}
extern "C" Box* dictInit(BoxedDict* self, BoxedTuple* args, BoxedDict* kwargs) {
......@@ -750,17 +769,22 @@ extern "C" Box* dictInit(BoxedDict* self, BoxedTuple* args, BoxedDict* kwargs) {
// handle keyword arguments by merging (possibly over positional entries per CPy)
assert(kwargs->cls == dict_cls);
for (const auto& p : kwargs->d)
for (const auto& p : kwargs->d) {
assert(0 && "bad refcounting if key already exists");
Py_INCREF(p.first.value);
Py_INCREF(p.second);
self->d[p.first] = p.second;
}
}
return None;
Py_RETURN_NONE;
}
static int dict_init(PyObject* self, PyObject* args, PyObject* kwds) noexcept {
assert(PyDict_Check(self));
try {
dictInit(static_cast<BoxedDict*>(self), static_cast<BoxedTuple*>(args), static_cast<BoxedDict*>(kwds));
Box* r = dictInit(static_cast<BoxedDict*>(self), static_cast<BoxedTuple*>(args), static_cast<BoxedDict*>(kwds));
Py_DECREF(r);
} catch (ExcInfo e) {
setCAPIException(e);
return -1;
......
......@@ -69,7 +69,7 @@ typedef struct _intblock PyIntBlock;
static PyIntBlock *block_list = NULL;
PyIntObject *BoxedInt::free_list = NULL;
PyIntObject* BoxedInt::fill_free_list(void) {
PyIntObject* BoxedInt::fill_free_list(void) noexcept {
PyIntObject* p, *q;
/* Python's object allocator isn't appropriate for large blocks. */
p = (PyIntObject*)PyMem_MALLOC(sizeof(PyIntBlock));
......@@ -87,7 +87,7 @@ PyIntObject* BoxedInt::fill_free_list(void) {
return p + N_INTOBJECTS - 1;
}
void BoxedInt::tp_dealloc(Box* v) {
void BoxedInt::tp_dealloc(Box* v) noexcept {
#ifdef DISABLE_INT_FREELIST
v->cls->tp_free(v);
#else
......@@ -99,7 +99,7 @@ void BoxedInt::tp_dealloc(Box* v) {
#endif
}
void BoxedInt::tp_free(void* b) {
void BoxedInt::tp_free(void* b) noexcept {
#ifdef DISABLE_INT_FREELIST
assert(0);
#endif
......@@ -379,20 +379,18 @@ bool __builtin_smull_overflow(i64 lhs, i64 rhs, i64* result) {
#endif
// Could add this to the others, but the inliner should be smart enough
// that this isn't needed:
extern "C" Box* add_i64_i64(i64 lhs, i64 rhs) {
i64 result;
if (!__builtin_saddl_overflow(lhs, rhs, &result))
return boxInt(result);
return longAdd(boxLong(lhs), boxLong(rhs));
return longAdd(autoDecref(boxLong(lhs)), autoDecref(boxLong(rhs)));
}
extern "C" Box* sub_i64_i64(i64 lhs, i64 rhs) {
i64 result;
if (!__builtin_ssubl_overflow(lhs, rhs, &result))
return boxInt(result);
return longSub(boxLong(lhs), boxLong(rhs));
return longSub(autoDecref(boxLong(lhs)), autoDecref(boxLong(rhs)));
}
extern "C" Box* div_i64_i64(i64 lhs, i64 rhs) {
......@@ -405,7 +403,7 @@ extern "C" Box* div_i64_i64(i64 lhs, i64 rhs) {
static_assert(PYSTON_INT_MIN == -PYSTON_INT_MAX - 1, "");
if (lhs == PYSTON_INT_MIN && rhs == -1) {
return longDiv(boxLong(lhs), boxLong(rhs));
return longDiv(autoDecref(boxLong(lhs)), autoDecref(boxLong(rhs)));
}
#endif
......@@ -457,14 +455,14 @@ extern "C" Box* pow_i64_i64(i64 lhs, i64 rhs, Box* mod) {
return boxFloat(pow_float_float(lhs, rhs));
// let longPow do the checks.
return longPow(boxLong(lhs), boxLong(rhs), mod);
return longPow(autoDecref(boxLong(lhs)), autoDecref(boxLong(rhs)), mod);
}
extern "C" Box* mul_i64_i64(i64 lhs, i64 rhs) {
i64 result;
if (!__builtin_smull_overflow(lhs, rhs, &result))
return boxInt(result);
return longMul(boxLong(lhs), boxLong(rhs));
return longMul(autoDecref(boxLong(lhs)), autoDecref(boxLong(rhs)));
}
extern "C" i1 eq_i64_i64(i64 lhs, i64 rhs) {
......@@ -676,7 +674,7 @@ extern "C" Box* intLShiftInt(BoxedInt* lhs, BoxedInt* rhs) {
if ((res >> rhs->n) == lhs->n)
return boxInt(lhs->n << rhs->n);
}
return longLshift(boxLong(lhs->n), rhs);
return longLshift(autoDecref(boxLong(lhs->n)), rhs);
}
extern "C" Box* intLShift(BoxedInt* lhs, Box* rhs) {
......@@ -685,7 +683,7 @@ extern "C" Box* intLShift(BoxedInt* lhs, Box* rhs) {
getTypeName(lhs));
if (rhs->cls == long_cls)
return longLshift(boxLong(lhs->n), rhs);
return longLshift(autoDecref(boxLong(lhs->n)), rhs);
if (!PyInt_Check(rhs)) {
return incref(NotImplemented);
......@@ -719,15 +717,19 @@ extern "C" Box* intDivmod(BoxedInt* lhs, Box* rhs) {
Box* divResult = intDiv(lhs, rhs);
if (divResult == NotImplemented) {
return incref(NotImplemented);
return divResult;
}
AUTO_DECREF(divResult);
Box* modResult = intMod(lhs, rhs);
if (modResult == NotImplemented) {
return incref(NotImplemented);
return modResult;
}
AUTO_DECREF(modResult);
Box* arg[2] = { divResult, modResult };
return createTuple(2, arg);
}
......@@ -774,7 +776,7 @@ extern "C" Box* intPowLong(BoxedInt* lhs, BoxedLong* rhs, Box* mod) {
assert(PyInt_Check(lhs));
assert(PyLong_Check(rhs));
BoxedLong* lhs_long = boxLong(lhs->n);
return longPow(lhs_long, rhs, mod);
return longPow(autoDecref(lhs_long), rhs, mod);
}
extern "C" Box* intPowFloat(BoxedInt* lhs, BoxedFloat* rhs, Box* mod) {
......@@ -814,7 +816,7 @@ extern "C" Box* intPow(BoxedInt* lhs, Box* rhs, Box* mod) {
Box* rtn = pow_i64_i64(lhs->n, rhs_int->n, mod);
if (PyLong_Check(rtn))
return longInt(rtn);
return longInt(autoDecref(rtn));
return rtn;
}
......@@ -834,7 +836,7 @@ extern "C" Box* intRShift(BoxedInt* lhs, Box* rhs) {
getTypeName(lhs));
if (rhs->cls == long_cls)
return longRshift(boxLong(lhs->n), rhs);
return longRshift(autoDecref(boxLong(lhs->n)), rhs);
if (!PyInt_Check(rhs)) {
return incref(NotImplemented);
......@@ -897,7 +899,7 @@ extern "C" Box* intNeg(BoxedInt* v) {
static_assert(PYSTON_INT_MIN == -PYSTON_INT_MAX - 1, "");
if (v->n == PYSTON_INT_MIN) {
return longNeg(boxLong(v->n));
return longNeg(autoDecref(boxLong(v->n)));
}
#endif
......@@ -1041,6 +1043,7 @@ template <ExceptionStyle S> static Box* _intNew(Box* val, Box* _base) noexcept(S
if (PyString_Check(val)) {
BoxedString* s = static_cast<BoxedString*>(val);
AUTO_DECREF(s);
if (s->size() != strlen(s->data())) {
Box* srepr = PyObject_Repr(val);
......@@ -1103,6 +1106,8 @@ template <ExceptionStyle S> Box* intNew(Box* _cls, Box* val, Box* base) noexcept
return NULL;
}
AUTO_DECREF(n);
if (n->cls == long_cls) {
if (cls == int_cls)
return n;
......
......@@ -2214,7 +2214,6 @@ Box* getattrInternalGeneric(Box* obj, BoxedString* attr, GetattrRewriteArgs* rew
// Check if __get__ exists
if (descr_get) {
if (rewrite_args) {
assert(0 && "check refcounting");
RewriterVar* r_descr_cls = r_descr->getAttr(offsetof(Box, cls), Location::any());
GetattrRewriteArgs grewrite_args(rewrite_args->rewriter, r_descr_cls, Location::any());
_get_ = typeLookup<rewritable>(descr->cls, get_str, &grewrite_args);
......@@ -2239,7 +2238,6 @@ Box* getattrInternalGeneric(Box* obj, BoxedString* attr, GetattrRewriteArgs* rew
// Check if __set__ exists
Box* _set_ = NULL;
if (rewrite_args) {
assert(0 && "check refcounting");
RewriterVar* r_descr_cls = r_descr->getAttr(offsetof(Box, cls), Location::any());
GetattrRewriteArgs grewrite_args(rewrite_args->rewriter, r_descr_cls, Location::any());
_set_ = typeLookup<REWRITABLE>(descr->cls, set_str, &grewrite_args);
......
This diff is collapsed.
......@@ -4371,7 +4371,7 @@ extern "C" void Py_Finalize() noexcept {
call_sys_exitfunc();
// initialized = 0;
// XXX
#ifdef Py_REF_DEBUG
PyGC_Collect(); // To make sure it creates any static objects
IN_SHUTDOWN = true;
PyOS_FiniInterrupts();
......@@ -4395,6 +4395,7 @@ extern "C" void Py_Finalize() noexcept {
// May need to run multiple collections to collect everything:
while (PyGC_Collect())
;
#endif
// PyGC_Collect());
......
......@@ -430,7 +430,7 @@ extern "C" int PyInt_ClearFreeList() noexcept;
class BoxedInt : public Box {
private:
static PyIntObject *free_list;
static PyIntObject * fill_free_list();
static PyIntObject * fill_free_list() noexcept;
public:
int64_t n;
......@@ -457,8 +457,8 @@ public:
#endif
}
static void tp_dealloc(Box* b);
static void tp_free(void* b);
static void tp_dealloc(Box* b) noexcept;
static void tp_free(void* b) noexcept;
friend int PyInt_ClearFreeList() noexcept;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment