Commit 4a5d4a76 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Fix some dict behavior

Large dict literals would crash the bjit since it would try to allocate
scratch space to store all of the keys+values on the stack.

The old behavior is also, in a small way, detectable by the user: we would evaluate
all subexpressions before doing any dict operations (which can trigger
__hash__ and __eq__ calls).  I started working on this, but it turns out the
evaluation order is not just an issue in the JIT tiers; it is also encoded in
the CFG phase.  Punting on that for now since it's not a refcounting issue.
parent 6cdbb2a3
......@@ -1583,25 +1583,19 @@ Value ASTInterpreter::visit_lambda(AST_Lambda* node) {
// Evaluates a dict literal.
//
// The dict is created empty and the entries are inserted one at a time, both in the interpreter
// and in the code emitted through `jit`, so no per-entry scratch storage is needed no matter how
// many entries the literal has.
//
// Returns the new dict together with the JIT var that holds it (NULL when no JIT is active).
Value ASTInterpreter::visit_dict(AST_Dict* node) {
    RELEASE_ASSERT(node->keys.size() == node->values.size(), "not implemented");

    BoxedDict* dict = new BoxedDict();
    RewriterVar* r_dict = jit ? jit->emitCreateDict() : NULL;
    for (size_t i = 0; i < node->keys.size(); ++i) {
        // The value expression of each entry is evaluated before its key expression.
        // NOTE(review): if evaluating the key can throw, the reference held in `v` is not
        // released on that path -- confirm whether visit_expr can raise for these operands.
        Value v = visit_expr(node->values[i]);
        Value k = visit_expr(node->keys[i]);

        // dictSetInternal steals both references, so they must not also be released here
        // and the pair must not be inserted a second time through another API.
        dictSetInternal(dict, k.o, v.o);
        if (jit) {
            // Mirror the insertion in the emitted code; this consumes the refs of k and v.
            jit->emitDictSet(r_dict, k, v);
        }
    }
    return Value(dict, r_dict);
}
Value ASTInterpreter::visit_set(AST_Set* node) {
......
......@@ -249,25 +249,14 @@ RewriterVar* JitFragmentWriter::emitCompare(AST_expr* node, RewriterVar* lhs, Re
return emitPPCall((void*)compare, { lhs, rhs, imm(op_type) }, 2, 240, node).first->setType(RefType::OWNED);
}
// Emits code that builds a dict from already-evaluated key/value vars with a single runtime call.
//
// `keys` and `values` must have the same length. With no entries this is just a call to
// createDict; otherwise every key and value is handed to createDictHelper through allocArgs and
// is marked as consumed afterwards. The returned var owns the new dict.
//
// NOTE(review): this passes every key and value through allocArgs at once; for large literals
// prefer emitCreateDict() followed by one emitDictSet() per entry.
RewriterVar* JitFragmentWriter::emitCreateDict(const llvm::ArrayRef<RewriterVar*> keys,
                                               const llvm::ArrayRef<RewriterVar*> values) {
    assert(keys.size() == values.size());
    if (keys.empty())
        return call(false, (void*)createDict)->setType(RefType::OWNED);

    // Passed along as additional uses of the key/value vars for the emitted call.
    RewriterVar::SmallVector additional_uses;
    additional_uses.insert(additional_uses.end(), keys.begin(), keys.end());
    additional_uses.insert(additional_uses.end(), values.begin(), values.end());

    auto rtn = emitCallWithAllocatedArgs((void*)createDictHelper,
                                         { imm(keys.size()), allocArgs(keys, RewriterVar::SetattrType::REFUSED),
                                           allocArgs(values, RewriterVar::SetattrType::REFUSED) },
                                         additional_uses)->setType(RefType::OWNED);
    for (RewriterVar* k : keys) {
        k->refConsumed();
    }
    for (RewriterVar* v : values) {
        v->refConsumed();
    }
    return rtn;
}
// Emits a call to createDict and returns the var holding its result, typed as an owned reference.
RewriterVar* JitFragmentWriter::emitCreateDict() {
    auto new_dict = call(false, (void*)createDict);
    return new_dict->setType(RefType::OWNED);
}
// Emits a call to dictSetInternal(dict, k, v).
// The callee steals its key and value arguments, so both vars are marked as consumed afterwards;
// `dict` is left untouched.
void JitFragmentWriter::emitDictSet(RewriterVar* dict, RewriterVar* k, RewriterVar* v) {
    void* const setter = (void*)dictSetInternal;
    call(true, setter, dict, k, v);

    // Ownership of the key and the value has moved into the emitted call.
    k->refConsumed();
    v->refConsumed();
}
// TODO: merge this function's functionality with refUsed
......
......@@ -220,7 +220,8 @@ public:
RewriterVar* emitCallattr(AST_expr* node, RewriterVar* obj, BoxedString* attr, CallattrFlags flags,
const llvm::ArrayRef<RewriterVar*> args, std::vector<BoxedString*>* keyword_names);
RewriterVar* emitCompare(AST_expr* node, RewriterVar* lhs, RewriterVar* rhs, int op_type);
RewriterVar* emitCreateDict(const llvm::ArrayRef<RewriterVar*> keys, const llvm::ArrayRef<RewriterVar*> values);
// Emits a call to createDict; the returned var holds the result as an owned reference.
RewriterVar* emitCreateDict();
// Emits a call to dictSetInternal(dict, k, v). The refs of `k` and `v` are consumed by this.
void emitDictSet(RewriterVar* dict, RewriterVar* k, RewriterVar* v);
RewriterVar* emitCreateList(const llvm::ArrayRef<STOLEN(RewriterVar*)> values);
RewriterVar* emitCreateSet(const llvm::ArrayRef<RewriterVar*> values);
RewriterVar* emitCreateSlice(RewriterVar* start, RewriterVar* stop, RewriterVar* step);
......
......@@ -43,7 +43,7 @@ struct GlobalFuncs {
llvm::Value* unpackIntoArray, *raiseAttributeError, *raiseAttributeErrorStr, *raiseAttributeErrorCapi,
*raiseAttributeErrorStrCapi, *raiseNotIterableError, *raiseIndexErrorStr, *raiseIndexErrorStrCapi,
*assertNameDefined, *assertFail, *assertFailDerefNameDefined, *printExprHelper, *printHelper;
llvm::Value* listAppendInternal;
llvm::Value* listAppendInternal, *dictSetInternal;
ExceptionSwitchable<llvm::Value*> runtimeCall0, runtimeCall1, runtimeCall2, runtimeCall3, runtimeCall, runtimeCallN;
ExceptionSwitchable<llvm::Value*> callattr0, callattr1, callattr2, callattr3, callattr, callattrN;
llvm::Value* reoptCompiledFunc, *compilePartialFunc;
......
......@@ -52,6 +52,14 @@ static void _dictSet(BoxedDict* self, BoxAndHash k, Box* v) {
_dictSetStolen(self, k, incref(v));
}
// Sets self[k] = v, taking over the caller's references to both `k` and `v`.
// `self` must be exactly a dict (checked by assertion only).
extern "C" void dictSetInternal(Box* self, STOLEN(Box*) k, STOLEN(Box*) v) {
    assert(self->cls == dict_cls);
    BoxedDict* const target = static_cast<BoxedDict*>(self);

    // The stolen references are handed to AUTO_DECREF (presumably released when this scope ends,
    // including when the insertion throws); _dictSet takes its own reference to the value.
    AUTO_DECREF(v);
    AUTO_DECREF(k);
    _dictSet(target, k, v);
}
Box* dictRepr(BoxedDict* self) {
std::vector<char> chars;
int status = Py_ReprEnter((PyObject*)self);
......
......@@ -121,6 +121,7 @@ void force() {
FORCE(printHelper);
FORCE(listAppendInternal);
FORCE(dictSetInternal);
FORCE(runtimeCall);
FORCE(runtimeCallCapi);
......
......@@ -94,6 +94,7 @@ extern "C" Box** unpackIntoArray(Box* obj, int64_t expected_size, Box** out_keep
extern "C" void assertNameDefined(bool b, const char* name, BoxedClass* exc_cls, bool local_var_msg);
extern "C" void assertFailDerefNameDefined(const char* name);
extern "C" void assertFail(Box* assertion_type, Box* msg);
// Sets d[k] = v; the references to `k` and `v` are stolen from the caller.
extern "C" void dictSetInternal(Box* d, STOLEN(Box*) k, STOLEN(Box*) v);
inline bool isSubclass(BoxedClass* child, BoxedClass* parent) {
return child == parent || PyType_IsSubtype(child, parent);
......
# Test to make sure we can handle large dict literals. We used to have issues since we would try to allocate
# stack space per item, and would eventually run out.
# from pydoc.py:
def f():
    # Builds one large dict literal (taken from pydoc.py) and discards it.  The literal is kept
    # in literal form on purpose: the number of entries is what this test exercises.
    topics = {
        'TYPES': ('types', 'STRINGS UNICODE NUMBERS SEQUENCES MAPPINGS '
                  'FUNCTIONS CLASSES MODULES FILES inspect'),
        'STRINGS': ('strings', 'str UNICODE SEQUENCES STRINGMETHODS FORMATTING '
                    'TYPES'),
        'STRINGMETHODS': ('string-methods', 'STRINGS FORMATTING'),
        'FORMATTING': ('formatstrings', 'OPERATORS'),
        'UNICODE': ('strings', 'encodings unicode SEQUENCES STRINGMETHODS '
                    'FORMATTING TYPES'),
        'NUMBERS': ('numbers', 'INTEGER FLOAT COMPLEX TYPES'),
        'INTEGER': ('integers', 'int range'),
        'FLOAT': ('floating', 'float math'),
        'COMPLEX': ('imaginary', 'complex cmath'),
        'SEQUENCES': ('typesseq', 'STRINGMETHODS FORMATTING xrange LISTS'),
        'MAPPINGS': 'DICTIONARIES',
        'FUNCTIONS': ('typesfunctions', 'def TYPES'),
        'METHODS': ('typesmethods', 'class def CLASSES TYPES'),
        'CODEOBJECTS': ('bltin-code-objects', 'compile FUNCTIONS TYPES'),
        'TYPEOBJECTS': ('bltin-type-objects', 'types TYPES'),
        'FRAMEOBJECTS': 'TYPES',
        'TRACEBACKS': 'TYPES',
        'NONE': ('bltin-null-object', ''),
        'ELLIPSIS': ('bltin-ellipsis-object', 'SLICINGS'),
        'FILES': ('bltin-file-objects', ''),
        'SPECIALATTRIBUTES': ('specialattrs', ''),
        'CLASSES': ('types', 'class SPECIALMETHODS PRIVATENAMES'),
        'MODULES': ('typesmodules', 'import'),
        'PACKAGES': 'import',
        'EXPRESSIONS': ('operator-summary', 'lambda or and not in is BOOLEAN '
                        'COMPARISON BITWISE SHIFTING BINARY FORMATTING POWER '
                        'UNARY ATTRIBUTES SUBSCRIPTS SLICINGS CALLS TUPLES '
                        'LISTS DICTIONARIES BACKQUOTES'),
        'OPERATORS': 'EXPRESSIONS',
        'PRECEDENCE': 'EXPRESSIONS',
        'OBJECTS': ('objects', 'TYPES'),
        'SPECIALMETHODS': ('specialnames', 'BASICMETHODS ATTRIBUTEMETHODS '
                           'CALLABLEMETHODS SEQUENCEMETHODS1 MAPPINGMETHODS '
                           'SEQUENCEMETHODS2 NUMBERMETHODS CLASSES'),
        'BASICMETHODS': ('customization', 'cmp hash repr str SPECIALMETHODS'),
        'ATTRIBUTEMETHODS': ('attribute-access', 'ATTRIBUTES SPECIALMETHODS'),
        'CALLABLEMETHODS': ('callable-types', 'CALLS SPECIALMETHODS'),
        'SEQUENCEMETHODS1': ('sequence-types', 'SEQUENCES SEQUENCEMETHODS2 '
                             'SPECIALMETHODS'),
        'SEQUENCEMETHODS2': ('sequence-methods', 'SEQUENCES SEQUENCEMETHODS1 '
                             'SPECIALMETHODS'),
        'MAPPINGMETHODS': ('sequence-types', 'MAPPINGS SPECIALMETHODS'),
        'NUMBERMETHODS': ('numeric-types', 'NUMBERS AUGMENTEDASSIGNMENT '
                          'SPECIALMETHODS'),
        'EXECUTION': ('execmodel', 'NAMESPACES DYNAMICFEATURES EXCEPTIONS'),
        'NAMESPACES': ('naming', 'global ASSIGNMENT DELETION DYNAMICFEATURES'),
        'DYNAMICFEATURES': ('dynamic-features', ''),
        'SCOPING': 'NAMESPACES',
        'FRAMES': 'NAMESPACES',
        'EXCEPTIONS': ('exceptions', 'try except finally raise'),
        'COERCIONS': ('coercion-rules','CONVERSIONS'),
        'CONVERSIONS': ('conversions', 'COERCIONS'),
        'IDENTIFIERS': ('identifiers', 'keywords SPECIALIDENTIFIERS'),
        'SPECIALIDENTIFIERS': ('id-classes', ''),
        'PRIVATENAMES': ('atom-identifiers', ''),
        'LITERALS': ('atom-literals', 'STRINGS BACKQUOTES NUMBERS '
                     'TUPLELITERALS LISTLITERALS DICTIONARYLITERALS'),
        'TUPLES': 'SEQUENCES',
        'TUPLELITERALS': ('exprlists', 'TUPLES LITERALS'),
        'LISTS': ('typesseq-mutable', 'LISTLITERALS'),
        'LISTLITERALS': ('lists', 'LISTS LITERALS'),
        'DICTIONARIES': ('typesmapping', 'DICTIONARYLITERALS'),
        'DICTIONARYLITERALS': ('dict', 'DICTIONARIES LITERALS'),
        'BACKQUOTES': ('string-conversions', 'repr str STRINGS LITERALS'),
        'ATTRIBUTES': ('attribute-references', 'getattr hasattr setattr '
                       'ATTRIBUTEMETHODS'),
        'SUBSCRIPTS': ('subscriptions', 'SEQUENCEMETHODS1'),
        'SLICINGS': ('slicings', 'SEQUENCEMETHODS2'),
        'CALLS': ('calls', 'EXPRESSIONS'),
        'POWER': ('power', 'EXPRESSIONS'),
        'UNARY': ('unary', 'EXPRESSIONS'),
        'BINARY': ('binary', 'EXPRESSIONS'),
        'SHIFTING': ('shifting', 'EXPRESSIONS'),
        'BITWISE': ('bitwise', 'EXPRESSIONS'),
        'COMPARISON': ('comparisons', 'EXPRESSIONS BASICMETHODS'),
        'BOOLEAN': ('booleans', 'EXPRESSIONS TRUTHVALUE'),
        'ASSERTION': 'assert',
        'ASSIGNMENT': ('assignment', 'AUGMENTEDASSIGNMENT'),
        'AUGMENTEDASSIGNMENT': ('augassign', 'NUMBERMETHODS'),
        'DELETION': 'del',
        'PRINTING': 'print',
        'RETURNING': 'return',
        'IMPORTING': 'import',
        'CONDITIONAL': 'if',
        'LOOPING': ('compound', 'for while break continue'),
        'TRUTHVALUE': ('truth', 'if while and or not BASICMETHODS'),
        'DEBUGGING': ('debugger', 'pdb'),
        'CONTEXTMANAGERS': ('context-managers', 'with'),
    }
# Call f() repeatedly (100 times) rather than once.
for i in xrange(100):
    f()
# expected: fail
# - the CFG expands to the wrong code here
class H(object):
def __init__(self, n):
self.n = n
print "Creating", repr(n)
def __hash__(self):
print "hashing", repr(self.n)
return self.n
def __eq__(self, rhs):
print "eq"
return self is rhs
print "testing sets"
# Set literals evaluate all subexpressions, then do hash operations:
def f():
{H(1), H(2), H(1L)}
# Call f() repeatedly (100 times) rather than once.
for i in xrange(100):
    f()
print "testing dicts"
# Dict literals evaluate subexpressions one by one, then do hash operations
def f2():
{H(1): H(4), H(2): H(5), H(1L): H(6)}
# Call f2() repeatedly (100 times) rather than once.
for i in xrange(100):
    f2()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment