Commit 3796b9f7 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #829 from kmod/perf4

Save the key hash in dictionaries
parents 1051326f 74a42c5e
...@@ -1837,7 +1837,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo ...@@ -1837,7 +1837,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(clfunc->source->scoping->areGlobalsFromModule()); assert(clfunc->source->scoping->areGlobalsFromModule());
interpreter.setGlobals(source_info->parent_module); interpreter.setGlobals(source_info->parent_module);
for (const auto& p : frame_state.locals->d) { for (const auto& p : *frame_state.locals) {
assert(p.first->cls == str_cls); assert(p.first->cls == str_cls);
auto name = static_cast<BoxedString*>(p.first)->s(); auto name = static_cast<BoxedString*>(p.first)->s();
if (name == PASSED_GENERATOR_NAME) { if (name == PASSED_GENERATOR_NAME) {
......
...@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized* // TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have // But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict // access to and delete them from the locals dict
for (const auto& p : d->d) { for (const auto& p : *d) {
Box* varname = p.first; Box* varname = p.first;
Box* value = p.second; Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value); setitem(frame_info->boxedLocals, varname, value);
......
...@@ -561,7 +561,7 @@ void setupSys() { ...@@ -561,7 +561,7 @@ void setupSys() {
void setupSysEnd() { void setupSysEnd() {
std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names; std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names;
for (auto& p : sys_modules_dict->d) { for (const auto& p : *sys_modules_dict) {
builtin_module_names.push_back(p.first); builtin_module_names.push_back(p.first);
} }
......
...@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) { ...@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) {
made->giveAttr("__module__", boxString(getCurrentModule()->name())); made->giveAttr("__module__", boxString(getCurrentModule()->name()));
made->giveAttr("__doc__", None); made->giveAttr("__doc__", None);
for (auto& p : dict->d) { for (const auto& p : *dict) {
RELEASE_ASSERT(p.first->cls == str_cls, ""); RELEASE_ASSERT(p.first->cls == str_cls, "");
BoxedString* s = (BoxedString*)p.first; BoxedString* s = (BoxedString*)p.first;
internStringMortalInplace(s); internStringMortalInplace(s);
......
...@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) { ...@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) {
try { try {
chars.push_back('{'); chars.push_back('{');
bool first = true; bool first = true;
for (const auto& p : self->d) { for (const auto& p : *self) {
if (!first) { if (!first) {
chars.push_back(','); chars.push_back(',');
chars.push_back(' '); chars.push_back(' ');
...@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) { ...@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
BoxedTuple* t = BoxedTuple::create({ p.first, p.second }); BoxedTuple* t = BoxedTuple::create({ p.first, p.second });
listAppendInternal(rtn, t); listAppendInternal(rtn, t);
} }
...@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) { ...@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) {
Box* dictValues(BoxedDict* self) { Box* dictValues(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
listAppendInternal(rtn, p.second); listAppendInternal(rtn, p.second);
} }
return rtn; return rtn;
...@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) { ...@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
listAppendInternal(rtn, p.first); listAppendInternal(rtn, p.first);
} }
return rtn; return rtn;
...@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb ...@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
return 0; return 0;
} }
*pkey = (*it)->first; *pkey = (*it)->first.value;
*pvalue = (*it)->second; *pvalue = (*it)->second;
++(*it); ++(*it);
...@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) { ...@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) {
raiseExcHelper(KeyError, "popitem(): dictionary is empty"); raiseExcHelper(KeyError, "popitem(): dictionary is empty");
} }
Box* key = it->first; Box* key = it->first.value;
Box* value = it->second; Box* value = it->second;
self->d.erase(it); self->d.erase(it);
...@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) { ...@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) {
if (self->d.size() != rhs->d.size()) if (self->d.size() != rhs->d.size())
return False; return False;
for (const auto& p : self->d) { for (const auto& p : *self) {
auto it = rhs->d.find(p.first); auto it = rhs->d.find(p.first);
if (it == rhs->d.end()) if (it == rhs->d.end())
return False; return False;
...@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) { ...@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) {
BoxedDict* d = (BoxedDict*)b; BoxedDict* d = (BoxedDict*)b;
for (auto p : d->d) { for (auto p : *d) {
v->visit(p.first); v->visit(p.first);
v->visit(p.second); v->visit(p.second);
} }
......
...@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) { ...@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) {
Box* rtn = nullptr; Box* rtn = nullptr;
if (self->type == BoxedDictIterator::KeyIterator) { if (self->type == BoxedDictIterator::KeyIterator) {
rtn = self->it->first; rtn = self->it->first.value;
} else if (self->type == BoxedDictIterator::ValueIterator) { } else if (self->type == BoxedDictIterator::ValueIterator) {
rtn = self->it->second; rtn = self->it->second;
} else if (self->type == BoxedDictIterator::ItemIterator) { } else if (self->type == BoxedDictIterator::ItemIterator) {
rtn = BoxedTuple::create({ self->it->first, self->it->second }); rtn = BoxedTuple::create({ self->it->first.value, self->it->second });
} }
++self->it; ++self->it;
return rtn; return rtn;
......
...@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name ...@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name
assert(PyDict_Check(kwargs)); assert(PyDict_Check(kwargs));
BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs); BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs);
for (auto& p : d_kwargs->d) { for (const auto& p : *d_kwargs) {
auto k = coerceUnicodeToStr(p.first); auto k = coerceUnicodeToStr(p.first);
if (k->cls != str_cls) if (k->cls != str_cls)
...@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD ...@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD
made->setattr(dict_str, dict_descr, NULL); made->setattr(dict_str, dict_descr, NULL);
} }
for (const auto& p : attr_dict->d) { for (const auto& p : *attr_dict) {
auto k = coerceUnicodeToStr(p.first); auto k = coerceUnicodeToStr(p.first);
RELEASE_ASSERT(k->cls == str_cls, ""); RELEASE_ASSERT(k->cls == str_cls, "");
......
...@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) { ...@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
} else { } else {
assert(kwargs_sz); assert(kwargs_sz);
auto const seq = *(kwargs->d.begin()); auto const seq = *(kwargs->d.begin());
auto const kw = static_cast<BoxedString*>(seq.first); auto const kw = static_cast<BoxedString*>(seq.first.value);
if (kw->s() == "sequence") if (kw->s() == "sequence")
elements = seq.second; elements = seq.second;
......
...@@ -2380,7 +2380,7 @@ public: ...@@ -2380,7 +2380,7 @@ public:
assert(PyDict_Check(_container)); assert(PyDict_Check(_container));
BoxedDict* container = static_cast<BoxedDict*>(_container); BoxedDict* container = static_cast<BoxedDict*>(_container);
for (const auto& p : container->d) { for (const auto& p : *container) {
AttrWrapper::setitem(self, p.first, p.second); AttrWrapper::setitem(self, p.first, p.second);
} }
} }
......
...@@ -659,22 +659,36 @@ struct PyLt { ...@@ -659,22 +659,36 @@ struct PyLt {
bool operator()(Box*, Box*) const; bool operator()(Box*, Box*) const;
}; };
struct PythonLevelEq { class BoxedDict : public Box {
static bool isEqual(Box* lhs, Box* rhs) { public:
if (lhs == rhs) // llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
struct BoxAndHash {
Box* value;
size_t hash;
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
};
struct Comparisons {
static bool isEqual(BoxAndHash lhs, BoxAndHash rhs) {
if (lhs.value == rhs.value)
return true; return true;
if (rhs == getEmptyKey() || rhs == getTombstoneKey()) if (rhs.value == (Box*)-1 || rhs.value == (Box*)-2)
return false; return false;
return PyEq()(lhs, rhs); if (lhs.hash != rhs.hash)
return false;
return PyEq()(lhs.value, rhs.value);
} }
static Box* getEmptyKey() { return (Box*)-1; } static BoxAndHash getEmptyKey() { return BoxAndHash((Box*)-1, 0); }
static Box* getTombstoneKey() { return (Box*)-2; } static BoxAndHash getTombstoneKey() { return BoxAndHash((Box*)-2, 0); }
static unsigned getHashValue(Box* val) { return PyHasher()(val); } static unsigned getHashValue(BoxAndHash val) { return val.hash; }
}; };
class BoxedDict : public Box { typedef llvm::DenseMap<BoxAndHash, Box*, Comparisons> DictMap;
public:
typedef llvm::DenseMap<Box*, Box*, PythonLevelEq> DictMap;
DictMap d; DictMap d;
...@@ -683,12 +697,32 @@ public: ...@@ -683,12 +697,32 @@ public:
DEFAULT_CLASS_SIMPLE(dict_cls); DEFAULT_CLASS_SIMPLE(dict_cls);
Box* getOrNull(Box* k) { Box* getOrNull(Box* k) {
const auto& p = d.find(k); const auto& p = d.find(BoxAndHash(k));
if (p != d.end()) if (p != d.end())
return p->second; return p->second;
return NULL; return NULL;
} }
class iterator {
private:
DictMap::iterator it;
public:
iterator(DictMap::iterator it) : it(std::move(it)) {}
bool operator!=(const iterator& rhs) const { return it != rhs.it; }
bool operator==(const iterator& rhs) const { return it == rhs.it; }
iterator& operator++() {
++it;
return *this;
}
std::pair<Box*, Box*> operator*() const { return std::make_pair(it->first.value, it->second); }
Box* first() const { return it->first.value; }
};
iterator begin() { return iterator(d.begin()); }
iterator end() { return iterator(d.end()); }
static void gcHandler(GCVisitor* v, Box* b); static void gcHandler(GCVisitor* v, Box* b);
}; };
static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), ""); static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), "");
......
...@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) { ...@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) {
if (levels > 0) { if (levels > 0) {
int i = 0; int i = 0;
for (auto t : d->d) { for (auto t : *d) {
printf("\nKey:"); printf("\nKey:");
dumpEx(t.first, levels - 1); dumpEx(t.first, levels - 1);
printf("Value:"); printf("Value:");
......
...@@ -38,3 +38,20 @@ nan = float('nan') ...@@ -38,3 +38,20 @@ nan = float('nan')
d[nan] = "hello world" d[nan] = "hello world"
print d[nan] print d[nan]
# Dicts should not check __eq__ for values that have different hash values,
# even if they internally cause a hash collision.
class C(int):
def __eq__(self, rhs):
print "eq", self, rhs
raise Exception("Error, should not call __eq__!")
def __hash__(self):
print "hash", self
return self
d = {}
for i in xrange(1000):
d[C(i)] = i
print len(d)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment