Commit df3e7ee9 authored by Marius Wachtler

Merge pull request #840 from undingen/denseset

Switch our Python set implementation to use a llvm::DenseSet
parents dd2f0cd3 79e4f2c4
......@@ -29,15 +29,9 @@ void BoxedSet::gcHandler(GCVisitor* v, Box* b) {
Box::gcHandler(v, b);
BoxedSet* s = (BoxedSet*)b;
// This feels like a kludge, but we need to find anything that
// the unordered_map might have allocated.
// Another way to handle this would be to rt_alloc the unordered_map
// as well, though that incurs extra memory dereferences which would
// be nice to avoid.
void** start = (void**)&s->s;
void** end = start + (sizeof(s->s) / 8);
v->visitPotentialRange(start, end);
for (auto&& p : s->s) {
v->visit(p.value);
}
}
namespace set {
......@@ -54,7 +48,7 @@ public:
bool hasNext() { return it != s->s.end(); }
Box* next() {
Box* rtn = *it;
Box* rtn = it->value;
++it;
return rtn;
}
......@@ -136,13 +130,13 @@ static Box* _setRepr(BoxedSet* self, const char* type_name) {
chars.push_back('[');
bool first = true;
for (Box* elt : self->s) {
for (auto&& elt : self->s) {
if (!first) {
chars.push_back(',');
chars.push_back(' ');
}
BoxedString* str = static_cast<BoxedString*>(repr(elt));
BoxedString* str = static_cast<BoxedString*>(repr(elt.value));
chars.insert(chars.end(), str->s().begin(), str->s().end());
first = false;
......@@ -173,10 +167,10 @@ Box* setOrSet(BoxedSet* lhs, BoxedSet* rhs) {
BoxedSet* rtn = new (lhs->cls) BoxedSet();
for (Box* elt : lhs->s) {
for (auto&& elt : lhs->s) {
rtn->s.insert(elt);
}
for (Box* elt : rhs->s) {
for (auto&& elt : rhs->s) {
rtn->s.insert(elt);
}
return rtn;
......@@ -188,7 +182,7 @@ Box* setAndSet(BoxedSet* lhs, BoxedSet* rhs) {
BoxedSet* rtn = new (lhs->cls) BoxedSet();
for (Box* elt : lhs->s) {
for (auto&& elt : lhs->s) {
if (rhs->s.count(elt))
rtn->s.insert(elt);
}
......@@ -201,7 +195,7 @@ Box* setSubSet(BoxedSet* lhs, BoxedSet* rhs) {
BoxedSet* rtn = new (lhs->cls) BoxedSet();
for (Box* elt : lhs->s) {
for (auto&& elt : lhs->s) {
// TODO if len(rhs) << len(lhs), it might be more efficient
// to delete the elements of rhs from lhs?
if (rhs->s.count(elt) == 0)
......@@ -216,12 +210,12 @@ Box* setXorSet(BoxedSet* lhs, BoxedSet* rhs) {
BoxedSet* rtn = new (lhs->cls) BoxedSet();
for (Box* elt : lhs->s) {
for (auto&& elt : lhs->s) {
if (rhs->s.count(elt) == 0)
rtn->s.insert(elt);
}
for (Box* elt : rhs->s) {
for (auto&& elt : rhs->s) {
if (lhs->s.count(elt) == 0)
rtn->s.insert(elt);
}
......@@ -447,7 +441,7 @@ Box* setPop(BoxedSet* self) {
raiseExcHelper(KeyError, "pop from an empty set");
auto it = self->s.begin();
Box* rtn = *it;
Box* rtn = it->value;
self->s.erase(it);
return rtn;
}
......@@ -489,10 +483,8 @@ Box* setHash(BoxedSet* self) {
RELEASE_ASSERT(isSubclass(self->cls, frozenset_cls), "");
int64_t rtn = 1927868237L;
for (Box* e : self->s) {
BoxedInt* h = hash(e);
assert(PyInt_Check(h));
rtn ^= h->n + 0x9e3779b9 + (rtn << 6) + (rtn >> 2);
for (auto&& e : self->s) {
rtn ^= e.hash + 0x9e3779b9 + (rtn << 6) + (rtn >> 2);
}
return boxInt(rtn);
......
......@@ -15,7 +15,7 @@
#ifndef PYSTON_RUNTIME_SET_H
#define PYSTON_RUNTIME_SET_H
#include <unordered_set>
#include "llvm/ADT/DenseSet.h"
#include "core/types.h"
#include "runtime/types.h"
......@@ -29,7 +29,7 @@ extern "C" Box* createSet();
class BoxedSet : public Box {
public:
typedef std::unordered_set<Box*, PyHasher, PyEq, StlCompatAllocator<Box*>> Set;
typedef llvm::DenseSet<BoxAndHash, BoxAndHash::Comparisons> Set;
Set s;
Box** weakreflist; /* List of weak references */
......
......@@ -659,19 +659,16 @@ struct PyLt {
bool operator()(Box*, Box*) const;
};
class BoxedDict : public Box {
public:
// llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
// Pairs a Box pointer with a hash value for it, so the hash travels with the key
// instead of being recomputed each time it is needed.
struct BoxAndHash {
// The boxed object itself.
Box* value;
// Hash associated with `value`; either computed by PyHasher or supplied by the caller.
size_t hash;
// Computes the hash by invoking PyHasher on `value`. Non-explicit, so a Box* converts implicitly.
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
// Stores a caller-supplied hash as-is; PyHasher is not invoked.
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
};
// llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
struct BoxAndHash {
Box* value;
size_t hash;
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
struct Comparisons {
static bool isEqual(BoxAndHash lhs, BoxAndHash rhs) {
......@@ -687,8 +684,11 @@ public:
static BoxAndHash getTombstoneKey() { return BoxAndHash((Box*)-2, 0); }
static unsigned getHashValue(BoxAndHash val) { return val.hash; }
};
};
typedef llvm::DenseMap<BoxAndHash, Box*, Comparisons> DictMap;
class BoxedDict : public Box {
public:
typedef llvm::DenseMap<BoxAndHash, Box*, BoxAndHash::Comparisons> DictMap;
DictMap d;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment