Commit d17137f9 authored by Kevin Modzelewski

Merge pull request #611 from kmod/sre_compile

cache + reuse 1-length strings
parents 053a23d6 e51eff87
def f():
s = "hello world!" * 500
count = 0
for i in xrange(10000):
for c in s:
if c == '!':
count += 1
print count
f()
...@@ -30,10 +30,6 @@ extern "C" Box* createList() { ...@@ -30,10 +30,6 @@ extern "C" Box* createList() {
return new BoxedList(); return new BoxedList();
} }
// Wraps `s` in a newly constructed BoxedString. The byte length of `s` is
// forwarded to BoxedString's sized operator new, and the constructor receives
// `s` itself. Every call constructs a new object, including for empty and
// single-character inputs.
BoxedString* boxString(llvm::StringRef s) {
    const size_t len = s.size();
    return new (len) BoxedString(s);
}
BoxedString* boxStringTwine(const llvm::Twine& t) { BoxedString* boxStringTwine(const llvm::Twine& t) {
llvm::SmallString<256> Vec; llvm::SmallString<256> Vec;
return boxString(t.toStringRef(Vec)); return boxString(t.toStringRef(Vec));
......
...@@ -54,6 +54,9 @@ extern "C" PyObject* string__format__(PyObject* self, PyObject* args) noexcept; ...@@ -54,6 +54,9 @@ extern "C" PyObject* string__format__(PyObject* self, PyObject* args) noexcept;
namespace pyston { namespace pyston {
BoxedString* EmptyString;
BoxedString* characters[UCHAR_MAX + 1];
BoxedString::BoxedString(const char* s, size_t n) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(const char* s, size_t n) : interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(n != llvm::StringRef::npos, ""); RELEASE_ASSERT(n != llvm::StringRef::npos, "");
if (s) { if (s) {
...@@ -1147,7 +1150,7 @@ extern "C" Box* strMul(BoxedString* lhs, Box* rhs) { ...@@ -1147,7 +1150,7 @@ extern "C" Box* strMul(BoxedString* lhs, Box* rhs) {
else else
return NotImplemented; return NotImplemented;
if (n <= 0) if (n <= 0)
return boxString(""); return EmptyString;
// TODO: use createUninitializedString and getWriteableStringContents // TODO: use createUninitializedString and getWriteableStringContents
int sz = lhs->size(); int sz = lhs->size();
...@@ -1948,7 +1951,7 @@ Box* strTranslate(BoxedString* self, BoxedString* table, BoxedString* delete_cha ...@@ -1948,7 +1951,7 @@ Box* strTranslate(BoxedString* self, BoxedString* table, BoxedString* delete_cha
Box* strLower(BoxedString* self) { Box* strLower(BoxedString* self) {
assert(isSubclass(self->cls, str_cls)); assert(isSubclass(self->cls, str_cls));
BoxedString* rtn = static_cast<BoxedString*>(boxString(self->s())); BoxedString* rtn = new (self->size()) BoxedString(self->s());
for (int i = 0; i < rtn->size(); i++) for (int i = 0; i < rtn->size(); i++)
rtn->data()[i] = std::tolower(rtn->data()[i]); rtn->data()[i] = std::tolower(rtn->data()[i]);
return rtn; return rtn;
...@@ -1956,7 +1959,7 @@ Box* strLower(BoxedString* self) { ...@@ -1956,7 +1959,7 @@ Box* strLower(BoxedString* self) {
Box* strUpper(BoxedString* self) { Box* strUpper(BoxedString* self) {
assert(isSubclass(self->cls, str_cls)); assert(isSubclass(self->cls, str_cls));
BoxedString* rtn = static_cast<BoxedString*>(boxString(self->s())); BoxedString* rtn = new (self->size()) BoxedString(self->s());
for (int i = 0; i < rtn->size(); i++) for (int i = 0; i < rtn->size(); i++)
rtn->data()[i] = std::toupper(rtn->data()[i]); rtn->data()[i] = std::toupper(rtn->data()[i]);
return rtn; return rtn;
...@@ -1964,7 +1967,7 @@ Box* strUpper(BoxedString* self) { ...@@ -1964,7 +1967,7 @@ Box* strUpper(BoxedString* self) {
Box* strSwapcase(BoxedString* self) { Box* strSwapcase(BoxedString* self) {
assert(isSubclass(self->cls, str_cls)); assert(isSubclass(self->cls, str_cls));
BoxedString* rtn = static_cast<BoxedString*>(boxString(self->s())); BoxedString* rtn = new (self->size()) BoxedString(self->s());
for (int i = 0; i < rtn->size(); i++) { for (int i = 0; i < rtn->size(); i++) {
char c = rtn->data()[i]; char c = rtn->data()[i];
if (std::islower(c)) if (std::islower(c))
...@@ -2208,7 +2211,7 @@ extern "C" Box* strGetitem(BoxedString* self, Box* slice) { ...@@ -2208,7 +2211,7 @@ extern "C" Box* strGetitem(BoxedString* self, Box* slice) {
} }
char c = self->s()[n]; char c = self->s()[n];
return boxString(llvm::StringRef(&c, 1)); return characters[c & UCHAR_MAX];
} else if (slice->cls == slice_cls) { } else if (slice->cls == slice_cls) {
BoxedSlice* sslice = static_cast<BoxedSlice*>(slice); BoxedSlice* sslice = static_cast<BoxedSlice*>(slice);
...@@ -2257,7 +2260,7 @@ public: ...@@ -2257,7 +2260,7 @@ public:
char c = *self->it; char c = *self->it;
++self->it; ++self->it;
return boxString(llvm::StringRef(&c, 1)); return characters[c & UCHAR_MAX];
} }
}; };
...@@ -2309,7 +2312,6 @@ extern "C" int PyString_AsStringAndSize(register PyObject* obj, register char** ...@@ -2309,7 +2312,6 @@ extern "C" int PyString_AsStringAndSize(register PyObject* obj, register char**
} }
BoxedString* createUninitializedString(ssize_t n) { BoxedString* createUninitializedString(ssize_t n) {
// I *think* this should avoid doing any copies, by using move constructors:
return new (n) BoxedString(n, 0); return new (n) BoxedString(n, 0);
} }
...@@ -2504,7 +2506,7 @@ static PyObject* string_zfill(PyObject* self, PyObject* args) { ...@@ -2504,7 +2506,7 @@ static PyObject* string_zfill(PyObject* self, PyObject* args) {
// Pyston change: // Pyston change:
// s = pad(self, fill, 0, '0'); // s = pad(self, fill, 0, '0');
s = pad((BoxedString*)self, boxInt(width), boxString("0"), JUST_RIGHT); s = pad((BoxedString*)self, boxInt(width), characters['0' & UCHAR_MAX], JUST_RIGHT);
if (s == NULL) if (s == NULL)
return NULL; return NULL;
...@@ -2724,7 +2726,8 @@ void setupStr() { ...@@ -2724,7 +2726,8 @@ void setupStr() {
str_cls->tp_richcompare = str_richcompare; str_cls->tp_richcompare = str_richcompare;
BoxedString* spaceChar = boxString(" "); BoxedString* spaceChar = characters[' ' & UCHAR_MAX];
assert(spaceChar);
str_cls->giveAttr("ljust", str_cls->giveAttr("ljust",
new BoxedFunction(boxRTFunction((void*)strLjust, UNKNOWN, 3, 1, false, false), { spaceChar })); new BoxedFunction(boxRTFunction((void*)strLjust, UNKNOWN, 3, 1, false, false), { spaceChar }));
str_cls->giveAttr("rjust", str_cls->giveAttr("rjust",
......
...@@ -739,7 +739,6 @@ BoxedClass* object_cls, *type_cls, *none_cls, *bool_cls, *int_cls, *float_cls, ...@@ -739,7 +739,6 @@ BoxedClass* object_cls, *type_cls, *none_cls, *bool_cls, *int_cls, *float_cls,
*builtin_function_or_method_cls, *attrwrapperiter_cls, *set_cls, *frozenset_cls; *builtin_function_or_method_cls, *attrwrapperiter_cls, *set_cls, *frozenset_cls;
BoxedTuple* EmptyTuple; BoxedTuple* EmptyTuple;
BoxedString* EmptyString;
} }
extern "C" Box* createUserClass(BoxedString* name, Box* _bases, Box* _attr_dict) { extern "C" Box* createUserClass(BoxedString* name, Box* _bases, Box* _attr_dict) {
...@@ -2529,8 +2528,16 @@ void setupRuntime() { ...@@ -2529,8 +2528,16 @@ void setupRuntime() {
BoxedHeapClass(object_cls, BoxedWrapperDescriptor::gcHandler, 0, 0, sizeof(BoxedWrapperDescriptor), false, BoxedHeapClass(object_cls, BoxedWrapperDescriptor::gcHandler, 0, 0, sizeof(BoxedWrapperDescriptor), false,
static_cast<BoxedString*>(boxString("wrapper_descriptor"))); static_cast<BoxedString*>(boxString("wrapper_descriptor")));
EmptyString = boxString(""); EmptyString = new (0) BoxedString("");
gc::registerPermanentRoot(EmptyString); // Call InternInPlace rather than InternFromString since that will
// probably try to return EmptyString
PyString_InternInPlace((Box**)&EmptyString);
for (int i = 0; i <= UCHAR_MAX; i++) {
char c = (char)i;
BoxedString* s = new (1) BoxedString(llvm::StringRef(&c, 1));
PyString_InternInPlace((Box**)&s);
characters[i] = s;
}
// Kind of hacky, but it's easier to manually construct the mro for a couple key classes // Kind of hacky, but it's easier to manually construct the mro for a couple key classes
// than try to make the MRO construction code be safe against say, tuple_cls not having // than try to make the MRO construction code be safe against say, tuple_cls not having
......
...@@ -614,6 +614,7 @@ public: ...@@ -614,6 +614,7 @@ public:
extern "C" BoxedTuple* EmptyTuple; extern "C" BoxedTuple* EmptyTuple;
extern "C" BoxedString* EmptyString; extern "C" BoxedString* EmptyString;
extern BoxedString* characters[UCHAR_MAX + 1];
struct PyHasher { struct PyHasher {
size_t operator()(Box*) const; size_t operator()(Box*) const;
...@@ -914,6 +915,15 @@ Box* codeForCLFunction(CLFunction*); ...@@ -914,6 +915,15 @@ Box* codeForCLFunction(CLFunction*);
CLFunction* clfunctionFromCode(Box* code); CLFunction* clfunctionFromCode(Box* code);
Box* getFrame(int depth); Box* getFrame(int depth);
// Returns a BoxedString holding the contents of `s`.
//
//  - empty input          -> the global EmptyString
//  - single-byte input    -> the matching entry of the global `characters` table
//  - anything longer      -> a newly constructed BoxedString
//
// For the first two cases the result is a global object rather than a fresh
// one, so a caller that intends to write into the returned string's data
// should construct its own BoxedString instead of calling this helper.
inline BoxedString* boxString(llvm::StringRef s) {
    switch (s.size()) {
        case 0:
            return EmptyString;
        case 1:
            // `char` may be signed; masking with UCHAR_MAX keeps the index
            // within [0, UCHAR_MAX], the bounds of the `characters` table.
            return characters[s.data()[0] & UCHAR_MAX];
        default:
            return new (s.size()) BoxedString(s);
    }
}
} }
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment