Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
f74e9ee6
Commit
f74e9ee6
authored
Aug 19, 2016
by
Marius Wachtler
Committed by
GitHub
Aug 19, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1331 from undingen/mem_string_interning
string interning: make it slightly more space efficient
parents
7f102907
d713267d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
136 additions
and
65 deletions
+136
-65
src/CMakeLists.txt
src/CMakeLists.txt
+2
-1
src/runtime/str.cpp
src/runtime/str.cpp
+19
-64
src/runtime/str_interning.cpp
src/runtime/str_interning.cpp
+114
-0
src/runtime/types.h
src/runtime/types.h
+1
-0
No files found.
src/CMakeLists.txt
View file @
f74e9ee6
...
...
@@ -38,7 +38,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
codegen/baseline_jit.cpp
codegen/codegen.cpp
codegen/compvars.cpp
codegen/cpython_ast.cpp
codegen/cpython_ast.cpp
codegen/entry.cpp
codegen/gcbuilder.cpp
codegen/irgen.cpp
...
...
@@ -102,6 +102,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
runtime/objmodel.cpp
runtime/set.cpp
runtime/str.cpp
runtime/str_interning.cpp
runtime/super.cpp
runtime/tuple.cpp
runtime/types.cpp
...
...
src/runtime/str.cpp
View file @
f74e9ee6
...
...
@@ -369,62 +369,6 @@ template <ExceptionStyle S> Box* strAdd(BoxedString* lhs, Box* _rhs) noexcept(S
return
new
(
lhs
->
size
()
+
rhs
->
size
())
BoxedString
(
lhs
->
s
(),
rhs
->
s
());
}
// Table of interned strings, keyed by string contents; each mapped value is the
// canonical BoxedString object for those contents.
static
llvm
::
StringMap
<
BoxedString
*>
interned_strings
;
// Stat counter that is logged each time a new entry is added to interned_strings.
static
StatCounter
num_interned_strings
(
"num_interned_string"
);
// C API entry point: interns the contents of the C string `s` and hands back the
// canonical string object (the reference is taken by internStringImmortal).
// `s` is checked with RELEASE_ASSERT before use.
extern "C" PyObject* PyString_InternFromString(const char* s) noexcept {
    RELEASE_ASSERT(s, "");

    llvm::StringRef contents(s);
    return internStringImmortal(contents);
}
// Returns the interned string object whose contents equal `s`, creating and
// registering it on first use.  The returned pointer has had its refcount
// incremented for the caller.
BoxedString* internStringImmortal(llvm::StringRef s) noexcept {
    // operator[] creates a null slot for unseen contents, so a single lookup serves
    // both the hit and the miss path.
    BoxedString*& slot = interned_strings[s];

    if (slot) {
        Py_INCREF(slot);
        return slot;
    }

    num_interned_strings.log();
    slot = boxString(s);
    // CPython would hand back a mortal interned string here; in the current
    // implementation interned strings are immortal.
    slot->interned_state = SSTATE_INTERNED_IMMORTAL;

    Py_INCREF(slot);
    return slot;
}
// C API entry point: replaces *p with the interned string of the same contents.
// If no such string is registered yet, *p itself becomes the interned object.
// Aborts via Py_FatalError when *p is null or not a string.
extern "C" void PyString_InternInPlace(PyObject** p) noexcept {
    BoxedString* str = (BoxedString*)*p;
    if (!str || !PyString_Check(str))
        Py_FatalError("PyString_InternInPlace: strings only please!");

    /* If it's a string subclass, we don't really know what putting
       it in the interned dict might do.  Strings that are already
       interned need no work either. */
    if (!PyString_CheckExact(str) || PyString_CHECK_INTERNED(str))
        return;

    BoxedString*& slot = interned_strings[str->s()];

    if (!slot) {
        // TODO: do CPython's refcounting here
        num_interned_strings.log();
        slot = str;
        // The table keeps its own reference to the newly interned object.
        Py_INCREF(str);
        // CPython would mark this mortal; in the current implementation interned
        // strings are immortal.
        str->interned_state = SSTATE_INTERNED_IMMORTAL;
        return;
    }

    // An equal string is already interned: swap the caller's reference over to it.
    Py_INCREF(slot);
    Py_DECREF(*p);
    *p = slot;
}
extern
"C"
void
_Py_ReleaseInternedStrings
()
noexcept
{
// printf("%ld interned strings\n", interned_strings.size());
for
(
const
auto
&
p
:
interned_strings
)
{
Py_DECREF
(
p
.
second
);
}
interned_strings
.
clear
();
}
/* Format codes
* F_LJUST '-'
* F_SIGN '+'
...
...
@@ -1613,34 +1557,45 @@ extern "C" size_t unicodeHashUnboxed(PyUnicodeObject* self) {
return
x
;
}
extern
"C"
size_t
strHashUnboxed
(
BoxedString
*
self
)
{
assert
(
PyString_Check
(
self
));
size_t
strHashUnboxedStrRef
(
llvm
::
StringRef
self
)
{
const
char
*
p
;
long
x
;
#ifdef Py_DEBUG
assert
(
_Py_HashSecret_Initialized
);
#endif
if
(
self
->
hash
!=
-
1
)
return
self
->
hash
;
long
len
=
Py_SIZE
(
self
);
long
len
=
self
.
size
();
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if
(
len
==
0
)
{
self
->
hash
=
0
;
return
0
;
}
p
=
self
->
s
()
.
data
();
p
=
self
.
data
();
x
=
_Py_HashSecret
.
prefix
;
x
^=
*
p
<<
7
;
while
(
--
len
>=
0
)
x
=
(
1000003
*
x
)
^
*
p
++
;
x
^=
Py_SIZE
(
self
);
x
^=
self
.
size
(
);
x
^=
_Py_HashSecret
.
suffix
;
if
(
x
==
-
1
)
x
=
-
2
;
return
x
;
}
// Returns the hash of the string object `self`.
//
// The hash is cached in self->hash, where -1 means "not computed yet".  On a cache
// miss the value is computed from the string contents by strHashUnboxedStrRef and
// stored back into self->hash before being returned, so hashing a BoxedString and
// hashing its contents as a StringRef yield the same value.
extern "C" size_t strHashUnboxed(BoxedString* self) {
    assert(PyString_Check(self));

#ifdef Py_DEBUG
    assert(_Py_HashSecret_Initialized);
#endif

    // Fast path: hash already cached on the object.
    if (self->hash != -1)
        return self->hash;

    long x = strHashUnboxedStrRef(self->s());
    self->hash = x;
    return x;
}
...
...
src/runtime/str_interning.cpp
0 → 100644
View file @
f74e9ee6
// Copyright (c) 2014-2016 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "Python.h"
#include "runtime/types.h"
// create a DenseMapInfo which produces the same hash values for llvm::StringRef and BoxedString* keys
namespace
llvm
{
template
<
>
struct
DenseMapInfo
<
pyston
::
BoxedString
*>
{
static
inline
pyston
::
BoxedString
*
getEmptyKey
()
{
uintptr_t
Val
=
static_cast
<
uintptr_t
>
(
-
1
);
Val
<<=
PointerLikeTypeTraits
<
pyston
::
BoxedString
*>::
NumLowBitsAvailable
;
return
reinterpret_cast
<
pyston
::
BoxedString
*>
(
Val
);
}
static
inline
pyston
::
BoxedString
*
getTombstoneKey
()
{
uintptr_t
Val
=
static_cast
<
uintptr_t
>
(
-
2
);
Val
<<=
PointerLikeTypeTraits
<
pyston
::
BoxedString
*>::
NumLowBitsAvailable
;
return
reinterpret_cast
<
pyston
::
BoxedString
*>
(
Val
);
}
static
unsigned
getHashValue
(
pyston
::
BoxedString
*
s
)
{
return
pyston
::
strHashUnboxed
(
s
);
}
static
unsigned
getHashValue
(
llvm
::
StringRef
s
)
{
return
pyston
::
strHashUnboxedStrRef
(
s
);
}
static
bool
isSpecial
(
pyston
::
BoxedString
*
v
)
{
return
v
==
getEmptyKey
()
||
v
==
getTombstoneKey
();
}
static
bool
isEqual
(
pyston
::
BoxedString
*
lhs
,
pyston
::
BoxedString
*
rhs
)
{
if
(
isSpecial
(
lhs
)
||
isSpecial
(
rhs
))
return
lhs
==
rhs
;
return
lhs
->
s
()
==
rhs
->
s
();
}
static
bool
isEqual
(
llvm
::
StringRef
lhs
,
pyston
::
BoxedString
*
rhs
)
{
if
(
isSpecial
(
rhs
))
return
false
;
return
lhs
==
rhs
->
s
();
}
};
}
namespace
pyston
{
// Set of all interned string objects.  Membership is decided by string contents
// through the DenseMapInfo<pyston::BoxedString*> specialization defined in this file.
static
llvm
::
DenseSet
<
BoxedString
*>
interned_strings
;
// Stat counter that is logged each time a new string is added to interned_strings.
static
StatCounter
num_interned_strings
(
"num_interned_string"
);
// C API entry point: interns the contents of the C string `s` and hands back the
// canonical string object (the reference is taken by internStringImmortal).
// `s` is checked with RELEASE_ASSERT before use.
extern "C" PyObject* PyString_InternFromString(const char* s) noexcept {
    RELEASE_ASSERT(s, "");

    llvm::StringRef contents(s);
    return internStringImmortal(contents);
}
// Returns the interned string object whose contents equal `s`, creating and
// registering it on first use.  The returned pointer has had its refcount
// incremented for the caller.
BoxedString* internStringImmortal(llvm::StringRef s) noexcept {
    // find_as lets us probe the set with the raw contents, without first having to
    // allocate a BoxedString just to perform the lookup.
    auto found = interned_strings.find_as(s);

    if (found == interned_strings.end()) {
        num_interned_strings.log();

        BoxedString* fresh = boxString(s);
        // CPython would hand back a mortal interned string here; in the current
        // implementation interned strings are immortal.
        fresh->interned_state = SSTATE_INTERNED_IMMORTAL;
        interned_strings.insert(fresh);

        Py_INCREF(fresh);
        return fresh;
    }

    return incref(*found);
}
// C API entry point: replaces *p with the interned string of the same contents.
// If no such string is registered yet, *p itself becomes the interned object.
// Aborts via Py_FatalError when *p is null or not a string.
extern "C" void PyString_InternInPlace(PyObject** p) noexcept {
    BoxedString* str = (BoxedString*)*p;
    if (!str || !PyString_Check(str))
        Py_FatalError("PyString_InternInPlace: strings only please!");

    /* If it's a string subclass, we don't really know what putting
       it in the interned dict might do.  Strings that are already
       interned need no work either. */
    if (!PyString_CheckExact(str) || PyString_CHECK_INTERNED(str))
        return;

    auto existing = interned_strings.find(str);

    if (existing == interned_strings.end()) {
        // TODO: do CPython's refcounting here
        num_interned_strings.log();
        interned_strings.insert(str);
        // The set keeps its own reference to the newly interned object.
        Py_INCREF(str);
        // CPython would mark this mortal; in the current implementation interned
        // strings are immortal.
        str->interned_state = SSTATE_INTERNED_IMMORTAL;
        return;
    }

    // An equal string is already interned: swap the caller's reference over to it.
    BoxedString* canonical = *existing;
    Py_INCREF(canonical);
    Py_DECREF(*p);
    *p = canonical;
}
extern
"C"
void
_Py_ReleaseInternedStrings
()
noexcept
{
// printf("%ld interned strings\n", interned_strings.size());
for
(
const
auto
&
p
:
interned_strings
)
{
Py_DECREF
(
p
);
}
interned_strings
.
clear
();
}
}
src/runtime/types.h
View file @
f74e9ee6
...
...
@@ -672,6 +672,7 @@ static_assert(offsetof(BoxedString, hash) == offsetof(PyStringObject, ob_shash),
static_assert
(
offsetof
(
BoxedString
,
interned_state
)
==
offsetof
(
PyStringObject
,
ob_sstate
),
""
);
static_assert
(
offsetof
(
BoxedString
,
s_data
)
==
offsetof
(
PyStringObject
,
ob_sval
),
""
);
// Computes the string hash directly from raw contents (no BoxedString needed, nothing
// is cached).  Defined in runtime/str.cpp.
size_t
strHashUnboxedStrRef
(
llvm
::
StringRef
str
);
// Returns the hash of a BoxedString: the cached `hash` field when it is already set,
// otherwise the value computed by strHashUnboxedStrRef, which is then cached on the object.
extern
"C"
size_t
strHashUnboxed
(
BoxedString
*
self
);
extern
"C"
int64_t
hashUnboxed
(
Box
*
obj
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment