Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
3486e84c
Commit
3486e84c
authored
Apr 02, 2015
by
Chris Toshok
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #427 from toshok/unicode-hash
use the same hash function for both unicode and string objects
parents
ed5f850c
15a0ed30
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
47 additions
and
7 deletions
+47
-7
from_cpython/Objects/unicodeobject.c
from_cpython/Objects/unicodeobject.c
+3
-5
src/runtime/objmodel.cpp
src/runtime/objmodel.cpp
+1
-1
src/runtime/str.cpp
src/runtime/str.cpp
+11
-1
src/runtime/types.h
src/runtime/types.h
+31
-0
test/tests/unicode_test.py
test/tests/unicode_test.py
+1
-0
No files found.
from_cpython/Objects/unicodeobject.c
View file @
3486e84c
...
@@ -6598,14 +6598,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
...
@@ -6598,14 +6598,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
return
(
PyObject
*
)
PyUnicode_FromUnicode
(
&
self
->
str
[
index
],
1
);
return
(
PyObject
*
)
PyUnicode_FromUnicode
(
&
self
->
str
[
index
],
1
);
}
}
extern
size_t
unicodeHashUnboxed
(
PyUnicodeObject
*
obj
);
static
long
static
long
unicode_hash
(
PyUnicodeObject
*
self
)
unicode_hash
(
PyUnicodeObject
*
self
)
{
{
// Pyston change: just convert to a str and hash, since we use std::hash and not
return
unicodeHashUnboxed
(
self
);
// CPython's hashing algorithm they duplicated here:
PyObject
*
str
=
PyUnicode_AsEncodedString
((
PyObject
*
)
self
,
"utf8"
,
"replace"
);
return
str
->
ob_type
->
tp_hash
(
str
);
#if 0
#if 0
/* Since Unicode objects compare equal to their ASCII string
/* Since Unicode objects compare equal to their ASCII string
counterparts, they should use the individual character values
counterparts, they should use the individual character values
...
...
src/runtime/objmodel.cpp
View file @
3486e84c
...
@@ -128,7 +128,7 @@ static Box* (*callattrInternal3)(Box*, const std::string*, LookupScope, CallRewr
...
@@ -128,7 +128,7 @@ static Box* (*callattrInternal3)(Box*, const std::string*, LookupScope, CallRewr
size_t
PyHasher
::
operator
()(
Box
*
b
)
const
{
size_t
PyHasher
::
operator
()(
Box
*
b
)
const
{
if
(
b
->
cls
==
str_cls
)
{
if
(
b
->
cls
==
str_cls
)
{
std
::
h
ash
<
std
::
string
>
H
;
StringH
ash
<
std
::
string
>
H
;
return
H
(
static_cast
<
BoxedString
*>
(
b
)
->
s
);
return
H
(
static_cast
<
BoxedString
*>
(
b
)
->
s
);
}
}
...
...
src/runtime/str.cpp
View file @
3486e84c
...
@@ -1478,10 +1478,20 @@ failed:
...
@@ -1478,10 +1478,20 @@ failed:
return
NULL
;
return
NULL
;
}
}
// Returns the hash of a unicode object's code units, computed with
// pyston::StringHash<Py_UNICODE> over PyUnicode_GET_SIZE(self) elements.
//
// self->hash acts as a cache slot: -1 means "not computed yet", any other
// value is returned as-is. The previous version consulted the slot but never
// filled it, so every call recomputed the hash; the result is now stored.
extern "C" size_t unicodeHashUnboxed(PyUnicodeObject* self) {
    if (self->hash != -1)
        return self->hash;

    Py_ssize_t len = PyUnicode_GET_SIZE(self);
    Py_UNICODE* p = PyUnicode_AS_UNICODE(self);

    pyston::StringHash<Py_UNICODE> H;
    const size_t result = H(p, len);

    // Only cache when the value survives the trip through the slot's type
    // unchanged, so the cached path always returns exactly what the uncached
    // path returned. A result that maps to -1 simply leaves the slot "unset"
    // and is recomputed on the next call.
    // NOTE(review): assumes the slot is a `long`, as in CPython 2.7's
    // PyUnicodeObject -- confirm against the header used by this build.
    const long cached = static_cast<long>(result);
    if (static_cast<size_t>(cached) == result)
        self->hash = cached;

    return result;
}
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
assert
(
isSubclass
(
self
->
cls
,
str_cls
));
assert
(
isSubclass
(
self
->
cls
,
str_cls
));
std
::
h
ash
<
std
::
string
>
H
;
StringH
ash
<
std
::
string
>
H
;
return
boxInt
(
H
(
self
->
s
));
return
boxInt
(
H
(
self
->
s
));
}
}
...
...
src/runtime/types.h
View file @
3486e84c
...
@@ -385,6 +385,37 @@ class BoxedUnicode : public Box {
...
@@ -385,6 +385,37 @@ class BoxedUnicode : public Box {
// TODO implementation
// TODO implementation
};
};
/// Multiplicative string hash ("hash * 33 + c", seeded with 5381) over a
/// sequence of character-like elements of type T.
///
/// Both overloads fold elements in the same way, so a NUL-terminated buffer
/// and the same elements passed with an explicit length produce the same value.
/// The call operators are const so the functor can be invoked through a const
/// object (e.g. when used as a container's hash functor).
template <typename T> struct StringHash {
    /// Hashes a NUL-terminated sequence; the terminator itself is not folded in.
    size_t operator()(const T* str) const {
        size_t hash = 5381;
        T c;
        while ((c = *str++))
            hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
        return hash;
    }

    /// Hashes exactly `len` elements starting at `str`; zero-valued elements
    /// are folded in like any other. A non-positive `len` yields the seed 5381.
    ///
    /// `len` is pointer-difference sized rather than `int` so that lengths
    /// passed as Py_ssize_t / size_t are not silently truncated.
    size_t operator()(const T* str, ptrdiff_t len) const {
        size_t hash = 5381;
        for (ptrdiff_t i = 0; i < len; ++i)
            hash = ((hash << 5) + hash) + str[i]; /* hash * 33 + c */
        return hash;
    }
};
template
<
>
struct
StringHash
<
std
::
string
>
{
size_t
operator
()(
const
std
::
string
&
str
)
{
StringHash
<
char
>
H
;
return
H
(
&
str
[
0
],
str
.
size
());
}
};
class
BoxedInstanceMethod
:
public
Box
{
class
BoxedInstanceMethod
:
public
Box
{
public:
public:
Box
**
in_weakreflist
;
Box
**
in_weakreflist
;
...
...
test/tests/unicode_test.py
View file @
3486e84c
...
@@ -24,6 +24,7 @@ print u'a' in c.__dict__
...
@@ -24,6 +24,7 @@ print u'a' in c.__dict__
print
u''
==
''
print
u''
==
''
print
''
==
u''
print
''
==
u''
print
hash
(
u''
)
==
hash
(
''
)
print
hash
(
u''
)
==
hash
(
''
)
print
hash
(
u'hello world'
)
==
hash
(
'hello world'
)
print
"Hello "
+
u" World"
print
"Hello "
+
u" World"
print
u"Hello "
+
" World"
print
u"Hello "
+
" World"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment