Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cython
Commits
61e19055
Commit
61e19055
authored
Apr 25, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
coerce Py_UNICODE to and from single character unicode strings by default
parent
62a6348b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
43 deletions
+98
-43
Cython/Compiler/PyrexTypes.py
Cython/Compiler/PyrexTypes.py
+49
-30
tests/run/py_unicode_type.pyx
tests/run/py_unicode_type.pyx
+49
-13
No files found.
Cython/Compiler/PyrexTypes.py
View file @
61e19055
...
...
@@ -881,17 +881,60 @@ class CBIntType(CIntType):
class
CPyUnicodeIntType
(
CIntType
):
# Py_UNICODE
#
Conversion from a unicode string to Py_UNICODE at runtime is not
#
currently supported and may never be - we only convert from and
#
to integers here. The maximum value for a Py_UNICODE is
#
1114111, so PyInt_FromLong() will do just fine her
e.
#
Py_UNICODE coerces from and to single character unicode strings,
#
but we also allow Python integers as input. The value range for
#
Py_UNICODE is 0..1114111, which is checked when converting from
#
an integer valu
e.
to_py_function
=
"PyInt_FromLong"
from_py_function
=
"__Pyx_PyInt_AsPy_UNICODE"
to_py_function
=
"PyUnicode_FromOrdinal"
from_py_function
=
"__Pyx_PyObject_AsPy_UNICODE"
def
create_from_py_utility_code
(
self
,
env
):
env
.
use_utility_code
(
pyobject_as_py_unicode_utility_code
)
return
True
def
sign_and_name
(
self
):
return
"Py_UNICODE"
pyobject_as_py_unicode_utility_code
=
UtilityCode
(
proto
=
'''
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
'''
,
impl
=
'''
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
static long maxval = 0;
long ival;
if (PyUnicode_Check(x)) {
if (unlikely(PyUnicode_GET_SIZE(x) != 1)) {
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UNICODE, got length "
#if PY_VERSION_HEX < 0x02050000
"%d",
#else
"%zd",
#endif
PyUnicode_GET_SIZE(x));
return (Py_UNICODE)-1;
}
return PyUnicode_AS_UNICODE(x)[0];
}
if (unlikely(!maxval))
maxval = (long)PyUnicode_GetMax();
ival = __Pyx_PyInt_AsLong(x);
if (unlikely(ival < 0)) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError,
"cannot convert negative value to Py_UNICODE");
return (Py_UNICODE)-1;
} else if (unlikely(ival > maxval)) {
PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE");
return (Py_UNICODE)-1;
}
return (Py_UNICODE)ival;
}
'''
)
class
CPySSizeTType
(
CIntType
):
...
...
@@ -2512,10 +2555,6 @@ type_conversion_predeclarations = """
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
#ifdef Py_USING_UNICODE
static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject*);
#endif
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
...
...
@@ -2580,26 +2619,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
return res;
}
#ifdef Py_USING_UNICODE
static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject* x) {
long ival = __Pyx_PyInt_AsLong(x);
static long maxval = 0;
if (unlikely(!maxval))
maxval = (long)PyUnicode_GetMax();
if (unlikely(ival < 0)) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to Py_UNICODE");
return (Py_UNICODE)-1;
} else if (unlikely(ival > maxval)) {
PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE");
return (Py_UNICODE)-1;
}
return (Py_UNICODE)ival;
}
#endif
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
Py_ssize_t ival;
PyObject* x = PyNumber_Index(b);
...
...
tests/run/py_unicode_type.pyx
View file @
61e19055
...
...
@@ -3,7 +3,6 @@
cdef
Py_UNICODE
char_ASCII
=
u'A'
cdef
Py_UNICODE
char_KLINGON
=
u'
\
uF8D2
'
def
compare_ASCII
():
"""
>>> compare_ASCII()
...
...
@@ -39,31 +38,68 @@ def index_literal(int i):
>>> index_literal(4) == '5'
True
"""
# runtime casts are not currently supported
#return <Py_UNICODE>(u"12345"[i])
return
u"12345"
[
i
]
def
unicode_cardinal
(
Py_UNICODE
i
):
def
index_literal_pyunicode
(
int
i
):
"""
>>> index_literal_pyunicode(0) == '1'
True
>>> index_literal_pyunicode(-5) == '1'
True
>>> index_literal_pyunicode(2) == '3'
True
>>> index_literal_pyunicode(4) == '5'
True
"""
return
<
Py_UNICODE
>
(
u"12345"
[
i
])
from
cpython.unicode
cimport
PyUnicode_FromOrdinal
import
sys
u0
=
u'
\
x00
'
u1
=
u'
\
x01
'
umax
=
PyUnicode_FromOrdinal
(
sys
.
maxunicode
)
def
unicode_ordinal
(
Py_UNICODE
i
):
"""
>>> import sys
>>> ord(unicode_ordinal(0)) == 0
True
>>> ord(unicode_ordinal(1)) == 1
True
>>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
True
>>>
unicode_cardinal(0)
0
>>>
unicode_cardinal(1)
1
>>>
unicode_cardinal(sys.maxunicode
) == sys.maxunicode
>>>
ord(unicode_ordinal(u0)) == 0
True
>>>
ord(unicode_ordinal(u1)) == 1
True
>>>
ord(unicode_ordinal(umax)
) == sys.maxunicode
True
>>> unicode_
ca
rdinal(-1) #doctest: +ELLIPSIS
Value too small:
>>> unicode_
o
rdinal(-1) #doctest: +ELLIPSIS
Traceback (most recent call last):
...
OverflowError: ...
>>> unicode_cardinal(sys.maxunicode+1) #doctest: +ELLIPSIS
Value too large:
>>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS
Traceback (most recent call last):
...
OverflowError: ...
Less than one character:
>>> unicode_ordinal(u0[:0])
Traceback (most recent call last):
...
ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0
More than one character:
>>> unicode_ordinal(u0+u1)
Traceback (most recent call last):
...
ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
"""
return
i
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment