Commit 61e19055 authored by Stefan Behnel

coerce Py_UNICODE to and from single character unicode strings by default

parent 62a6348b
...@@ -881,17 +881,60 @@ class CBIntType(CIntType): ...@@ -881,17 +881,60 @@ class CBIntType(CIntType):
class CPyUnicodeIntType(CIntType): class CPyUnicodeIntType(CIntType):
# Py_UNICODE # Py_UNICODE
# Conversion from a unicode string to Py_UNICODE at runtime is not # Py_UNICODE coerces from and to single character unicode strings,
# currently supported and may never be - we only convert from and # but we also allow Python integers as input. The value range for
# to integers here. The maximum value for a Py_UNICODE is # Py_UNICODE is 0..1114111, which is checked when converting from
# 1114111, so PyInt_FromLong() will do just fine here. # an integer value.
to_py_function = "PyInt_FromLong" to_py_function = "PyUnicode_FromOrdinal"
from_py_function = "__Pyx_PyInt_AsPy_UNICODE" from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
def create_from_py_utility_code(self, env):
env.use_utility_code(pyobject_as_py_unicode_utility_code)
return True
def sign_and_name(self): def sign_and_name(self):
return "Py_UNICODE" return "Py_UNICODE"
# Utility code providing the C helper __Pyx_PyObject_AsPy_UNICODE(), which
# converts a Python object to a Py_UNICODE value:
#  - a unicode string must have length 1 and yields its single character;
#    any other length sets a ValueError that reports the actual length.
#  - any other object is converted with __Pyx_PyInt_AsLong(); the result must
#    lie in 0..PyUnicode_GetMax() (the maximum is looked up once and cached
#    in a function-level static).  A negative result sets OverflowError
#    unless an exception is already pending, and a too-large result sets
#    OverflowError.
# In every error case the helper returns (Py_UNICODE)-1.
pyobject_as_py_unicode_utility_code = UtilityCode(
proto='''
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
''',
impl='''
static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
static long maxval = 0;
long ival;
if (PyUnicode_Check(x)) {
if (unlikely(PyUnicode_GET_SIZE(x) != 1)) {
PyErr_Format(PyExc_ValueError,
"only single character unicode strings can be converted to Py_UNICODE, got length "
#if PY_VERSION_HEX < 0x02050000
"%d",
#else
"%zd",
#endif
PyUnicode_GET_SIZE(x));
return (Py_UNICODE)-1;
}
return PyUnicode_AS_UNICODE(x)[0];
}
if (unlikely(!maxval))
maxval = (long)PyUnicode_GetMax();
ival = __Pyx_PyInt_AsLong(x);
if (unlikely(ival < 0)) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError,
"cannot convert negative value to Py_UNICODE");
return (Py_UNICODE)-1;
} else if (unlikely(ival > maxval)) {
PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE");
return (Py_UNICODE)-1;
}
return (Py_UNICODE)ival;
}
''')
class CPySSizeTType(CIntType): class CPySSizeTType(CIntType):
...@@ -2512,10 +2555,6 @@ type_conversion_predeclarations = """ ...@@ -2512,10 +2555,6 @@ type_conversion_predeclarations = """
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
#ifdef Py_USING_UNICODE
static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject*);
#endif
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*); static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
...@@ -2580,26 +2619,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { ...@@ -2580,26 +2619,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
return res; return res;
} }
#ifdef Py_USING_UNICODE
static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject* x) {
long ival = __Pyx_PyInt_AsLong(x);
static long maxval = 0;
if (unlikely(!maxval))
maxval = (long)PyUnicode_GetMax();
if (unlikely(ival < 0)) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to Py_UNICODE");
return (Py_UNICODE)-1;
} else if (unlikely(ival > maxval)) {
PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE");
return (Py_UNICODE)-1;
}
return (Py_UNICODE)ival;
}
#endif
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
Py_ssize_t ival; Py_ssize_t ival;
PyObject* x = PyNumber_Index(b); PyObject* x = PyNumber_Index(b);
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
cdef Py_UNICODE char_ASCII = u'A' cdef Py_UNICODE char_ASCII = u'A'
cdef Py_UNICODE char_KLINGON = u'\uF8D2' cdef Py_UNICODE char_KLINGON = u'\uF8D2'
def compare_ASCII(): def compare_ASCII():
""" """
>>> compare_ASCII() >>> compare_ASCII()
...@@ -39,31 +38,68 @@ def index_literal(int i): ...@@ -39,31 +38,68 @@ def index_literal(int i):
>>> index_literal(4) == '5' >>> index_literal(4) == '5'
True True
""" """
# runtime casts are not currently supported
#return <Py_UNICODE>(u"12345"[i])
return u"12345"[i] return u"12345"[i]
def unicode_cardinal(Py_UNICODE i): def index_literal_pyunicode(int i):
"""
>>> index_literal_pyunicode(0) == '1'
True
>>> index_literal_pyunicode(-5) == '1'
True
>>> index_literal_pyunicode(2) == '3'
True
>>> index_literal_pyunicode(4) == '5'
True
"""
return <Py_UNICODE>(u"12345"[i])
from cpython.unicode cimport PyUnicode_FromOrdinal
import sys
u0 = u'\x00'
u1 = u'\x01'
umax = PyUnicode_FromOrdinal(sys.maxunicode)
def unicode_ordinal(Py_UNICODE i):
""" """
>>> import sys >>> ord(unicode_ordinal(0)) == 0
True
>>> ord(unicode_ordinal(1)) == 1
True
>>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
True
>>> unicode_cardinal(0) >>> ord(unicode_ordinal(u0)) == 0
0 True
>>> unicode_cardinal(1) >>> ord(unicode_ordinal(u1)) == 1
1 True
>>> unicode_cardinal(sys.maxunicode) == sys.maxunicode >>> ord(unicode_ordinal(umax)) == sys.maxunicode
True True
Value too small:
>>> unicode_cardinal(-1) #doctest: +ELLIPSIS >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
Traceback (most recent call last): Traceback (most recent call last):
... ...
OverflowError: ... OverflowError: ...
>>> unicode_cardinal(sys.maxunicode+1) #doctest: +ELLIPSIS Value too large:
>>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS
Traceback (most recent call last): Traceback (most recent call last):
... ...
OverflowError: ... OverflowError: ...
Less than one character:
>>> unicode_ordinal(u0[:0])
Traceback (most recent call last):
...
ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0
More than one character:
>>> unicode_ordinal(u0+u1)
Traceback (most recent call last):
...
ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
""" """
return i return i
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment