Commit 24735826 authored by Stefan Behnel

optimise isinstance(obj, basestring) and map basestring to unicode in Py3

parent 6c673fde
...@@ -8,6 +8,12 @@ Cython Changelog ...@@ -8,6 +8,12 @@ Cython Changelog
Features added Features added
-------------- --------------
* ``isinstance(obj, basestring)`` is optimised. In Python 3 it only tests
for instances of ``str`` (i.e. Py2 ``unicode``).
* The ``basestring`` builtin is mapped to ``str`` (i.e. Py2 ``unicode``) when
compiling the generated C code under Python 3.
* A new class decorator ``@cython.freelist(N)`` creates a static freelist of N * A new class decorator ``@cython.freelist(N)`` creates a static freelist of N
instances for an extension type, thus avoiding the costly allocation step if instances for an extension type, thus avoiding the costly allocation step if
possible. This can speed up object instantiation by 20-30% in suitable possible. This can speed up object instantiation by 20-30% in suitable
......
...@@ -268,6 +268,7 @@ builtin_types_table = [ ...@@ -268,6 +268,7 @@ builtin_types_table = [
BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type), BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
]), ]),
("basestring", "PyBaseString_Type", []),
("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"), ("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
]), ]),
("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"), ("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
......
...@@ -38,6 +38,7 @@ non_portable_builtins_map = { ...@@ -38,6 +38,7 @@ non_portable_builtins_map = {
'unicode' : ('PY_MAJOR_VERSION >= 3', 'str'), 'unicode' : ('PY_MAJOR_VERSION >= 3', 'str'),
'xrange' : ('PY_MAJOR_VERSION >= 3', 'range'), 'xrange' : ('PY_MAJOR_VERSION >= 3', 'range'),
'BaseException' : ('PY_VERSION_HEX < 0x02050000', 'Exception'), 'BaseException' : ('PY_VERSION_HEX < 0x02050000', 'Exception'),
'basestring' : ('PY_MAJOR_VERSION >= 3', 'unicode'),
} }
basicsize_builtins_map = { basicsize_builtins_map = {
......
...@@ -973,6 +973,8 @@ class BuiltinObjectType(PyObjectType): ...@@ -973,6 +973,8 @@ class BuiltinObjectType(PyObjectType):
type_name = self.name type_name = self.name
if type_name == 'str': if type_name == 'str':
type_check = 'PyString_Check' type_check = 'PyString_Check'
elif type_name == 'basestring':
type_check = '__Pyx_PyBaseString_Check'
elif type_name == 'frozenset': elif type_name == 'frozenset':
type_check = 'PyFrozenSet_Check' type_check = 'PyFrozenSet_Check'
else: else:
......
...@@ -168,6 +168,15 @@ ...@@ -168,6 +168,15 @@
#define PyBytes_ConcatAndDel PyString_ConcatAndDel #define PyBytes_ConcatAndDel PyString_ConcatAndDel
#endif #endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
#else
#define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
PyString_Check(obj) || PyUnicode_Check(obj))
#define __Pyx_PyBaseString_CheckExact(obj) (Py_TYPE(obj) == &PyBaseString_Type)
#endif
#if PY_VERSION_HEX < 0x02060000 #if PY_VERSION_HEX < 0x02060000
#define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type) #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
#define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type) #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
......
import sys
# True when the tests run on a Python major version of 3 or later.
IS_PY3 = sys.version_info[0] >= 3
# Fixture strings used by the doctests below: one literal per prefix style.
# Explicitly unicode-prefixed literal.
ustring = u'abcdef'
# Unprefixed literal -- presumably the platform-native ``str`` type; the
# doctests expect it to be a basestring on every Python version.
sstring = 'abcdef'
# Explicitly bytes-prefixed literal.
bstring = b'abcdef'
def isinstance_basestring(obj):
    """
    Return whether ``obj`` is an instance of ``basestring``.

    The unicode and unprefixed fixtures must always match; the bytes
    fixture must match only when not running under Python 3.

    >>> isinstance_basestring(ustring)
    True
    >>> isinstance_basestring(sstring)
    True
    >>> if IS_PY3: print(not isinstance_basestring(bstring))
    ... else: print(isinstance_basestring(bstring))
    True
    """
    return isinstance(obj, basestring)
def basestring_is_unicode_in_py3():
    """
    Return True when the identity of ``basestring`` and ``unicode`` matches
    the running Python version: the same object under Python 3, two
    distinct objects otherwise.

    >>> basestring_is_unicode_in_py3()
    True
    """
    if IS_PY3:
        # Under Python 3 both names are expected to refer to one type object.
        return basestring is unicode
    else:
        # Before Python 3 the two names must stay distinct types.
        return basestring is not unicode
def unicode_subtypes_basestring():
    """
    Return whether ``unicode`` is a subclass of ``basestring``.

    >>> unicode_subtypes_basestring()
    True
    """
    return issubclass(unicode, basestring)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment