Commit ec0ce7d3 authored by Stefan Behnel

reimplement unicode character iteration for PEP393 (Py3.3+)

parent 4b6bd728
...@@ -157,8 +157,10 @@ class IterationTransform(Visitor.VisitorTransform): ...@@ -157,8 +157,10 @@ class IterationTransform(Visitor.VisitorTransform):
# C array (slice) iteration? # C array (slice) iteration?
if iterator.type.is_ptr or iterator.type.is_array: if iterator.type.is_ptr or iterator.type.is_array:
return self._transform_carray_iteration(node, iterator, reversed=reversed) return self._transform_carray_iteration(node, iterator, reversed=reversed)
if iterator.type in (Builtin.bytes_type, Builtin.unicode_type): if iterator.type is Builtin.bytes_type:
return self._transform_string_iteration(node, iterator, reversed=reversed) return self._transform_bytes_iteration(node, iterator, reversed=reversed)
if iterator.type is Builtin.unicode_type:
return self._transform_unicode_iteration(node, iterator, reversed=reversed)
# the rest is based on function calls # the rest is based on function calls
if not isinstance(iterator, ExprNodes.SimpleCallNode): if not isinstance(iterator, ExprNodes.SimpleCallNode):
...@@ -233,16 +235,6 @@ class IterationTransform(Visitor.VisitorTransform): ...@@ -233,16 +235,6 @@ class IterationTransform(Visitor.VisitorTransform):
return self._optimise_for_loop(node, arg, reversed=True) return self._optimise_for_loop(node, arg, reversed=True)
PyUnicode_AS_UNICODE_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_py_unicode_ptr_type, [
PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)
])
PyUnicode_GET_SIZE_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_py_ssize_t_type, [
PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)
])
PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType( PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_char_ptr_type, [ PyrexTypes.c_char_ptr_type, [
PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None) PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
...@@ -253,35 +245,25 @@ class IterationTransform(Visitor.VisitorTransform): ...@@ -253,35 +245,25 @@ class IterationTransform(Visitor.VisitorTransform):
PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None) PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
]) ])
def _transform_string_iteration(self, node, slice_node, reversed=False): def _transform_bytes_iteration(self, node, slice_node, reversed=False):
if slice_node.type is Builtin.unicode_type:
unpack_func = "PyUnicode_AS_UNICODE"
len_func = "PyUnicode_GET_SIZE"
unpack_func_type = self.PyUnicode_AS_UNICODE_func_type
len_func_type = self.PyUnicode_GET_SIZE_func_type
elif slice_node.type is Builtin.bytes_type:
target_type = node.target.type target_type = node.target.type
if not target_type.is_int: if not target_type.is_int:
# bytes iteration returns bytes objects in Py2, but # bytes iteration returns bytes objects in Py2, but
# integers in Py3 # integers in Py3
return node return node
unpack_func = "PyBytes_AS_STRING"
unpack_func_type = self.PyBytes_AS_STRING_func_type
len_func = "PyBytes_GET_SIZE"
len_func_type = self.PyBytes_GET_SIZE_func_type
else:
return node
unpack_temp_node = UtilNodes.LetRefNode( unpack_temp_node = UtilNodes.LetRefNode(
slice_node.as_none_safe_node("'NoneType' is not iterable")) slice_node.as_none_safe_node("'NoneType' is not iterable"))
slice_base_node = ExprNodes.PythonCapiCallNode( slice_base_node = ExprNodes.PythonCapiCallNode(
slice_node.pos, unpack_func, unpack_func_type, slice_node.pos, "PyBytes_AS_STRING",
self.PyBytes_AS_STRING_func_type,
args = [unpack_temp_node], args = [unpack_temp_node],
is_temp = 0, is_temp = 0,
) )
len_node = ExprNodes.PythonCapiCallNode( len_node = ExprNodes.PythonCapiCallNode(
slice_node.pos, len_func, len_func_type, slice_node.pos, "PyBytes_GET_SIZE",
self.PyBytes_GET_SIZE_func_type,
args = [unpack_temp_node], args = [unpack_temp_node],
is_temp = 0, is_temp = 0,
) )
...@@ -301,6 +283,93 @@ class IterationTransform(Visitor.VisitorTransform): ...@@ -301,6 +283,93 @@ class IterationTransform(Visitor.VisitorTransform):
), ),
reversed = reversed)) reversed = reversed))
# C function type descriptions for the unicode helper macros that
# _transform_unicode_iteration() calls through PythonCapiCallNode.
# Each CFuncType is given its return type followed by its argument list.

# used for "__Pyx_PyUnicode_GET_LENGTH": Py_ssize_t result, one unicode argument
PyUnicode_GET_LENGTH_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_py_ssize_t_type,
    [PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)])

# used for "__Pyx_PyUnicode_KIND": C int result, one unicode argument
PyUnicode_KIND_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_int_type,
    [PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)])

# used for "__Pyx_PyUnicode_READ": Py_UCS4 result from (kind, data, index)
PyUnicode_READ_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_py_ucs4_type,
    [PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_type, None),
     PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_type, None),
     PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None)])

# used for "__Pyx_PyUnicode_DATA": void* result, one unicode argument
PyUnicode_DATA_func_type = PyrexTypes.CFuncType(
    PyrexTypes.c_void_ptr_type,
    [PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)])
def _transform_unicode_iteration(self, node, slice_node, reversed=False):
    """Replace a for-loop over a unicode object by an index-based loop.

    The iterated object (slice_node) is wrapped with
    as_none_safe_node("'NoneType' is not iterable") and held in a
    LetRefNode.  Its length, kind and data pointer are obtained through
    the __Pyx_PyUnicode_GET_LENGTH / _KIND / _DATA helpers, and every
    iteration assigns __Pyx_PyUnicode_READ(kind, data, counter) to the
    original loop target before running the original loop body.

    node       -- the for-loop node being transformed
    slice_node -- the unicode expression that is iterated over
    reversed   -- if true, the bounds are swapped and the relations
                  become '>' / '>=' instead of '<=' / '<'

    Returns the ForFromStatNode wrapped in a TempsBlockNode (for the
    counter) and three nested LetNodes (kind, data, string reference).

    NOTE(review): on PEP 393 builds, CPython documents that the string
    must be "ready" before KIND/DATA are used; no readiness check is
    visible in this method -- confirm it is guaranteed elsewhere.
    """
    string_ref = UtilNodes.LetRefNode(
        slice_node.as_none_safe_node("'NoneType' is not iterable"))

    zero_node = ExprNodes.IntNode(
        node.pos, value='0', constant_result=0,
        type=PyrexTypes.c_py_ssize_t_type)
    length_node = ExprNodes.PythonCapiCallNode(
        slice_node.pos, "__Pyx_PyUnicode_GET_LENGTH",
        self.PyUnicode_GET_LENGTH_func_type,
        args=[string_ref],
        is_temp=not reversed,
    )

    if reversed:
        # loop header reads: length > counter >= 0
        first_relation, second_relation = '>', '>='
        first_bound, second_bound = length_node, zero_node
    else:
        # loop header reads: 0 <= counter < length
        first_relation, second_relation = '<=', '<'
        first_bound, second_bound = zero_node, length_node

    index_handle = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
    index_ref = index_handle.ref(node.target.pos)

    kind_call = ExprNodes.PythonCapiCallNode(
        slice_node.pos, "__Pyx_PyUnicode_KIND",
        self.PyUnicode_KIND_func_type,
        args=[string_ref],
        is_temp=False,
    )
    kind_ref = UtilNodes.LetRefNode(kind_call)

    data_call = ExprNodes.PythonCapiCallNode(
        slice_node.pos, "__Pyx_PyUnicode_DATA",
        self.PyUnicode_DATA_func_type,
        args=[string_ref],
        is_temp=False,
    )
    data_ref = UtilNodes.LetRefNode(data_call)

    # target = __Pyx_PyUnicode_READ(kind, data, counter)
    read_call = ExprNodes.PythonCapiCallNode(
        slice_node.pos, "__Pyx_PyUnicode_READ",
        self.PyUnicode_READ_func_type,
        args=[kind_ref, data_ref, index_ref],
        is_temp=0,
    )
    assign_target = Nodes.SingleAssignmentNode(
        pos=node.target.pos,
        lhs=node.target,
        rhs=read_call)

    loop_body = Nodes.StatListNode(
        node.pos,
        stats=[assign_target, node.body])

    result = Nodes.ForFromStatNode(
        node.pos,
        bound1=first_bound, relation1=first_relation,
        target=index_ref,
        relation2=second_relation, bound2=second_bound,
        step=None, body=loop_body,
        else_clause=node.else_clause,
        from_range=True)

    result = UtilNodes.TempsBlockNode(
        node.pos, temps=[index_handle], body=result)

    # wrap innermost first; the string reference ends up as the outermost
    # LetNode because kind and data are both computed from it
    result = UtilNodes.LetNode(kind_ref, result)
    result = UtilNodes.LetNode(data_ref, result)
    result = UtilNodes.LetNode(string_ref, result)
    return result
def _transform_carray_iteration(self, node, slice_node, reversed=False): def _transform_carray_iteration(self, node, slice_node, reversed=False):
neg_step = False neg_step = False
if isinstance(slice_node, ExprNodes.SliceIndexNode): if isinstance(slice_node, ExprNodes.SliceIndexNode):
......
...@@ -116,15 +116,22 @@ ...@@ -116,15 +116,22 @@
#define Py_TPFLAGS_HAVE_NEWBUFFER 0 #define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif #endif
/* new Py3.3 unicode representation (PEP 393) */ /* new Py3.3 unicode type (PEP 393) */
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_GET_LENGTH) #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1 #define CYTHON_PEP393_ENABLED 1
#define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#else #else
#define CYTHON_PEP393_ENABLED 0 #define CYTHON_PEP393_ENABLED 0
#define __Pyx_PyUnicode_KIND(u) (0) /* PyUnicode_WCHAR_KIND */
#define __Pyx_PyUnicode_DATA(u) PyUnicode_AS_UNICODE(u)
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
/* Pre-PEP393 fallback: 'd' is the Py_UNICODE buffer, 'k' (the kind) is not
 * needed to read from it.  (void)(k) evaluates and discards 'k' so that an
 * otherwise unused kind variable does not trigger a compiler warning; unlike
 * a self-assignment it does not require 'k' to be a modifiable lvalue and
 * does not provoke self-assign warnings.  'd' is parenthesised so that the
 * cast applies to the whole argument expression. */
#define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)(d))[i]))
#endif #endif
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment