Commit f7ef93fb authored by Stefan Behnel

moved string/bytes/unicode related utility code into new StringTools.c file

parent 1c1ad3af
...@@ -17,12 +17,6 @@ proto = """ ...@@ -17,12 +17,6 @@ proto = """
#define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None) #define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None)
""") """)
# Utility code whose proto section consists solely of `#include <string.h>`.
# It has no impl section.
include_string_h_utility_code = UtilityCode(
proto = """
#include <string.h>
"""
)
abs_int_utility_code = UtilityCode( abs_int_utility_code = UtilityCode(
proto = ''' proto = '''
#if HAVE_LONG_LONG #if HAVE_LONG_LONG
......
...@@ -7985,15 +7985,15 @@ class CmpNode(object): ...@@ -7985,15 +7985,15 @@ class CmpNode(object):
type1, type2 = self.operand1.type, self.operand2.type type1, type2 = self.operand1.type, self.operand2.type
if type1.is_pyobject and type2.is_pyobject: if type1.is_pyobject and type2.is_pyobject:
if type1 is Builtin.unicode_type or type2 is Builtin.unicode_type: if type1 is Builtin.unicode_type or type2 is Builtin.unicode_type:
env.use_utility_code(pyunicode_equals_utility_code) env.use_utility_code(UtilityCode.load_cached("UnicodeEquals", "StringTools.c"))
self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals" self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
return True return True
elif type1 is Builtin.bytes_type or type2 is Builtin.bytes_type: elif type1 is Builtin.bytes_type or type2 is Builtin.bytes_type:
env.use_utility_code(pybytes_equals_utility_code) env.use_utility_code(UtilityCode.load_cached("BytesEquals", "StringTools.c"))
self.special_bool_cmp_function = "__Pyx_PyBytes_Equals" self.special_bool_cmp_function = "__Pyx_PyBytes_Equals"
return True return True
elif type1 is Builtin.str_type or type2 is Builtin.str_type: elif type1 is Builtin.str_type or type2 is Builtin.str_type:
env.use_utility_code(pystr_equals_utility_code) env.use_utility_code(UtilityCode.load_cached("StrEquals", "StringTools.c"))
self.special_bool_cmp_function = "__Pyx_PyString_Equals" self.special_bool_cmp_function = "__Pyx_PyString_Equals"
return True return True
return False return False
...@@ -8108,173 +8108,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) { ...@@ -8108,173 +8108,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) {
} }
""") """)
# C helper __Pyx_BytesContains(bytes, character): scans the buffer of a bytes
# object byte by byte and returns 1 as soon as one byte equals 'character',
# otherwise 0.  The helper itself performs no type check on 'bytes'.
char_in_bytes_utility_code = UtilityCode(
proto="""
static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character); /*proto*/
""",
impl="""
static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character) {
const Py_ssize_t length = PyBytes_GET_SIZE(bytes);
char* char_start = PyBytes_AS_STRING(bytes);
char* pos;
for (pos=char_start; pos < char_start+length; pos++) {
if (character == pos[0]) return 1;
}
return 0;
}
""")
# Two C helpers for testing whether a Py_UCS4 code point occurs in a unicode
# string:
#   __Pyx_UnicodeContainsUCS4()         - takes the unicode object; when
#       CYTHON_PEP393_ENABLED is defined and the string kind is not
#       PyUnicode_WCHAR_KIND it reads the characters with PyUnicode_READ(),
#       otherwise it delegates to the buffer variant below.
#   __Pyx_PyUnicodeBufferContainsUCS4() - linear scan over a Py_UNICODE buffer
#       of the given length.
# Both return 1 on a match and 0 otherwise.
py_ucs4_in_unicode_utility_code = UtilityCode(
proto="""
static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character); /*proto*/
static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character); /*proto*/
""",
# additionally handles surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds
impl="""
static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
#ifdef CYTHON_PEP393_ENABLED
Py_ssize_t i;
int kind;
void* udata;
Py_ssize_t length;
kind = PyUnicode_KIND(unicode);
if (likely(kind != PyUnicode_WCHAR_KIND)) {
udata = PyUnicode_DATA(unicode);
length = PyUnicode_GET_LENGTH(unicode);
for (i=0; i < length; i++) {
if (unlikely(character == PyUnicode_READ(kind, udata, i))) return 1;
}
return 0;
}
#endif
return __Pyx_PyUnicodeBufferContainsUCS4(
PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
character);
}
static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
Py_UNICODE uchar;
Py_UNICODE* pos;
#if Py_UNICODE_SIZE == 2
if (character > 65535) {
Py_UNICODE high_val, low_val;
high_val = (Py_UNICODE) (0xD800 | (((character - 0x10000) >> 10) & ((1<<10)-1)));
low_val = (Py_UNICODE) (0xDC00 | ( (character - 0x10000) & ((1<<10)-1)));
for (pos=buffer; pos < buffer+length-1; pos++) {
if (unlikely(high_val == pos[0]) & unlikely(low_val == pos[1])) return 1;
}
return 0;
}
#endif
uchar = (Py_UNICODE) character;
for (pos=buffer; pos < buffer+length; pos++) {
if (unlikely(uchar == pos[0])) return 1;
}
return 0;
}
""")
# C helper __Pyx_PyUnicode_Equals(s1, s2, equals): equality / inequality test
# with fast paths for identical objects, two exact unicode objects (length
# check, single-character check, then PyUnicode_Compare()) and None compared
# against an exact unicode object.  Everything else goes through
# PyObject_RichCompare().  The C code returns -1 on the error paths.
#
# The impl argument is built from two adjacent string literals; the lines
# between them are Python comments and therefore are not part of the C code
# held in the string.
pyunicode_equals_utility_code = UtilityCode(
proto="""
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
""",
impl="""
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
if (s1 == s2) { /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
return (equals == Py_EQ);
} else if (PyUnicode_CheckExact(s1) & PyUnicode_CheckExact(s2)) {
#ifdef CYTHON_PEP393_ENABLED
if ((PyUnicode_READY(s1) < 0) || (PyUnicode_READY(s2) < 0))
return -1;
if (PyUnicode_GET_LENGTH(s1) != PyUnicode_GET_LENGTH(s2)) {
return (equals == Py_NE);
} else if (PyUnicode_GET_LENGTH(s1) == 1) {
Py_UCS4 ch1 = PyUnicode_READ_CHAR(s1, 0);
Py_UCS4 ch2 = PyUnicode_READ_CHAR(s2, 0);
return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
"""
## currently disabled: may not be safe depending on who created the string
# } else if (PyUnicode_MAX_CHAR_VALUE(s1) != PyUnicode_MAX_CHAR_VALUE(s2)) {
# return (equals == Py_NE);
"""\
#else
if (PyUnicode_GET_SIZE(s1) != PyUnicode_GET_SIZE(s2)) {
return (equals == Py_NE);
} else if (PyUnicode_GET_SIZE(s1) == 1) {
Py_UNICODE ch1 = PyUnicode_AS_UNICODE(s1)[0];
Py_UNICODE ch2 = PyUnicode_AS_UNICODE(s2)[0];
return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
#endif
} else {
int result = PyUnicode_Compare(s1, s2);
if ((result == -1) && unlikely(PyErr_Occurred()))
return -1;
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
} else if ((s1 == Py_None) & PyUnicode_CheckExact(s2)) {
return (equals == Py_NE);
} else if ((s2 == Py_None) & PyUnicode_CheckExact(s1)) {
return (equals == Py_NE);
} else {
int result;
PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
if (!py_result)
return -1;
result = __Pyx_PyObject_IsTrue(py_result);
Py_DECREF(py_result);
return result;
}
}
""")
# C helper __Pyx_PyBytes_Equals(s1, s2, equals): equality / inequality test
# with fast paths for identical objects, two exact bytes objects (size check,
# single-byte check, then memcmp()) and None compared against an exact bytes
# object.  Everything else goes through PyObject_RichCompare(), where a NULL
# result makes the helper return -1.
# The impl calls memcmp(), hence the dependency on the <string.h> include
# utility code.
pybytes_equals_utility_code = UtilityCode(
proto="""
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
""",
impl="""
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
if (s1 == s2) { /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
return (equals == Py_EQ);
} else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
if (PyBytes_GET_SIZE(s1) != PyBytes_GET_SIZE(s2)) {
return (equals == Py_NE);
} else if (PyBytes_GET_SIZE(s1) == 1) {
if (equals == Py_EQ)
return (PyBytes_AS_STRING(s1)[0] == PyBytes_AS_STRING(s2)[0]);
else
return (PyBytes_AS_STRING(s1)[0] != PyBytes_AS_STRING(s2)[0]);
} else {
int result = memcmp(PyBytes_AS_STRING(s1), PyBytes_AS_STRING(s2), (size_t)PyBytes_GET_SIZE(s1));
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
} else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
return (equals == Py_NE);
} else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
return (equals == Py_NE);
} else {
int result;
PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
if (!py_result)
return -1;
result = __Pyx_PyObject_IsTrue(py_result);
Py_DECREF(py_result);
return result;
}
}
""",
requires=[Builtin.include_string_h_utility_code])
# Defines the C macro __Pyx_PyString_Equals as an alias: it expands to
# __Pyx_PyUnicode_Equals when PY_MAJOR_VERSION >= 3 and to
# __Pyx_PyBytes_Equals otherwise.  Both targets are listed in 'requires'
# because either one may be selected by the preprocessor.
pystr_equals_utility_code = UtilityCode(
proto="""
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
#else
#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
#endif
""",
requires=[pybytes_equals_utility_code, pyunicode_equals_utility_code])
class PrimaryCmpNode(ExprNode, CmpNode): class PrimaryCmpNode(ExprNode, CmpNode):
# Non-cascaded comparison or first comparison of # Non-cascaded comparison or first comparison of
...@@ -8327,13 +8160,13 @@ class PrimaryCmpNode(ExprNode, CmpNode): ...@@ -8327,13 +8160,13 @@ class PrimaryCmpNode(ExprNode, CmpNode):
error(self.pos, "Cascading comparison not yet supported for 'int_val in string'.") error(self.pos, "Cascading comparison not yet supported for 'int_val in string'.")
return return
if self.operand2.type is unicode_type: if self.operand2.type is unicode_type:
env.use_utility_code(py_ucs4_in_unicode_utility_code) env.use_utility_code(UtilityCode.load_cached("PyUCS4InUnicode", "StringTools.c"))
else: else:
if self.operand1.type is PyrexTypes.c_uchar_type: if self.operand1.type is PyrexTypes.c_uchar_type:
self.operand1 = self.operand1.coerce_to(PyrexTypes.c_char_type, env) self.operand1 = self.operand1.coerce_to(PyrexTypes.c_char_type, env)
if self.operand2.type is not bytes_type: if self.operand2.type is not bytes_type:
self.operand2 = self.operand2.coerce_to(bytes_type, env) self.operand2 = self.operand2.coerce_to(bytes_type, env)
env.use_utility_code(char_in_bytes_utility_code) env.use_utility_code(UtilityCode.load_cached("BytesContains", "StringTools.c"))
self.operand2 = self.operand2.as_none_safe_node( self.operand2 = self.operand2.as_none_safe_node(
"argument of type 'NoneType' is not iterable") "argument of type 'NoneType' is not iterable")
elif self.is_ptr_contains(): elif self.is_ptr_contains():
......
...@@ -2063,7 +2063,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -2063,7 +2063,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
node.pos, "strlen", self.Pyx_strlen_func_type, node.pos, "strlen", self.Pyx_strlen_func_type,
args = [arg], args = [arg],
is_temp = node.is_temp, is_temp = node.is_temp,
utility_code = Builtin.include_string_h_utility_code) utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
elif arg.type.is_pyobject: elif arg.type.is_pyobject:
cfunc_name = self._map_to_capi_len_function(arg.type) cfunc_name = self._map_to_capi_len_function(arg.type)
if cfunc_name is None: if cfunc_name is None:
...@@ -2762,7 +2762,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): ...@@ -2762,7 +2762,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform):
string_node.pos, "strlen", self.Pyx_strlen_func_type, string_node.pos, "strlen", self.Pyx_strlen_func_type,
args = [string_node], args = [string_node],
is_temp = False, is_temp = False,
utility_code = Builtin.include_string_h_utility_code, utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"),
).coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) ).coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
elif start: elif start:
stop = ExprNodes.SubNode( stop = ExprNodes.SubNode(
......
//////////////////// IncludeStringH.proto ////////////////////
#include <string.h>
//////////////////// BytesContains.proto ////////////////////
static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character); /*proto*/
//////////////////// BytesContains ////////////////////
/*
 * Test whether the byte value 'character' occurs in the bytes object 'bytes'.
 *
 * No type check is done here: the size and data pointer are read directly
 * through PyBytes_GET_SIZE() / PyBytes_AS_STRING(), so the caller is expected
 * to pass an actual bytes object.
 *
 * Returns 1 on the first matching byte, 0 if none matches (including the
 * empty string).
 */
static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character) {
    const char* const data = PyBytes_AS_STRING(bytes);
    const Py_ssize_t size = PyBytes_GET_SIZE(bytes);
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        if (data[i] == character)
            return 1;
    }
    return 0;
}
//////////////////// PyUCS4InUnicode.proto ////////////////////
static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character); /*proto*/
static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character); /*proto*/
//////////////////// PyUCS4InUnicode ////////////////////
/*
 * Test whether the code point 'character' occurs in the unicode object
 * 'unicode'.  Returns 1 if found, 0 otherwise.
 *
 * When CYTHON_PEP393_ENABLED is defined and the string's kind is not
 * PyUnicode_WCHAR_KIND, the characters are read one by one with
 * PyUnicode_READ().  In every other case the search is delegated to
 * __Pyx_PyUnicodeBufferContainsUCS4() on the Py_UNICODE buffer.
 *
 * NOTE(review): the guard is an #ifdef, so it is taken whenever the macro is
 * defined at all, regardless of its value - confirm against the place where
 * CYTHON_PEP393_ENABLED is defined.
 */
static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
#ifdef CYTHON_PEP393_ENABLED
    const int kind = PyUnicode_KIND(unicode);
    if (likely(kind != PyUnicode_WCHAR_KIND)) {
        void* const data = PyUnicode_DATA(unicode);
        const Py_ssize_t char_count = PyUnicode_GET_LENGTH(unicode);
        Py_ssize_t index = 0;
        while (index < char_count) {
            if (unlikely(character == PyUnicode_READ(kind, data, index)))
                return 1;
            index++;
        }
        return 0;
    }
#endif
    /* fall back to scanning the Py_UNICODE representation */
    return __Pyx_PyUnicodeBufferContainsUCS4(
        PyUnicode_AS_UNICODE(unicode),
        PyUnicode_GET_SIZE(unicode),
        character);
}
/*
 * Test whether the code point 'character' occurs in the first 'length'
 * elements of the Py_UNICODE array 'buffer'.  Returns 1 if found, 0 otherwise.
 *
 * When Py_UNICODE_SIZE == 2, a code point above 65535 cannot be stored in a
 * single element, so it is split into its high/low surrogate values and the
 * buffer is searched for that adjacent pair instead.
 *
 * The buffer is walked by index rather than by pointer so that no pointer
 * outside the buffer is ever formed: the previous loop bound
 * 'buffer+length-1' pointed before the start of the buffer for length == 0.
 */
static CYTHON_INLINE int __Pyx_PyUnicodeBufferContainsUCS4(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) {
    Py_UNICODE uchar;
    Py_ssize_t i;
    #if Py_UNICODE_SIZE == 2
    if (character > 65535) {
        /* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */
        Py_UNICODE high_val, low_val;
        high_val = (Py_UNICODE) (0xD800 | (((character - 0x10000) >> 10) & ((1<<10)-1)));
        low_val = (Py_UNICODE) (0xDC00 | ( (character - 0x10000) & ((1<<10)-1)));
        /* 'i + 1 < length' stops before the last element (a pair needs two)
           and is simply false for empty or one-element buffers */
        for (i = 0; i + 1 < length; i++) {
            if (unlikely(high_val == buffer[i]) & unlikely(low_val == buffer[i+1])) return 1;
        }
        return 0;
    }
    #endif
    uchar = (Py_UNICODE) character;
    for (i = 0; i < length; i++) {
        if (unlikely(uchar == buffer[i])) return 1;
    }
    return 0;
}
//////////////////// StrEquals.proto ////////////////////
//@requires: BytesEquals
//@requires: UnicodeEquals
/* __Pyx_PyString_Equals is only an alias: it expands to
   __Pyx_PyUnicode_Equals when PY_MAJOR_VERSION >= 3 and to
   __Pyx_PyBytes_Equals otherwise. */
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
#else
#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
#endif
//////////////////// UnicodeEquals.proto ////////////////////
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
//////////////////// UnicodeEquals ////////////////////
/*
 * Equality / inequality test with fast paths for unicode objects.
 *
 * 'equals' is compared against Py_EQ and Py_NE to select the sense of the
 * test (callers presumably pass only one of those two - TODO confirm).
 *
 * Returns the truth value of the comparison.  Returns -1 when
 * PyUnicode_READY() fails, when PyUnicode_Compare() returns -1 with an
 * exception set, or when PyObject_RichCompare() returns NULL; in the generic
 * branch the value is whatever __Pyx_PyObject_IsTrue() returns.
 */
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
if (s1 == s2) { /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
return (equals == Py_EQ);
/* bitwise '&': both exact-type checks are evaluated, no short-circuit */
} else if (PyUnicode_CheckExact(s1) & PyUnicode_CheckExact(s2)) {
/* NOTE(review): #ifdef is taken whenever the macro is defined at all,
   regardless of its value - confirm how CYTHON_PEP393_ENABLED is defined */
#ifdef CYTHON_PEP393_ENABLED
if ((PyUnicode_READY(s1) < 0) || (PyUnicode_READY(s2) < 0))
return -1;
/* strings of different length can never be equal */
if (PyUnicode_GET_LENGTH(s1) != PyUnicode_GET_LENGTH(s2)) {
return (equals == Py_NE);
} else if (PyUnicode_GET_LENGTH(s1) == 1) {
/* single characters are compared directly */
Py_UCS4 ch1 = PyUnicode_READ_CHAR(s1, 0);
Py_UCS4 ch2 = PyUnicode_READ_CHAR(s2, 0);
return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
//// currently disabled: may not be safe depending on who created the string
// } else if (PyUnicode_MAX_CHAR_VALUE(s1) != PyUnicode_MAX_CHAR_VALUE(s2)) {
// return (equals == Py_NE);
#else
/* same length / single-character shortcuts on the Py_UNICODE representation */
if (PyUnicode_GET_SIZE(s1) != PyUnicode_GET_SIZE(s2)) {
return (equals == Py_NE);
} else if (PyUnicode_GET_SIZE(s1) == 1) {
Py_UNICODE ch1 = PyUnicode_AS_UNICODE(s1)[0];
Py_UNICODE ch2 = PyUnicode_AS_UNICODE(s2)[0];
return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
#endif
/* this 'else' closes the if/else-if chain opened in whichever
   preprocessor branch above was compiled */
} else {
int result = PyUnicode_Compare(s1, s2);
/* -1 is also a regular "less than" result, so check for a pending exception */
if ((result == -1) && unlikely(PyErr_Occurred()))
return -1;
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
/* None versus an exact unicode object: reported as "not equal" */
} else if ((s1 == Py_None) & PyUnicode_CheckExact(s2)) {
return (equals == Py_NE);
} else if ((s2 == Py_None) & PyUnicode_CheckExact(s1)) {
return (equals == Py_NE);
} else {
/* any other combination of operands: generic rich comparison */
int result;
PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
if (!py_result)
return -1;
result = __Pyx_PyObject_IsTrue(py_result);
Py_DECREF(py_result);
return result;
}
}
//////////////////// BytesEquals.proto ////////////////////
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
//////////////////// BytesEquals ////////////////////
//@requires: IncludeStringH
/*
 * Equality / inequality test with fast paths for bytes objects.
 *
 * 'equals' is compared against Py_EQ and Py_NE to select the sense of the
 * test (callers presumably pass only one of those two - TODO confirm).
 *
 * Returns the truth value of the comparison.  Returns -1 when
 * PyObject_RichCompare() returns NULL; in the generic branch the value is
 * whatever __Pyx_PyObject_IsTrue() returns.
 */
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
    PyObject* py_result;
    int result;

    if (s1 == s2) {
        /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
        return (equals == Py_EQ);
    }

    /* bitwise '&': both exact-type checks are evaluated, no short-circuit */
    if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
        const Py_ssize_t size = PyBytes_GET_SIZE(s1);
        const char* data1;
        const char* data2;
        int differ;

        /* strings of different length can never be equal */
        if (size != PyBytes_GET_SIZE(s2))
            return (equals == Py_NE);

        data1 = PyBytes_AS_STRING(s1);
        data2 = PyBytes_AS_STRING(s2);
        if (size == 1) {
            /* single bytes are compared directly */
            differ = (data1[0] != data2[0]);
        } else {
            differ = (memcmp(data1, data2, (size_t)size) != 0);
        }
        return (equals == Py_EQ) ? !differ : differ;
    }

    /* None versus an exact bytes object: reported as "not equal" */
    if (((s1 == Py_None) & PyBytes_CheckExact(s2)) ||
            ((s2 == Py_None) & PyBytes_CheckExact(s1))) {
        return (equals == Py_NE);
    }

    /* any other combination of operands: generic rich comparison */
    py_result = PyObject_RichCompare(s1, s2, equals);
    if (!py_result)
        return -1;
    result = __Pyx_PyObject_IsTrue(py_result);
    Py_DECREF(py_result);
    return result;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment