Commit e5c513bf authored by Kirill Smelkov

golang, strconv: Switch them to cimport each other at pyx level

Since 50b8cb7e (strconv: Move functionality related to UTF8
encode/decode into _golang_str) both golang_str and strconv import each
other.

Before this patch that import was done at py level at runtime from
outside to work around the import cycle. As a result, strconv
functionality is not available while golang is still being imported.
So far this was not a problem, but once the builtin string types are
patched with bstr and ustr it will become one, because string repr
starts to be used at import time, and for pybstr repr is implemented
via strconv.quote.

-> Fix this by switching golang and strconv to cimport each other at pyx
level. There, similarly to C, the cycle works just ok out of the box.

This also automatically helps performance a bit:

    name                 old time/op  new time/op  delta
    quote[a]              805µs ± 0%   786µs ± 1%   -2.40%  (p=0.016 n=5+4)
    quote[\u03b1]        1.21ms ± 0%  1.12ms ± 0%   -7.47%  (p=0.008 n=5+5)
    quote[\u65e5]         785µs ± 0%   738µs ± 2%   -5.97%  (p=0.016 n=5+4)
    quote[\U0001f64f]    1.04ms ± 0%  0.92ms ± 1%  -11.73%  (p=0.008 n=5+5)
    stdquote             1.18µs ± 0%  1.19µs ± 0%   +0.54%  (p=0.008 n=5+5)
    unquote[a]           1.26ms ± 0%  1.08ms ± 0%  -14.66%  (p=0.008 n=5+5)
    unquote[\u03b1]       911µs ± 1%   797µs ± 0%  -12.55%  (p=0.008 n=5+5)
    unquote[\u65e5]       592µs ± 0%   522µs ± 0%  -11.81%  (p=0.008 n=5+5)
    unquote[\U0001f64f]  3.46ms ± 0%  3.21ms ± 0%   -7.34%  (p=0.008 n=5+5)
    stdunquote            812ns ± 1%   815ns ± 0%     ~     (p=0.183 n=5+5)
parent 2684dc94
...@@ -374,11 +374,3 @@ from ._golang import \ ...@@ -374,11 +374,3 @@ from ._golang import \
pyu as u, \ pyu as u, \
pyustr as ustr, \ pyustr as ustr, \
pyuchr as uchr pyuchr as uchr
# import golang.strconv into _golang from here to workaround cyclic golang ↔ strconv dependency
def _():
from . import _golang
from . import strconv
_golang.pystrconv = strconv
_()
del _
...@@ -43,7 +43,7 @@ In addition to Cython/nogil API, golang.pyx provides runtime for golang.py: ...@@ -43,7 +43,7 @@ In addition to Cython/nogil API, golang.pyx provides runtime for golang.py:
- Python-level channels are represented by pychan + pyselect. - Python-level channels are represented by pychan + pyselect.
- Python-level error is represented by pyerror. - Python-level error is represented by pyerror.
- Python-level panic is represented by pypanic. - Python-level panic is represented by pypanic.
- Python-level strings are represented by pybstr and pyustr. - Python-level strings are represented by pybstr/pyustr and pyb/pyu.
""" """
...@@ -269,4 +269,11 @@ cdef class pyerror(Exception): ...@@ -269,4 +269,11 @@ cdef class pyerror(Exception):
cdef object from_error (error err) # -> pyerror | None cdef object from_error (error err) # -> pyerror | None
# strings
cpdef pyb(s) # -> bstr
cpdef pyu(s) # -> ustr
cdef __pystr(object obj) cdef __pystr(object obj)
cdef (rune, int) _utf8_decode_rune(const byte[::1] s)
cdef unicode _xunichr(rune i)
...@@ -72,7 +72,7 @@ from cython cimport no_gc ...@@ -72,7 +72,7 @@ from cython cimport no_gc
from libc.stdio cimport FILE from libc.stdio cimport FILE
pystrconv = None # = golang.strconv imported at runtime (see __init__.py) from golang cimport strconv
import string as pystring import string as pystring
import types as pytypes import types as pytypes
import functools as pyfunctools import functools as pyfunctools
...@@ -97,7 +97,7 @@ pybstr = _pybstr # initially point to -> _pybstr/_pyustr ...@@ -97,7 +97,7 @@ pybstr = _pybstr # initially point to -> _pybstr/_pyustr
pyustr = _pyustr # TODO -> cdef for speed pyustr = _pyustr # TODO -> cdef for speed
def pyb(s): # -> bstr cpdef pyb(s): # -> bstr
"""b converts object to bstr. """b converts object to bstr.
- For bstr the same object is returned. - For bstr the same object is returned.
...@@ -118,7 +118,7 @@ def pyb(s): # -> bstr ...@@ -118,7 +118,7 @@ def pyb(s): # -> bstr
raise TypeError("b: invalid type %s" % type(s)) raise TypeError("b: invalid type %s" % type(s))
return bs return bs
def pyu(s): # -> ustr cpdef pyu(s): # -> ustr
"""u converts object to ustr. """u converts object to ustr.
- For ustr the same object is returned. - For ustr the same object is returned.
...@@ -1068,7 +1068,7 @@ cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape) ...@@ -1068,7 +1068,7 @@ cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape)
if (quote in s) and (b'"' not in s): if (quote in s) and (b'"' not in s):
quote = b'"' quote = b'"'
x, nonascii_escape = pystrconv._quote(s, quote) # raw bytes x, nonascii_escape = strconv._quote(s, quote) # raw bytes
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
return x, nonascii_escape return x, nonascii_escape
else: else:
...@@ -1093,7 +1093,7 @@ def pyqq(obj): ...@@ -1093,7 +1093,7 @@ def pyqq(obj):
# py3: str | bytes # py3: str | bytes
if not isinstance(obj, (unicode, bytes)): if not isinstance(obj, (unicode, bytes)):
obj = _bstringify(obj) obj = _bstringify(obj)
return pystrconv.quote(obj) return strconv.pyquote(obj)
...@@ -1875,16 +1875,12 @@ cdef extern from "Python.h": ...@@ -1875,16 +1875,12 @@ cdef extern from "Python.h":
from six import unichr # py2: unichr py3: chr from six import unichr # py2: unichr py3: chr
from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,)) from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,))
_py_rune_error = utf8.RuneError
cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2 cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2
assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4 assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4
# _utf8_decode_rune decodes next UTF8-character from byte string s. # _utf8_decode_rune decodes next UTF8-character from byte string s.
# #
# _utf8_decode_rune(s) -> (r, size) # _utf8_decode_rune(s) -> (r, size)
def _py_utf8_decode_rune(const byte[::1] s):
return _utf8_decode_rune(s)
cdef (rune, int) _utf8_decode_rune(const byte[::1] s): cdef (rune, int) _utf8_decode_rune(const byte[::1] s):
if len(s) == 0: if len(s) == 0:
return utf8.RuneError, 0 return utf8.RuneError, 0
...@@ -2029,10 +2025,10 @@ else: ...@@ -2029,10 +2025,10 @@ else:
# #
# it works correctly even on ucs2 python builds, where ordinals >= 0x10000 are # it works correctly even on ucs2 python builds, where ordinals >= 0x10000 are
# represented as 2 unicode points. # represented as 2 unicode points.
if not _ucs2_build: cdef unicode _xunichr(rune i):
_xunichr = unichr if not _ucs2_build:
else: return unichr(i)
def _xunichr(i): else:
if i < 0x10000: if i < 0x10000:
return unichr(i) return unichr(i)
......
...@@ -19,3 +19,8 @@ ...@@ -19,3 +19,8 @@
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
"""Package strconv provides Go-compatible string conversions.""" """Package strconv provides Go-compatible string conversions."""
from golang cimport byte
cpdef pyquote(s)
cdef _quote(s, quote) # -> (quoted, nonascii_escape)
...@@ -25,16 +25,17 @@ from __future__ import print_function, absolute_import ...@@ -25,16 +25,17 @@ from __future__ import print_function, absolute_import
import unicodedata, codecs import unicodedata, codecs
from six.moves import range as xrange from six.moves import range as xrange
from golang import b from golang cimport pyb
from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr from golang cimport _utf8_decode_rune, _xunichr
from golang.unicode cimport utf8
# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ". # quote quotes unicode|bytes string into valid "..." bytestring always quoted with ".
def quote(s): # -> bstr cpdef pyquote(s): # -> bstr
q, _ = _quote(b(s), b'"') q, _ = _quote(pyb(s), b'"')
return b(q) return pyb(q)
def _quote(s, quote): # -> (quoted, nonascii_escape) cdef _quote(s, quote): # -> (quoted, nonascii_escape)
assert isinstance(s, bytes), type(s) assert isinstance(s, bytes), type(s)
assert isinstance(quote, bytes), type(quote) assert isinstance(quote, bytes), type(quote)
assert len(quote) == 1, repr(quote) assert len(quote) == 1, repr(quote)
...@@ -74,7 +75,7 @@ def _quote(s, quote): # -> (quoted, nonascii_escape) ...@@ -74,7 +75,7 @@ def _quote(s, quote): # -> (quoted, nonascii_escape)
isize = i + size isize = i + size
# decode error - just emit raw byte as escaped # decode error - just emit raw byte as escaped
if r == _rune_error and size == 1: if r == utf8.RuneError and size == 1:
nonascii_escape = True nonascii_escape = True
emit(br'\x%02x' % ord(c)) emit(br'\x%02x' % ord(c))
...@@ -96,8 +97,8 @@ def _quote(s, quote): # -> (quoted, nonascii_escape) ...@@ -96,8 +97,8 @@ def _quote(s, quote): # -> (quoted, nonascii_escape)
# unquote decodes "-quoted unicode|byte string. # unquote decodes "-quoted unicode|byte string.
# #
# ValueError is raised if there are quoting syntax errors. # ValueError is raised if there are quoting syntax errors.
def unquote(s): # -> bstr def pyunquote(s): # -> bstr
us, tail = unquote_next(s) us, tail = pyunquote_next(s)
if len(tail) != 0: if len(tail) != 0:
raise ValueError('non-empty tail after closing "') raise ValueError('non-empty tail after closing "')
return us return us
...@@ -107,11 +108,11 @@ def unquote(s): # -> bstr ...@@ -107,11 +108,11 @@ def unquote(s): # -> bstr
# it returns -> (unquoted(s), tail-after-") # it returns -> (unquoted(s), tail-after-")
# #
# ValueError is raised if there are quoting syntax errors. # ValueError is raised if there are quoting syntax errors.
def unquote_next(s): # -> (bstr, bstr) def pyunquote_next(s): # -> (bstr, bstr)
us, tail = _unquote_next(b(s)) us, tail = _unquote_next(pyb(s))
return b(us), b(tail) return pyb(us), pyb(tail)
def _unquote_next(s): cdef _unquote_next(s):
assert isinstance(s, bytes) assert isinstance(s, bytes)
if len(s) == 0 or s[0:0+1] != b'"': if len(s) == 0 or s[0:0+1] != b'"':
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
from __future__ import print_function, absolute_import from __future__ import print_function, absolute_import
from golang._strconv import \ from golang._strconv import \
quote, \ pyquote as quote, \
_quote, \ pyunquote as unquote, \
unquote, \ pyunquote_next as unquote_next
unquote_next
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment