Commit e5c513bf authored by Kirill Smelkov

golang, strconv: Switch them to cimport each other at pyx level

Since 50b8cb7e (strconv: Move functionality related to UTF8
encode/decode into _golang_str) both golang_str and strconv import each
other.

Before this patch, that import was done at py level, at runtime, from
outside, to work around the import cycle. As a result, strconv
functionality is not available while golang is still being imported.
So far this was not a problem, but once builtin string types are
patched with bstr and ustr, it will become one, because string
repr starts to be used at import time, and for pybstr repr is implemented
via strconv.quote.

-> Fix this by switching golang and strconv to cimport each other at pyx
level. There, similarly to C, the cycle works fine out of the box.

This also automatically helps performance a bit:

    name                 old time/op  new time/op  delta
    quote[a]              805µs ± 0%   786µs ± 1%   -2.40%  (p=0.016 n=5+4)
    quote[\u03b1]        1.21ms ± 0%  1.12ms ± 0%   -7.47%  (p=0.008 n=5+5)
    quote[\u65e5]         785µs ± 0%   738µs ± 2%   -5.97%  (p=0.016 n=5+4)
    quote[\U0001f64f]    1.04ms ± 0%  0.92ms ± 1%  -11.73%  (p=0.008 n=5+5)
    stdquote             1.18µs ± 0%  1.19µs ± 0%   +0.54%  (p=0.008 n=5+5)
    unquote[a]           1.26ms ± 0%  1.08ms ± 0%  -14.66%  (p=0.008 n=5+5)
    unquote[\u03b1]       911µs ± 1%   797µs ± 0%  -12.55%  (p=0.008 n=5+5)
    unquote[\u65e5]       592µs ± 0%   522µs ± 0%  -11.81%  (p=0.008 n=5+5)
    unquote[\U0001f64f]  3.46ms ± 0%  3.21ms ± 0%   -7.34%  (p=0.008 n=5+5)
    stdunquote            812ns ± 1%   815ns ± 0%     ~     (p=0.183 n=5+5)
parent 2684dc94
......@@ -374,11 +374,3 @@ from ._golang import \
pyu as u, \
pyustr as ustr, \
pyuchr as uchr
# import golang.strconv into _golang from here to workaround cyclic golang ↔ strconv dependency
def _():
from . import _golang
from . import strconv
_golang.pystrconv = strconv
_()
del _
......@@ -43,7 +43,7 @@ In addition to Cython/nogil API, golang.pyx provides runtime for golang.py:
- Python-level channels are represented by pychan + pyselect.
- Python-level error is represented by pyerror.
- Python-level panic is represented by pypanic.
- Python-level strings are represented by pybstr and pyustr.
- Python-level strings are represented by pybstr/pyustr and pyb/pyu.
"""
......@@ -269,4 +269,11 @@ cdef class pyerror(Exception):
cdef object from_error (error err) # -> pyerror | None
# strings
cpdef pyb(s) # -> bstr
cpdef pyu(s) # -> ustr
cdef __pystr(object obj)
cdef (rune, int) _utf8_decode_rune(const byte[::1] s)
cdef unicode _xunichr(rune i)
......@@ -72,7 +72,7 @@ from cython cimport no_gc
from libc.stdio cimport FILE
pystrconv = None # = golang.strconv imported at runtime (see __init__.py)
from golang cimport strconv
import string as pystring
import types as pytypes
import functools as pyfunctools
......@@ -97,7 +97,7 @@ pybstr = _pybstr # initially point to -> _pybstr/_pyustr
pyustr = _pyustr # TODO -> cdef for speed
def pyb(s): # -> bstr
cpdef pyb(s): # -> bstr
"""b converts object to bstr.
- For bstr the same object is returned.
......@@ -118,7 +118,7 @@ def pyb(s): # -> bstr
raise TypeError("b: invalid type %s" % type(s))
return bs
def pyu(s): # -> ustr
cpdef pyu(s): # -> ustr
"""u converts object to ustr.
- For ustr the same object is returned.
......@@ -1068,7 +1068,7 @@ cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape)
if (quote in s) and (b'"' not in s):
quote = b'"'
x, nonascii_escape = pystrconv._quote(s, quote) # raw bytes
x, nonascii_escape = strconv._quote(s, quote) # raw bytes
if PY_MAJOR_VERSION < 3:
return x, nonascii_escape
else:
......@@ -1093,7 +1093,7 @@ def pyqq(obj):
# py3: str | bytes
if not isinstance(obj, (unicode, bytes)):
obj = _bstringify(obj)
return pystrconv.quote(obj)
return strconv.pyquote(obj)
......@@ -1875,16 +1875,12 @@ cdef extern from "Python.h":
from six import unichr # py2: unichr py3: chr
from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,))
_py_rune_error = utf8.RuneError
cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2
assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4
# _utf8_decode_rune decodes next UTF8-character from byte string s.
#
# _utf8_decode_rune(s) -> (r, size)
def _py_utf8_decode_rune(const byte[::1] s):
return _utf8_decode_rune(s)
cdef (rune, int) _utf8_decode_rune(const byte[::1] s):
if len(s) == 0:
return utf8.RuneError, 0
......@@ -2029,10 +2025,10 @@ else:
#
# it works correctly even on ucs2 python builds, where ordinals >= 0x10000 are
# represented as 2 unicode points.
if not _ucs2_build:
_xunichr = unichr
else:
def _xunichr(i):
cdef unicode _xunichr(rune i):
if not _ucs2_build:
return unichr(i)
else:
if i < 0x10000:
return unichr(i)
......
......@@ -19,3 +19,8 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""Package strconv provides Go-compatible string conversions."""
from golang cimport byte
cpdef pyquote(s)
cdef _quote(s, quote) # -> (quoted, nonascii_escape)
......@@ -25,16 +25,17 @@ from __future__ import print_function, absolute_import
import unicodedata, codecs
from six.moves import range as xrange
from golang import b
from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr
from golang cimport pyb
from golang cimport _utf8_decode_rune, _xunichr
from golang.unicode cimport utf8
# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ".
def quote(s): # -> bstr
q, _ = _quote(b(s), b'"')
return b(q)
cpdef pyquote(s): # -> bstr
q, _ = _quote(pyb(s), b'"')
return pyb(q)
def _quote(s, quote): # -> (quoted, nonascii_escape)
cdef _quote(s, quote): # -> (quoted, nonascii_escape)
assert isinstance(s, bytes), type(s)
assert isinstance(quote, bytes), type(quote)
assert len(quote) == 1, repr(quote)
......@@ -74,7 +75,7 @@ def _quote(s, quote): # -> (quoted, nonascii_escape)
isize = i + size
# decode error - just emit raw byte as escaped
if r == _rune_error and size == 1:
if r == utf8.RuneError and size == 1:
nonascii_escape = True
emit(br'\x%02x' % ord(c))
......@@ -96,8 +97,8 @@ def _quote(s, quote): # -> (quoted, nonascii_escape)
# unquote decodes "-quoted unicode|byte string.
#
# ValueError is raised if there are quoting syntax errors.
def unquote(s): # -> bstr
us, tail = unquote_next(s)
def pyunquote(s): # -> bstr
us, tail = pyunquote_next(s)
if len(tail) != 0:
raise ValueError('non-empty tail after closing "')
return us
......@@ -107,11 +108,11 @@ def unquote(s): # -> bstr
# it returns -> (unquoted(s), tail-after-")
#
# ValueError is raised if there are quoting syntax errors.
def unquote_next(s): # -> (bstr, bstr)
us, tail = _unquote_next(b(s))
return b(us), b(tail)
def pyunquote_next(s): # -> (bstr, bstr)
us, tail = _unquote_next(pyb(s))
return pyb(us), pyb(tail)
def _unquote_next(s):
cdef _unquote_next(s):
assert isinstance(s, bytes)
if len(s) == 0 or s[0:0+1] != b'"':
......
......@@ -22,7 +22,6 @@
from __future__ import print_function, absolute_import
from golang._strconv import \
quote, \
_quote, \
unquote, \
unquote_next
pyquote as quote, \
pyunquote as unquote, \
pyunquote_next as unquote_next
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment