Commit 1e6c876f authored by Kirill Smelkov

golang_str: fix UCS2 builds

UCS2 (narrow Unicode) builds became broken after 50b8cb7e (strconv: Move functionality related to UTF8 encode/decode into _golang_str):

    + ./trun python -m pytest -vvsx golang/golang_str_test.py
    ==================================== test session starts =====================================
    platform linux2 -- Python 2.7.18, pytest-4.6.11, py-1.11.0, pluggy-0.13.1 -- /home/kirr/src/tools/go/py2d.venv2023/bin/python
    cachedir: .pytest_cache
    rootdir: /home/kirr/src/tools/go/pygolang-xgpystr
    collected 64 items

    golang/golang_str_test.py::test_strings_basic Traceback (most recent call last):
      File "golang/_golang_str.pyx", line 2270, in golang._golang._xuniord
        return ord(u)
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    Exception ValueError: 'only single character unicode strings can be converted to Py_UCS4, got length 2' in 'golang._golang._utf8_decode_rune' ignored

    (py2d.venv2023) kirr@deca:~/src/tools/go/pygolang-xgpystr$ python
    Python 2.7.18 (tags/2.7-dirty:8d21aa21f2c, Mar 30 2023, 07:38:40)
    [GCC 10.2.1 20210110] on linux2
    Type "help", "copyright", "credits" or "license" for more information.
    >>> from pygolang import *
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ImportError: No module named pygolang
    >>> from golang import *
    >>> ord('xy')
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: ord() expected a character, but string of length 2 found
    >>> ord(b'xy')
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: ord() expected a character, but string of length 2 found
    >>> ord(u'xy')
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: ord() expected a character, but string of length 2 found
    >>> ord(b('xy'))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: ord() expected a character, but string of length 2 found
    >>> ord(u('xy'))
    Traceback (most recent call last):
      File "golang/_golang_str.pyx", line 2270, in golang._golang._xuniord
        return ord(u)
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    Exception ValueError: 'only single character unicode strings can be converted to Py_UCS4, got length 2' in 'golang._golang._utf8_decode_rune' ignored
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "golang/_golang_str.pyx", line 157, in golang._golang.pyu
        us = _pyu(pyustr, s)
      File "golang/_golang_str.pyx", line 195, in golang._golang._pyu
        s = _utf8_decode_surrogateescape(s)
      File "golang/_golang_str.pyx", line 2198, in golang._golang._utf8_decode_surrogateescape
        emit(_xunichr(r))
      File "golang/_golang_str.pyx", line 2286, in golang._golang._xunichr
        return unichr(0xd800 + (uh >> 10)) + \
    ValueError: unichr() arg not in range(0x10000) (narrow Python build)

/proposed-for-review-on nexedi/pygolang!25
parent d4866c4a
# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2022 Nexedi SA and Contributors.
+# Copyright (C) 2018-2023 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
......
@@ -221,8 +221,9 @@ cdef (int, int) _utf8_decode_rune(const uint8_t[::1] s):
         if _ucs2_build and len(r) == 2:
             try:
                 return _xuniord(r), l
-            # e.g. TypeError: ord() expected a character, but string of length 2 found
-            except TypeError:
+            # py: TypeError: ord() expected a character, but string of length 2 found
+            # cy: ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
+            except (TypeError, ValueError):
                 l -= 1
                 continue
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment