Commit 300d7dfa authored by Kirill Smelkov's avatar Kirill Smelkov

golang_str: Fix bstr/ustr slice access on py2

In the patch "golang_str: bstr/ustr index access" we added __getitem__
implementation for bstr/ustr and thorough corresponding tests to cover
all access cases: [i], [i:j] and [i:j:k].

The tests, however, are run via pytest, which does AST rewriting and, as
it turned out, always invokes __getitem__ for the [i:j] case, even on py2.
This differs from plain python2 behaviour, which invokes __getslice__ for
the [i:j] case if the __getslice__ slot is present.

Since on py2 both str and unicode provide __getslice__ implementation,
and bstr/ustr inherit from those types, they also inherit __getslice__.
And oops, then on py2 e.g. bstr[i:j] was returning str instead of bstr:

    In [1]: bs = b('αβγ')

    In [2]: bs
    Out[2]: b('αβγ')

    In [3]: bs[0]
    Out[3]: b(b'\xce')

    In [4]: bs[0:1]
    Out[4]: '\xce'              <-- NOTE not b(...)

    In [5]: type(_)
    Out[5]: str                 <-- NOTE not bstr

-> Fix it by explicitly whiting out __getslice__ slot for bstr and ustr.
parent 859a55eb
......@@ -51,6 +51,7 @@ cdef extern from "Python.h":
# Declaration of the PySequenceMethods fields from "Python.h" that are made
# visible to this module. sq_slice is declared so that it can be reset to
# NULL for pybstr/pyustr on py2.
ctypedef struct PySequenceMethods:
binaryfunc sq_concat
binaryfunc sq_inplace_concat
object (*sq_slice) (object, Py_ssize_t, Py_ssize_t) # present only on py2
from libc.stdint cimport uint8_t
......@@ -928,6 +929,15 @@ IF PY2:
(<_PyTypeObject_Print*>Py_TYPE(pybstr())) .tp_print = _pybstr_tp_print
# Clear (white out) the .sq_slice slot that pybstr/pyustr inherit from
# str/unicode, so that slice access always goes through our __getitem__
# implementation.
# If we don't do this, e.g. bstr[:] will be handled by str.__getslice__ instead
# of bstr.__getitem__, and will return str instead of bstr.
# The slot exists only on py2, hence the PY2 guard.
if PY2:
(<_XPyTypeObject*>pybstr) .tp_as_sequence.sq_slice = NULL
(<_XPyTypeObject*>pyustr) .tp_as_sequence.sq_slice = NULL
# _bpysmartquote_u3b2 quotes bytes/bytearray s the same way python would do for string.
#
# nonascii_escape indicates whether \xNN with NN >= 0x80 is present in the output.
......
......@@ -577,6 +577,19 @@ def test_strings_index():
assert U.endswith(("α","β","мир")) == True
assert B.endswith(("α","β","мир")) == True
def test_strings_index2():
    # test_strings_index exercises __getitem__ in depth. On py2, however,
    # plain python dispatches [x:y] to __getslice__ when that slot exists,
    # whereas pytest, due to its AST rewriting, goes through __getitem__.
    # So [x:y] would stay unverified if checked under pytest only.
    # -> additionally run an external program under plain python and
    #    compare its output with the expected one.
    expected = readfile(dir_testprog + "/golang_test_str_index2.txt")
    result = _pyrun(["golang_test_str_index2.py"],
                    cwd=dir_testprog, stdout=PIPE, stderr=PIPE)
    exitcode, out, err = result
    assert exitcode == 0, (out, err)
    assert err == b""
    assertDoc(expected, out)
# verify strings iteration.
def test_strings_iter():
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""This program helps to verify [:] handling for bstr and ustr.
It complements golang_str_test.test_strings_index2.
It needs to verify [:] only lightly because thorough verification is done in
test_strings_index, and here we need to verify only that __getslice__, inherited
from builtin str/unicode, does not get into our way.
"""
from __future__ import print_function, absolute_import
from golang import b, u
def main():
    """Print repr of several [i:j] slices of a ustr and a bstr.

    Every slice is printed twice: first for the ustr (label prefixed with
    "u"), then for the bstr (label prefixed with "b").
    """
    utext = u("миру мир")
    btext = b("миру мир")

    def show(label, uval, bval):
        # one line per string kind: "<kind><label> <repr>"
        print("u"+label, repr(uval))
        print("b"+label, repr(bval))

    # NOTE the slices below are deliberately spelled with literal [i:j]
    # syntax: on py2 that is what would be dispatched to __getslice__ if the
    # slot were present, which is exactly what this program checks.
    show("s",       utext,       btext)
    show("s[:]",    utext[:],    btext[:])
    show("s[0:1]",  utext[0:1],  btext[0:1])
    show("s[0:2]",  utext[0:2],  btext[0:2])
    show("s[1:2]",  utext[1:2],  btext[1:2])
    show("s[0:-1]", utext[0:-1], btext[0:-1])
# Run main only when executed as a program, not when imported.
if __name__ == "__main__":
    main()
us u('миру мир')
bs b('миру мир')
us[:] u('миру мир')
bs[:] b('миру мир')
us[0:1] u('м')
bs[0:1] b(b'\xd0')
us[0:2] u('ми')
bs[0:2] b('м')
us[1:2] u('и')
bs[1:2] b(b'\xbc')
us[0:-1] u('миру ми')
bs[0:-1] b(b'миру ми\xd1')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment