Commit 4546aaec authored by Kirill Smelkov's avatar Kirill Smelkov

golang_str: Switch bstr/ustr to cdef classes

For gpython to switch builtin str/unicode to bstr/ustr, we will need
bstr/ustr to have exactly the same C layout as the builtin string types.
This can be achieved only via `cdef class`. Switching to `cdef class` is
also good for RAM savings - from https://github.com/cython/cython/pull/5212#issuecomment-1387659026 :

    # what Cython does at runtime for `class MyBytes(bytes)`
    In [3]: MyBytes = type('MyBytes', (bytes,), {'__slots__': ()})

    In [4]: MyBytes
    Out[4]: __main__.MyBytes

    In [5]: a = bytes(b'123')

    In [6]: b = MyBytes(b'123')

    In [7]: a
    Out[7]: b'123'

    In [8]: b
    Out[8]: b'123'

    In [9]: a == b
    Out[9]: True

    In [10]: import sys

    In [11]: sys.getsizeof(a)
    Out[11]: 36

    In [12]: sys.getsizeof(b)
    Out[12]: 52

So with `cdef class` we gain more control and optimize memory usage.

This was not done before because Cython forbids `cdef class X(bytes)` due to
https://github.com/cython/cython/issues/711. We work around it in setup.py with a
draft of the proper patch, pre-posted upstream in https://github.com/cython/cython/pull/5212 .
parent 21fab975
This diff is collapsed.
......@@ -31,7 +31,7 @@ import sys
import six
from six import text_type as unicode, unichr
from six.moves import range as xrange
import re, pickle, copy, types
import gc, re, pickle, copy, types
import array, collections
......@@ -284,6 +284,25 @@ def test_strings_basic():
bs.hello = 1
# verify that bstr/ustr are created with correct refcount.
def test_strings_refcount():
    # A freshly created object that is bound to a single local name is
    # expected to report 2 references: the local itself plus the temporary
    # reference held by the argument of the sys.getrefcount call.
    #
    # NOTE keep this code straight-line: routing the object through a helper
    # or a loop variable would add references and change the counts.

    # sanity-check the expectation on a standard type first
    s = xbytes(u'abc')
    assert type(s) is bytes
    gc.collect()
    assert sys.getrefcount(s) == 2

    # bstr: created via b() and via the bstr constructor
    s = b('abc')
    assert type(s) is bstr
    gc.collect()
    assert sys.getrefcount(s) == 2

    s = bstr('abc')
    assert type(s) is bstr
    gc.collect()
    assert sys.getrefcount(s) == 2

    # ustr: created via u() and via the ustr constructor
    s = u('abc')
    assert type(s) is ustr
    gc.collect()
    assert sys.getrefcount(s) == 2

    s = ustr('abc')
    assert type(s) is ustr
    gc.collect()
    assert sys.getrefcount(s) == 2
# verify memoryview(bstr|ustr).
def test_strings_memoryview():
bs = b('мир')
......
......@@ -18,6 +18,25 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# patch cython to allow `cdef class X(bytes)` while building pygolang to
# workaround https://github.com/cython/cython/issues/711
# see `cdef class pybstr` in golang/_golang_str.pyx for details.
# (should become unneeded with cython 3 once https://github.com/cython/cython/pull/5212 is finished)
import inspect
from Cython.Compiler.PyrexTypes import BuiltinObjectType
def pygo_cy_builtin_type_name_set(self, v):
    """Setter half of the patched `name` property.

    Remembers `v` in `self._pygo_name`, which is where
    pygo_cy_builtin_type_name_get reads the name back from.
    """
    self._pygo_name = v
def pygo_cy_builtin_type_name_get(self):
    """Getter half of the patched `name` property.

    Returns the name stored in `self._pygo_name`, with one exception: when
    the stored name is 'bytes' and the immediate caller is a function named
    `analyse_declarations`, the placeholder 'xxx' is returned instead.
    """
    stored = self._pygo_name
    if stored != 'bytes':
        return stored

    # name of the function that is reading the property
    asker = inspect.currentframe().f_back.f_code.co_name

    # anything different from 'bytes' is enough to deactivate the check in
    # https://github.com/cython/cython/blob/c21b39d4/Cython/Compiler/Nodes.py#L4759-L4762
    return 'xxx' if asker == 'analyse_declarations' else stored
# route every read/write of BuiltinObjectType.name through the pygo accessors
BuiltinObjectType.name = property(
    fget=pygo_cy_builtin_type_name_get,
    fset=pygo_cy_builtin_type_name_set,
)
from setuptools import find_packages
from setuptools.command.install_scripts import install_scripts as _install_scripts
from setuptools.command.develop import develop as _develop
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment