Commit 1be4d730 authored by Kirill Smelkov's avatar Kirill Smelkov

XY bigfile/py: Garbage-collect BigFile <=> BigFileH cycles

Since ZBigFile keeps references to fileh objects that are created
through it it forms a file <=> fileh cycle that is not collected without
cyclic GC:

https://lab.nexedi.com/nexedi/wendelin.core/blob/v0.13-52-ga702d41/bigfile/file_zodb.py#L497
https://lab.nexedi.com/nexedi/wendelin.core/blob/v0.13-52-ga702d41/bigfile/file_zodb.py#L566-571

We did not noticed this leak until now because it is small, but with
upcoming wendelin.core 2 it is important to release a fileh, because
there is WCFS connection associated with fileh, and if fileh is not
released, that connection also stays alive, keeping on-WCFS resources
still being used, and preventing WCFS from being unmounted cleanly.

-> Add cyclic GC support to PyBigFile / PyBigFileH

NOTE: we still don't allow PyVMA <=> PyBigFileH cycles to be collected,
because fileh_close called from fileh.__del__ asserts that there are no
live mappings left. See added comments for details. There is no
known practical need to use such cycles, so this should be ok.

See also other patches on cyclic GC topic:

- 450ad804 (bigarray: ArrayRef support for BigArray)  // adds cyclic GC support for PyVMA
- d97641d2 (bigfile/py: Properly untrack PyVMA from GC before dealloc)
parent 21913d02
...@@ -26,11 +26,14 @@ ...@@ -26,11 +26,14 @@
* *
* NOTE on refcounting - who holds who: * NOTE on refcounting - who holds who:
* *
* vma -> fileh -> file * vma -> fileh -> file(*)
* ^ | * ^ |
* +---------+ * +---------+
* fileh->mmaps (kind of weak) * fileh->mmaps (kind of weak)
* *
* (*) PyBigFile is intended to be used as subclass, whose children can add
* references from file to other objects. In particular ZBigFile keeps
* references to fileh that were created through it.
* *
* NOTE virtmem/bigfile functions release/reacquire GIL (see virt_lock()) - * NOTE virtmem/bigfile functions release/reacquire GIL (see virt_lock()) -
* thus functions that use them cannot assume they run mutually exclusive to * thus functions that use them cannot assume they run mutually exclusive to
...@@ -165,11 +168,20 @@ pyvma_len(PyObject *pyvma0) ...@@ -165,11 +168,20 @@ pyvma_len(PyObject *pyvma0)
} }
/* pyvma vs cyclic GC */ /* pyvma vs cyclic GC */ // XXX move closer to pyvma_dealloc
static int static int
pyvma_traverse(PyObject *pyvma0, visitproc visit, void *arg) pyvma_traverse(PyObject *pyvma0, visitproc visit, void *arg)
{ {
PyVMA *pyvma = container_of(pyvma0, PyVMA, pyobj); PyVMA *pyvma = container_of(pyvma0, PyVMA, pyobj);
#if 0
VMA *vma = &pyvma->vma;
BigFileH *fileh = vma->fileh;
if (fileh) {
PyBigFileH *pyfileh = container_of(fileh, PyBigFileH, fileh);
Py_VISIT(&pyfileh->pyobj);
}
#endif
Py_VISIT(pyvma->pyuser); Py_VISIT(pyvma->pyuser);
return 0; return 0;
...@@ -180,6 +192,30 @@ pyvma_clear(PyObject *pyvma0) ...@@ -180,6 +192,30 @@ pyvma_clear(PyObject *pyvma0)
{ {
PyVMA *pyvma = container_of(pyvma0, PyVMA, pyobj); PyVMA *pyvma = container_of(pyvma0, PyVMA, pyobj);
/* NOTE don't clear vma->fileh: we need vma to be released first - else -
* if there would be vma <=> fileh cycle, it could be possible for Python
* to release fileh first, and then fileh_close called by fileh release
* would break with BUG asserting that there is no fileh mappings left.
* Protect py-level users from that. */
#if 0
VMA *vma = &pyvma->vma;
BigFileH *fileh = vma->fileh;
/* pyvma->fileh indicates whether vma was yet created (via fileh_mmap()) or not */
if (fileh) {
PyBigFileH *pyfileh = container_of(fileh, PyBigFileH, fileh);
/* NOTE calling vma_unmap in tp_clear - it is a bit hacky but ok.
* we have to call vma_unmap now becuase we'll reset vma->fileh=NULL next.
* pyvma_dealloc also calls pyvma_clear. */
vma_unmap(vma);
vma->fileh = NULL;
Py_DECREF(pyfileh);
}
#endif
Py_CLEAR(pyvma->pyuser); Py_CLEAR(pyvma->pyuser);
return 0; return 0;
} }
...@@ -231,6 +267,7 @@ pyvma_dealloc(PyObject *pyvma0) ...@@ -231,6 +267,7 @@ pyvma_dealloc(PyObject *pyvma0)
/* pyvma->fileh indicates whether vma was yet created (via fileh_mmap()) or not */ /* pyvma->fileh indicates whether vma was yet created (via fileh_mmap()) or not */
if (fileh) { if (fileh) {
vma_unmap(vma); vma_unmap(vma);
//vma->fileh = NULL;
PyBigFileH *pyfileh = container_of(fileh, PyBigFileH, fileh); PyBigFileH *pyfileh = container_of(fileh, PyBigFileH, fileh);
Py_DECREF(pyfileh); Py_DECREF(pyfileh);
...@@ -428,27 +465,67 @@ PyFunc(pyfileh_uses_mmap_overlay, "uses_mmap_overlay() - whether base data for a ...@@ -428,27 +465,67 @@ PyFunc(pyfileh_uses_mmap_overlay, "uses_mmap_overlay() - whether base data for a
} }
static void /* pyfileh vs cyclic GC */
pyfileh_dealloc(PyObject *pyfileh0) static int
pyfileh_traverse(PyObject *pyfileh0, visitproc visit, void *arg)
{ {
/* PyBigFileH does not support cyclic GC - no need to PyObject_GC_UnTrack it */
PyBigFileH *pyfileh = container_of(pyfileh0, PyBigFileH, pyobj); PyBigFileH *pyfileh = container_of(pyfileh0, PyBigFileH, pyobj);
BigFileH *fileh = &pyfileh->fileh; BigFileH *fileh = &pyfileh->fileh;
BigFile *file = fileh->file; BigFile *file = fileh->file;
PyBigFile *pyfile; PyBigFile *pyfile;
if (pyfileh->in_weakreflist) if (file) {
PyObject_ClearWeakRefs(&pyfileh->pyobj); pyfile = container_of(file, PyBigFile, file);
Py_VISIT(pyfile);
}
return 0;
}
static int
pyfileh_clear(PyObject *pyfileh0)
{
PyBigFileH *pyfileh = container_of(pyfileh0, PyBigFileH, pyobj);
BigFileH *fileh = &pyfileh->fileh;
BigFile *file = fileh->file;
PyBigFile *pyfile;
/* pyfileh->file indicates whether fileh was yet opened (via fileh_open()) or not */ /* pyfileh->file indicates whether fileh was yet opened (via fileh_open()) or not */
if (file) { if (file) {
pyfile = container_of(file, PyBigFile, file);
/* NOTE calling fileh_close in tp_clear - it is a bit hacky but ok.
* we have to call fileh_close now becuase we'll reset fileh->file=NULL next.
* pyfileh_dealloc also calls pyfileh_clear. */
fileh_close(fileh); fileh_close(fileh);
pyfile = container_of(file, PyBigFile, file); fileh->file = NULL;
Py_DECREF(pyfile); Py_DECREF(pyfile);
} }
return 0;
}
static void
pyfileh_dealloc(PyObject *pyfileh0)
{
/* PyBigFileH supports cyclic GC - first, before destructing, remove it from GC
* list to avoid segfaulting on double entry here - e.g. if GC is triggered
* from a weakref callback, or concurrently from another thread.
*
* See https://bugs.python.org/issue31095 for details */
PyObject_GC_UnTrack(pyfileh0);
PyBigFileH *pyfileh = container_of(pyfileh0, PyBigFileH, pyobj);
BigFileH *fileh = &pyfileh->fileh;
BigFile *file = fileh->file;
PyBigFile *pyfile;
if (pyfileh->in_weakreflist)
PyObject_ClearWeakRefs(&pyfileh->pyobj);
pyfileh_clear(&pyfileh->pyobj);
pyfileh->pyobj.ob_type->tp_free(&pyfileh->pyobj); pyfileh->pyobj.ob_type->tp_free(&pyfileh->pyobj);
} }
...@@ -482,7 +559,9 @@ static /*const*/ PyTypeObject PyBigFileH_Type = { ...@@ -482,7 +559,9 @@ static /*const*/ PyTypeObject PyBigFileH_Type = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "_bigfile.BigFileH", .tp_name = "_bigfile.BigFileH",
.tp_basicsize = sizeof(PyBigFileH), .tp_basicsize = sizeof(PyBigFileH),
.tp_flags = Py_TPFLAGS_DEFAULT, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
.tp_traverse = pyfileh_traverse,
.tp_clear = pyfileh_clear,
.tp_methods = pyfileh_methods, .tp_methods = pyfileh_methods,
.tp_dealloc = pyfileh_dealloc, .tp_dealloc = pyfileh_dealloc,
.tp_new = pyfileh_new, .tp_new = pyfileh_new,
...@@ -993,15 +1072,41 @@ pyfileh_open(PyObject *pyfile0, PyObject *args) ...@@ -993,15 +1072,41 @@ pyfileh_open(PyObject *pyfile0, PyObject *args)
return &pyfileh->pyobj; return &pyfileh->pyobj;
} }
/* pyfile vs cyclic GC */
static int
pyfile_traverse(PyObject *pyfile0, visitproc visit, void *arg)
{
PyBigFile *pyfile = container_of(pyfile0, PyBigFile, pyobj);
Py_VISIT(pyfile->blkmmapper);
return 0;
}
static int
pyfile_clear(PyObject *pyfile0)
{
PyBigFile *pyfile = container_of(pyfile0, PyBigFile, pyobj);
Py_CLEAR(pyfile->blkmmapper);
return 0;
}
static void static void
pyfile_dealloc(PyObject *pyfile0) pyfile_dealloc(PyObject *pyfile0)
{ {
/* PyBigFile supports cyclic GC - first, before destructing, remove it from GC
* list to avoid segfaulting on double entry here - e.g. if GC is triggered
* from a weakref callback, or concurrently from another thread.
*
* See https://bugs.python.org/issue31095 for details */
PyObject_GC_UnTrack(pyfile0);
PyBigFile *pyfile = container_of(pyfile0, PyBigFile, pyobj); PyBigFile *pyfile = container_of(pyfile0, PyBigFile, pyobj);
pyfile->blkmmap_ops = NULL; pyfile->blkmmap_ops = NULL;
Py_CLEAR(pyfile->blkmmapper);
pyfile_clear(&pyfile->pyobj);
pyfile->pyobj.ob_type->tp_free(&pyfile->pyobj); pyfile->pyobj.ob_type->tp_free(&pyfile->pyobj);
} }
...@@ -1082,7 +1187,9 @@ static PyTypeObject PyBigFile_Type = { ...@@ -1082,7 +1187,9 @@ static PyTypeObject PyBigFile_Type = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "_bigfile.BigFile", .tp_name = "_bigfile.BigFile",
.tp_basicsize = sizeof(PyBigFile), .tp_basicsize = sizeof(PyBigFile),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
.tp_traverse = pyfile_traverse,
.tp_clear = pyfile_clear,
.tp_methods = pyfile_methods, .tp_methods = pyfile_methods,
.tp_members = pyfile_members, .tp_members = pyfile_members,
.tp_dealloc = pyfile_dealloc, .tp_dealloc = pyfile_dealloc,
......
# Wendelin.core.bigfile | Basic tests # Wendelin.core.bigfile | Basic tests
# Copyright (C) 2014-2019 Nexedi SA and Contributors. # Copyright (C) 2014-2020 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -424,6 +424,52 @@ def test_vma_del_vs_gc(): ...@@ -424,6 +424,52 @@ def test_vma_del_vs_gc():
wvma = weakref.ref(vma, lambda _: gc.collect()) wvma = weakref.ref(vma, lambda _: gc.collect())
del vma del vma
# verify that BigFile / BigFileH properly support cyclic GC.
def test_bigfile_vs_cyclic_gc():
f = XBigFile(b'abcd', PS)
fh = f.fileh_open()
# keep reference f -> fh
# together with builtin fileh -> file reference this forms f <=> fh cycle
f._myfh = fh
vma = fh.mmap(0, 1)
# NOTE: vma <=> f cycle is not supported: fileh_close called by
# fileh.__del__ BUGs if there are still opened mmappings, and py-level
# users are protected from that via requiring vma to be collected first.
#
# keep reference f -> vma
# together with builtin vma -> fileh reference this forms another cycle
#
# vma -> fileh -> file
# ^ |
# +-------------------+
f._myvma = vma
wf = weakref.ref(f)
wfh = weakref.ref(fh)
wvma = weakref.ref(vma)
gc.collect()
assert wf() is not None
assert wfh() is not None
assert wvma() is not None
# unreference objects; nothing is collected because of vma <=> f cycle through f._myvma
del vma, fh, f
gc.collect()
assert wf() is not None
assert wfh() is not None
assert wvma() is not None
# after removing vma <=> f cycle manually, everything goes to GC
wf()._myvma = None
gc.collect()
assert wf() is None
assert wfh() is None
assert wvma() is None
# test that there is no crash after first store error on writeout # test that there is no crash after first store error on writeout
class StoreError(Exception): class StoreError(Exception):
pass pass
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment