Commit 41d4a4f8 authored by Kirill Smelkov

bigfile/virtmem: Do storeblk() with virtmem lock released

to avoid deadlocks.

Description is in the last patch.

Fixes: nexedi/wendelin.core#6

/cc @Tyagov, @klaus, @jm
/reviewed-on nexedi/wendelin.core!2
parents e73e22ea fb4bfb32
......@@ -319,8 +319,8 @@ PyFunc(pyfileh_isdirty, "isdirty() - are there any changes to fileh memory at al
if (!PyArg_ParseTuple(args, ""))
return NULL;
/* NOTE not strictly necessary to virt_lock() for reading ->dirty */
return PyBool_FromLong(pyfileh->dirty);
/* NOTE not strictly necessary to virt_lock() for checking ->dirty_pages not empty */
return PyBool_FromLong(!list_empty(&pyfileh->dirty_pages));
}
......
......@@ -51,6 +51,7 @@ Page *ramh_alloc_page(RAMH *ramh, pgoff_t pgoffset_hint)
page->ramh = ramh;
page->ramh_pgoffset = ramh_pgoffset;
INIT_LIST_HEAD(&page->lru); /* NOTE ->lru left unlinked */
INIT_LIST_HEAD(&page->in_dirty); /* initially not in dirty list */
page->refcnt = 0;
return page;
......
......@@ -15,7 +15,7 @@
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
from wendelin.bigfile import BigFile
from wendelin.bigfile import BigFile, WRITEOUT_STORE
from threading import Thread, Lock
from time import sleep
......@@ -68,7 +68,17 @@ PS = 2*MB
# V -> loadblk
# Z <- ClientStorage.invalidateTransaction()
# Z -> zeo.load
# V <- fileh_invalidate_page
# V <- fileh_invalidate_page (possibly of unrelated page)
#
# --------
# and similarly for storeblk:
#
# T1 T2
#
# commit same as ^^^
# V -> storeblk
#
# Z -> zeo.store
def test_thread_lock_vs_virtmem_lock():
Z = Lock()
c12 = NotifyChannel() # T1 -> T2
......@@ -77,14 +87,12 @@ def test_thread_lock_vs_virtmem_lock():
class ZLockBigFile(BigFile):
def __new__(cls, blksize):
obj = BigFile.__new__(cls, blksize)
obj.cycle = 0
return obj
def loadblk(self, blk, buf):
def Zsync_and_lockunlock(self):
tell, wait = c12.tell, c21.wait
# on the first cycle we synchronize with invalidate in T2
if self.cycle == 0:
# synchronize with invalidate in T2
tell('T1-V-under')
wait('T2-Z-taken')
......@@ -93,27 +101,42 @@ def test_thread_lock_vs_virtmem_lock():
Z.acquire()
Z.release()
self.cycle += 1
def loadblk(self, blk, buf):
self.Zsync_and_lockunlock()
def storeblk(self, blk, buf):
self.Zsync_and_lockunlock()
f = ZLockBigFile(PS)
fh = f.fileh_open()
fh2 = f.fileh_open()
vma = fh.mmap(0, 1)
m = memoryview(vma)
def T1():
m[0] # calls ZLockBigFile.loadblk()
tell, wait = c12.tell, c21.wait
wait('T2-Z-released')
m[0] = bord_py3(b'1') # make page dirty
fh.dirty_writeout(WRITEOUT_STORE) # calls ZLockBigFile.storeblk()
def T2():
tell, wait = c21.tell, c12.wait
# cycle 0: vs loadblk in T1
# cycle 1: vs storeblk in T1
for _ in range(2):
wait('T1-V-under')
Z.acquire()
tell('T2-Z-taken')
fh.invalidate_page(0)
fh2.invalidate_page(0) # NOTE invalidating page _not_ of fh
Z.release()
tell('T2-Z-released')
t1, t2 = Thread(target=T1), Thread(target=T2)
t1.start(); t2.start()
......@@ -185,7 +208,7 @@ def test_thread_multiaccess_parallel():
t1.join(); t2.join()
# loading vs invalidate in another thread
# loading vs invalidate of same page in another thread
def test_thread_load_vs_invalidate():
c12 = NotifyChannel() # T1 -> T2
c21 = NotifyChannel() # T2 -> T1
......
This diff is collapsed.
......@@ -149,6 +149,8 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram)
fileh->file = file;
INIT_LIST_HEAD(&fileh->mmaps);
INIT_LIST_HEAD(&fileh->dirty_pages);
fileh->writeout_inprogress = 0;
pagemap_init(&fileh->pagemap, ilog2_exact(ram->pagesize));
out:
......@@ -171,6 +173,9 @@ void fileh_close(BigFileH *fileh)
// fileh, but mapping exists - real fileh release is delayed to last unmap ?
BUG_ON(!list_empty(&fileh->mmaps));
/* it's an error to close fileh while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
/* drop all pages (dirty or not) associated with this fileh */
pagemap_for_each(page, &fileh->pagemap) {
/* it's an error to close fileh to mapping of which an access is
......@@ -182,6 +187,8 @@ void fileh_close(BigFileH *fileh)
free(page);
}
BUG_ON(!list_empty(&fileh->dirty_pages));
/* and clear pagemap */
pagemap_clear(&fileh->pagemap);
......@@ -296,11 +303,24 @@ void vma_unmap(VMA *vma)
* WRITEOUT / DISCARD *
**********************/
/* comparator used to order a fileh's dirty pages by ascending ->f_pgoffset
 *
 * Both list heads are the ->in_dirty links embedded in Page; the enclosing
 * pages are recovered from them and compared by their ->f_pgoffset.
 *
 * Returns -1 / 0 / +1 when the first page's offset is lower than / equal to /
 * higher than the second's. The third argument is not used.
 */
static int hpage_indirty_cmp_bypgoffset(struct list_head *hpage1, struct list_head *hpage2, void *_)
{
    Page *lhs = list_entry(hpage1, typeof(*lhs), in_dirty);
    Page *rhs = list_entry(hpage2, typeof(*rhs), in_dirty);

    /* (a > b) - (a < b) yields exactly +1, -1 or 0 without branching */
    return (lhs->f_pgoffset > rhs->f_pgoffset) - (lhs->f_pgoffset < rhs->f_pgoffset);
}
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
{
Page *page;
BigFile *file = fileh->file;
struct list_head *hmmap;
struct list_head *hpage, *hpage_next, *hmmap;
sigset_t save_sigset;
int err = 0;
......@@ -312,12 +332,18 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
sigsegv_block(&save_sigset);
virt_lock();
/* concurrent writeouts are not allowed */
BUG_ON(fileh->writeout_inprogress);
fileh->writeout_inprogress = 1;
/* pages are stored (if stored) in sorted order */
if (flags & WRITEOUT_STORE)
list_sort(&fileh->dirty_pages, hpage_indirty_cmp_bypgoffset, NULL);
/* write out dirty pages */
pagemap_for_each(page, &fileh->pagemap) {
/* XXX we scan whole file pages which could be slow
* TODO -> maintain something like separate dirty_list ? */
if (page->state != PAGE_DIRTY)
continue;
list_for_each_safe(hpage, hpage_next, &fileh->dirty_pages) {
page = list_entry(hpage, typeof(*page), in_dirty);
BUG_ON(page->state != PAGE_DIRTY);
/* ->storeblk() */
if (flags & WRITEOUT_STORE) {
......@@ -325,34 +351,28 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
blk_t blk = page->f_pgoffset; // NOTE assumes blksize = pagesize
void *pagebuf;
int mapped_tmp = 0;
if (!page->refcnt) {
/* page not mmaped anywhere - mmap it temporarily somewhere */
/* mmap page temporarily somewhere
*
* ( we cannot use present page mapping in some vma directly,
* because while storeblk is called with virtmem lock released that
* mapping can go away ) */
pagebuf = page_mmap(page, NULL, PROT_READ);
TODO(!pagebuf); // XXX err
mapped_tmp = 1;
}
else {
/* some vma mmaps page - use that memory directly */
/* XXX this assumes there is small #vma and is ugly - in general it
* should be simpler via back-pointers from page? */
pagebuf = NULL;
list_for_each(hmmap, &fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
if (vma_page_ismapped(vma, page)) {
pagebuf = vma_page_addr(vma, page);
break;
}
}
BUG_ON(!pagebuf);
}
/* unlock virtmem before calling storeblk()
*
* that call is potentially slow and external code can take other
* locks. If those "other locks" are also taken before external code
* calls e.g. fileh_invalidate_page() in a different codepath, a deadlock
* can happen. (similar to loadblk case) */
virt_unlock();
err = file->file_ops->storeblk(file, blk, pagebuf);
if (mapped_tmp)
/* relock virtmem */
virt_lock();
xmunmap(pagebuf, page_size(page));
if (err)
......@@ -362,7 +382,7 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
/* page.state -> PAGE_LOADED and correct mappings RW -> R */
if (flags & WRITEOUT_MARKSTORED) {
page->state = PAGE_LOADED;
fileh->dirty--;
list_del_init(&page->in_dirty);
list_for_each(hmmap, &fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
......@@ -375,7 +395,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
/* if we successfully finished with markstored flag set - all dirty pages
* should become non-dirty */
if (flags & WRITEOUT_MARKSTORED)
BUG_ON(fileh->dirty);
BUG_ON(!list_empty(&fileh->dirty_pages));
fileh->writeout_inprogress = 0;
out:
virt_unlock();
......@@ -387,18 +409,23 @@ out:
void fileh_dirty_discard(BigFileH *fileh)
{
Page *page;
struct list_head *hpage, *hpage_next;
sigset_t save_sigset;
sigsegv_block(&save_sigset);
virt_lock();
/* XXX we scan whole file pages which could be slow
* TODO -> maintain something like separate dirty_list ? */
pagemap_for_each(page, &fileh->pagemap)
if (page->state == PAGE_DIRTY)
/* discard is not allowed to run in parallel to writeout */
BUG_ON(fileh->writeout_inprogress);
list_for_each_safe(hpage, hpage_next, &fileh->dirty_pages) {
page = list_entry(hpage, typeof(*page), in_dirty);
BUG_ON(page->state != PAGE_DIRTY);
page_drop_memory(page);
}
BUG_ON(fileh->dirty);
BUG_ON(!list_empty(&fileh->dirty_pages));
virt_unlock();
sigsegv_restore(&save_sigset);
......@@ -417,6 +444,9 @@ void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset)
sigsegv_block(&save_sigset);
virt_lock();
/* it's an error to invalidate fileh while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
page = pagemap_get(&fileh->pagemap, pgoffset);
if (page) {
/* for pages where loading is in progress, we just remove the page from
......@@ -639,7 +669,7 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write)
* that call is potentially slow and external code can take other
* locks. If that "other locks" are also taken before external code
* calls e.g. fileh_invalidate_page() in different codepath a deadlock
* can happen. */
* can happen. (similar to storeblk case) */
page->state = PAGE_LOADING;
virt_unlock();
......@@ -721,8 +751,12 @@ VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write)
}
// XXX also call page->markdirty() ?
if (newstate == PAGE_DIRTY && newstate != page->state)
fileh->dirty++;
if (newstate == PAGE_DIRTY && newstate != page->state) {
/* it is not allowed to modify pages while writeout is in progress */
BUG_ON(fileh->writeout_inprogress);
list_add_tail(&page->in_dirty, &fileh->dirty_pages);
}
page->state = max(page->state, newstate);
/* mark page as used recently */
......@@ -838,6 +872,8 @@ static void page_drop_memory(Page *page)
/* NOTE we try not to drop memory for loading-in-progress pages.
* so if this is called for such a page - it is a bug. */
BUG_ON(page->state == PAGE_LOADING);
/* same for storing-in-progress */
BUG_ON(page->fileh->writeout_inprogress && page->state == PAGE_DIRTY);
if (page->state == PAGE_EMPTY)
return;
......@@ -850,7 +886,7 @@ static void page_drop_memory(Page *page)
/* 2) release memory to ram */
ramh_drop_memory(page->ramh, page->ramh_pgoffset);
if (page->state == PAGE_DIRTY)
page->fileh->dirty--;
list_del_init(&page->in_dirty);
page->state = PAGE_EMPTY;
// XXX touch lru?
......
......@@ -65,10 +65,11 @@ struct BigFileH {
PageMap pagemap;
// XXX not sure we need this
// -> currently is used to know whether to join ZODB DataManager serving ZBigFile
// XXX maybe change into dirty_list in the future?
unsigned dirty;
/* fileh dirty pages */
struct list_head dirty_pages; /* _ -> page->in_dirty */
/* whether writeout is currently in progress */
int writeout_inprogress;
};
typedef struct BigFileH BigFileH;
......@@ -99,6 +100,9 @@ struct Page {
/* in recently-used pages for ramh->ram (ram->lru_list -> _) */
struct list_head lru;
/* in dirty pages for fileh (fileh->dirty_pages -> _) */
struct list_head in_dirty;
int refcnt; /* each mapping in a vma counts here */
};
typedef struct Page Page;
......@@ -152,6 +156,7 @@ int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);
/* close fileh
*
* it's an error to call fileh_close with existing mappings
* it's an error to call fileh_close while writeout for fileh is in progress
*/
void fileh_close(BigFileH *fileh);
......@@ -204,6 +209,12 @@ enum WriteoutFlags {
*
* No guarantee is made about atomicity - e.g. if this call fails, some
* pages could be written and some left in memory in dirty state.
*
* it's an error for a given fileh to call several fileh_dirty_writeout() in
* parallel.
*
* it's an error for a given fileh to modify its pages while writeout is in
* progress: until fileh_dirty_writeout(... | WRITEOUT_STORE) has finished.
*/
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
......@@ -215,6 +226,9 @@ int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
* - it is unmapped from all mmaps;
* - its content is discarded;
* - its backing memory is released to OS.
*
* it's an error for a given fileh to call fileh_dirty_discard() while writeout
* is in progress.
*/
void fileh_dirty_discard(BigFileH *fileh);
......@@ -229,6 +243,9 @@ void fileh_dirty_discard(BigFileH *fileh);
*
* ( Such invalidation is needed to synchronize fileh memory, when we know a
* file was changed externally )
*
* it's an error to call fileh_invalidate_page() while writeout for fileh is in
* progress.
*/
void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment