Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Joshua
wendelin.core
Commits
600167f7
Commit
600167f7
authored
Mar 20, 2019
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
314dbb0d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
235 additions
and
234 deletions
+235
-234
wcfs/wcfs.go
wcfs/wcfs.go
+235
-234
No files found.
wcfs/wcfs.go
View file @
600167f7
...
...
@@ -553,7 +553,7 @@ type blkLoadState struct {
err
error
}
// --------
------------------------
--------
// --------
3) ZODB invariant
--------
// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
...
...
@@ -594,6 +594,8 @@ func (_ *zodbCacheControl) PCacheClassify(obj zodb.IPersistent) zodb.PCachePolic
return
0
}
// -------- 4) ZODB invalidation -> OS cache --------
func
traceZWatch
(
format
string
,
argv
...
interface
{})
{
if
!
log
.
V
(
1
)
{
// XXX -> 2?
return
...
...
@@ -971,9 +973,239 @@ func (root *Root) mkrevfile(rev zodb.Tid, fid zodb.Oid) (_ *BigFile, release fun
return
xfrev
.
Node
()
.
(
*
BigFile
),
func
()
{
f
.
Close
()
},
nil
}
// ----------------------------------------
// -------- 7) FUSE read(#blk) --------
// /(head|<rev>)/bigfile/<bigfileX> -> Read serves reading bigfile data.
func
(
f
*
BigFile
)
Read
(
_
nodefs
.
File
,
dest
[]
byte
,
off
int64
,
fctx
*
fuse
.
Context
)
(
fuse
.
ReadResult
,
fuse
.
Status
)
{
f
.
head
.
zconnMu
.
RLock
()
defer
f
.
head
.
zconnMu
.
RUnlock
()
// cap read request to file size
end
:=
off
+
int64
(
len
(
dest
))
// XXX overflow?
if
end
>
f
.
size
{
end
=
f
.
size
}
if
end
<=
off
{
// XXX off >= size -> EINVAL? (but when size=0 kernel issues e.g. [0 +4K) read)
return
fuse
.
ReadResultData
(
nil
),
fuse
.
OK
}
// widen read request to be aligned with blksize granularity
// (we can load only whole ZBlk* blocks)
aoff
:=
off
-
(
off
%
f
.
blksize
)
aend
:=
end
if
re
:=
end
%
f
.
blksize
;
re
!=
0
{
aend
+=
f
.
blksize
-
re
}
dest
=
make
([]
byte
,
aend
-
aoff
)
// ~> [aoff:aend) in file
// XXX better ctx = transaction.PutIntoContext(ctx, txn)
ctx
,
cancel
:=
xcontext
.
Merge
(
asctx
(
fctx
),
f
.
head
.
zconn
.
txnCtx
)
defer
cancel
()
// read/load all block(s) in parallel
wg
,
ctx
:=
errgroup
.
WithContext
(
ctx
)
for
blkoff
:=
aoff
;
blkoff
<
aend
;
blkoff
+=
f
.
blksize
{
blkoff
:=
blkoff
blk
:=
blkoff
/
f
.
blksize
wg
.
Go
(
func
()
error
{
δ
:=
blkoff
-
aoff
// blk position in dest
//log.Infof("readBlk #%d dest[%d:+%d]", blk, δ, f.blksize)
return
f
.
readBlk
(
ctx
,
blk
,
dest
[
δ
:
δ
+
f
.
blksize
])
})
}
err
:=
wg
.
Wait
()
if
err
!=
nil
{
// XXX -> err2LogStatus
log
.
Errorf
(
"%s"
,
err
)
// XXX + /bigfile/XXX: read [a,b): -> ...
return
nil
,
fuse
.
EIO
}
return
fuse
.
ReadResultData
(
dest
[
off
-
aoff
:
end
-
aoff
]),
fuse
.
OK
}
// readBlk serves Read to read 1 ZBlk #blk into destination buffer.
//
// see "7) when we receive a FUSE read(#blk) request ..." in overview.
//
// len(dest) == blksize.
func
(
f
*
BigFile
)
readBlk
(
ctx
context
.
Context
,
blk
int64
,
dest
[]
byte
)
error
{
// XXX errctx?
// XXX locking
// check if someone else is already loading this block
f
.
loadMu
.
Lock
()
loading
,
already
:=
f
.
loading
[
blk
]
if
!
already
{
loading
=
&
blkLoadState
{
ready
:
make
(
chan
struct
{}),
}
f
.
loading
[
blk
]
=
loading
}
f
.
loadMu
.
Unlock
()
// if it is already loading - just wait for it
if
already
{
select
{
case
<-
ctx
.
Done
()
:
return
ctx
.
Err
()
case
<-
loading
.
ready
:
if
loading
.
err
==
nil
{
copy
(
dest
,
loading
.
blkdata
)
}
return
loading
.
err
}
}
// noone was loading - we became responsible to load this block
zfile
:=
f
.
zfile
blkdata
,
treepath
,
pathRevMax
,
err
:=
zfile
.
LoadBlk
(
ctx
,
blk
)
loading
.
blkdata
=
blkdata
loading
.
err
=
err
close
(
loading
.
ready
)
// only head/ has δbtree index.
if
f
.
head
.
rev
==
0
{
bfdir
:=
f
.
head
.
bfdir
bfdir
.
indexMu
.
Lock
()
// XXX locking correct?
bfdir
.
indexLooked
.
Add
(
f
,
treepath
)
bfdir
.
indexMu
.
Unlock
()
}
// XXX before loading.ready?
blkrevmax
,
_
:=
f
.
δFtail
.
LastRevOf
(
blk
,
zfile
.
PJar
()
.
At
())
blkrevmax
=
tidmin
(
blkrevmax
,
pathRevMax
)
/*
// XXX remmapping - only if head.rev == 0
// XXX -> own func?
// XXX locking
for _, mapping := range f.mappings {
if revmax <= mapping.at || !mapping.blkrange.in(blk) {
continue // do nothing
}
if mapping.pinned.Contains(blk) {
continue // do nothing
}
rev = max(δFtail.by(blk) : _ <= mapping.at)
// XXX vvv -> go
client.remmap(mapping.addr[blk], file/@<rev>/data)
mapping.pinned.Add(blk)
}
*/
// data loaded with error - cleanup .loading
if
loading
.
err
!=
nil
{
f
.
loadMu
.
Lock
()
delete
(
f
.
loading
,
blk
)
f
.
loadMu
.
Unlock
()
return
err
}
// data loaded ok
copy
(
dest
,
blkdata
)
// store to kernel pagecache whole block that we've just loaded from database.
// This way, even if the user currently requested to read only small portion from it,
// it will prevent next e.g. consecutive user read request to again hit
// the DB, and instead will be served by kernel from its pagecache.
//
// We cannot do this directly from reading goroutine - while reading
// kernel FUSE is holding corresponding page in pagecache locked, and if
// we would try to update that same page in pagecache it would result
// in deadlock inside kernel.
//
// .loading cleanup is done once we are finished with putting the data into OS pagecache.
// If we do it earlier - a simultaneous read covered by the same block could result
// into missing both kernel pagecache (if not yet updated) and empty .loading[blk],
// and thus would trigger DB access again.
//
// XXX if direct-io: don't touch pagecache
go
f
.
uploadBlk
(
blk
,
loading
)
return
nil
}
// uploadBlk complements readBlk and uploads loaded blkdata into OS cache.
func
(
f
*
BigFile
)
uploadBlk
(
blk
int64
,
loading
*
blkLoadState
)
{
head
:=
f
.
head
// rlock zconnMu and make sure zwatcher is not asking us to pause.
// if it does - wait for a safer time not to deadlock.
// see notes.txt -> "Kernel locks page on read/cache store/..." for details.
retry
:
for
{
head
.
zconnMu
.
RLock
()
// help zwatcher if it asks us to pause uploadings, so it can
// take zconnMu wlocked without deadlocks.
if
head
.
pauseOSCacheUpload
{
ready
:=
head
.
continueOSCacheUpload
head
.
zconnMu
.
RUnlock
()
<-
ready
continue
retry
}
break
}
// zwatcher is not currently trying to pause OS cache uploads.
// check if this block was already invalidated by zwatcher.
// if so don't upload the block into OS cache.
f
.
loadMu
.
Lock
()
loading_
:=
f
.
loading
[
blk
]
f
.
loadMu
.
Unlock
()
if
loading
!=
loading_
{
head
.
zconnMu
.
RUnlock
()
return
}
oid
:=
f
.
zfile
.
POid
()
// signal to zwatcher not to run while we are performing the upload.
// upload with released zconnMu so that zwatcher can lock it even if to
// check inflightOSCacheUploads status.
atomic
.
AddInt32
(
&
head
.
inflightOSCacheUploads
,
+
1
)
head
.
zconnMu
.
RUnlock
()
st
:=
gfsconn
.
FileNotifyStoreCache
(
f
.
Inode
(),
blk
*
f
.
blksize
,
loading
.
blkdata
)
f
.
loadMu
.
Lock
()
bug
:=
(
loading
!=
f
.
loading
[
blk
])
if
!
bug
{
delete
(
f
.
loading
,
blk
)
}
f
.
loadMu
.
Unlock
()
// signal to zwatcher that we are done and it can continue.
atomic
.
AddInt32
(
&
head
.
inflightOSCacheUploads
,
-
1
)
if
bug
{
panic
(
fmt
.
Sprintf
(
"BUG: bigfile %s: blk %d: f.loading mutated while uploading data to pagecache"
,
oid
,
blk
))
}
if
st
==
fuse
.
OK
{
return
}
// pagecache update failed, but it must not (we verified on startup that
// pagecache control is supported by kernel). We can correctly live on
// with the error, but data access will be likely very slow. Tell user
// about the problem.
log
.
Errorf
(
"BUG: bigfile %s: blk %d: -> pagecache: %s (ignoring, but reading from bigfile will be very slow)"
,
oid
,
blk
,
st
)
}
// XXX Move Read here
// ----------------------------------------
...
...
@@ -1248,237 +1480,6 @@ func (f *BigFile) getattr(out *fuse.Attr) {
}
// /(head|<rev>)/bigfile/<bigfileX> -> Read serves reading bigfile data.
func
(
f
*
BigFile
)
Read
(
_
nodefs
.
File
,
dest
[]
byte
,
off
int64
,
fctx
*
fuse
.
Context
)
(
fuse
.
ReadResult
,
fuse
.
Status
)
{
f
.
head
.
zconnMu
.
RLock
()
defer
f
.
head
.
zconnMu
.
RUnlock
()
// cap read request to file size
end
:=
off
+
int64
(
len
(
dest
))
// XXX overflow?
if
end
>
f
.
size
{
end
=
f
.
size
}
if
end
<=
off
{
// XXX off >= size -> EINVAL? (but when size=0 kernel issues e.g. [0 +4K) read)
return
fuse
.
ReadResultData
(
nil
),
fuse
.
OK
}
// widen read request to be aligned with blksize granularity
// (we can load only whole ZBlk* blocks)
aoff
:=
off
-
(
off
%
f
.
blksize
)
aend
:=
end
if
re
:=
end
%
f
.
blksize
;
re
!=
0
{
aend
+=
f
.
blksize
-
re
}
dest
=
make
([]
byte
,
aend
-
aoff
)
// ~> [aoff:aend) in file
// XXX better ctx = transaction.PutIntoContext(ctx, txn)
ctx
,
cancel
:=
xcontext
.
Merge
(
asctx
(
fctx
),
f
.
head
.
zconn
.
txnCtx
)
defer
cancel
()
// read/load all block(s) in parallel
wg
,
ctx
:=
errgroup
.
WithContext
(
ctx
)
for
blkoff
:=
aoff
;
blkoff
<
aend
;
blkoff
+=
f
.
blksize
{
blkoff
:=
blkoff
blk
:=
blkoff
/
f
.
blksize
wg
.
Go
(
func
()
error
{
δ
:=
blkoff
-
aoff
// blk position in dest
//log.Infof("readBlk #%d dest[%d:+%d]", blk, δ, f.blksize)
return
f
.
readBlk
(
ctx
,
blk
,
dest
[
δ
:
δ
+
f
.
blksize
])
})
}
err
:=
wg
.
Wait
()
if
err
!=
nil
{
// XXX -> err2LogStatus
log
.
Errorf
(
"%s"
,
err
)
// XXX + /bigfile/XXX: read [a,b): -> ...
return
nil
,
fuse
.
EIO
}
return
fuse
.
ReadResultData
(
dest
[
off
-
aoff
:
end
-
aoff
]),
fuse
.
OK
}
// readBlk serves Read to read 1 ZBlk #blk into destination buffer.
//
// see "7) when we receive a FUSE read(#blk) request ..." in overview.
//
// len(dest) == blksize.
func
(
f
*
BigFile
)
readBlk
(
ctx
context
.
Context
,
blk
int64
,
dest
[]
byte
)
error
{
// XXX errctx?
// XXX locking
// check if someone else is already loading this block
f
.
loadMu
.
Lock
()
loading
,
already
:=
f
.
loading
[
blk
]
if
!
already
{
loading
=
&
blkLoadState
{
ready
:
make
(
chan
struct
{}),
}
f
.
loading
[
blk
]
=
loading
}
f
.
loadMu
.
Unlock
()
// if it is already loading - just wait for it
if
already
{
select
{
case
<-
ctx
.
Done
()
:
return
ctx
.
Err
()
case
<-
loading
.
ready
:
if
loading
.
err
==
nil
{
copy
(
dest
,
loading
.
blkdata
)
}
return
loading
.
err
}
}
// noone was loading - we became responsible to load this block
zfile
:=
f
.
zfile
blkdata
,
treepath
,
pathRevMax
,
err
:=
zfile
.
LoadBlk
(
ctx
,
blk
)
loading
.
blkdata
=
blkdata
loading
.
err
=
err
close
(
loading
.
ready
)
// only head/ has δbtree index.
if
f
.
head
.
rev
==
0
{
bfdir
:=
f
.
head
.
bfdir
bfdir
.
indexMu
.
Lock
()
// XXX locking correct?
bfdir
.
indexLooked
.
Add
(
f
,
treepath
)
bfdir
.
indexMu
.
Unlock
()
}
// XXX before loading.ready?
blkrevmax
,
_
:=
f
.
δFtail
.
LastRevOf
(
blk
,
zfile
.
PJar
()
.
At
())
blkrevmax
=
tidmin
(
blkrevmax
,
pathRevMax
)
/*
// XXX remmapping - only if head.rev == 0
// XXX -> own func?
// XXX locking
for _, mapping := range f.mappings {
if revmax <= mapping.at || !mapping.blkrange.in(blk) {
continue // do nothing
}
if mapping.pinned.Contains(blk) {
continue // do nothing
}
rev = max(δFtail.by(blk) : _ <= mapping.at)
// XXX vvv -> go
client.remmap(mapping.addr[blk], file/@<rev>/data)
mapping.pinned.Add(blk)
}
*/
// data loaded with error - cleanup .loading
if
loading
.
err
!=
nil
{
f
.
loadMu
.
Lock
()
delete
(
f
.
loading
,
blk
)
f
.
loadMu
.
Unlock
()
return
err
}
// data loaded ok
copy
(
dest
,
blkdata
)
// store to kernel pagecache whole block that we've just loaded from database.
// This way, even if the user currently requested to read only small portion from it,
// it will prevent next e.g. consecutive user read request to again hit
// the DB, and instead will be served by kernel from its pagecache.
//
// We cannot do this directly from reading goroutine - while reading
// kernel FUSE is holding corresponding page in pagecache locked, and if
// we would try to update that same page in pagecache it would result
// in deadlock inside kernel.
//
// .loading cleanup is done once we are finished with putting the data into OS pagecache.
// If we do it earlier - a simultaneous read covered by the same block could result
// into missing both kernel pagecache (if not yet updated) and empty .loading[blk],
// and thus would trigger DB access again.
//
// XXX if direct-io: don't touch pagecache
go
f
.
uploadBlk
(
blk
,
loading
)
return
nil
}
// uploadBlk complements readBlk and uploads loaded blkdata into OS cache.
func
(
f
*
BigFile
)
uploadBlk
(
blk
int64
,
loading
*
blkLoadState
)
{
head
:=
f
.
head
// rlock zconnMu and make sure zwatcher is not asking us to pause.
// if it does - wait for a safer time not to deadlock.
// see notes.txt -> "Kernel locks page on read/cache store/..." for details.
retry
:
for
{
head
.
zconnMu
.
RLock
()
// help zwatcher if it asks us to pause uploadings, so it can
// take zconnMu wlocked without deadlocks.
if
head
.
pauseOSCacheUpload
{
ready
:=
head
.
continueOSCacheUpload
head
.
zconnMu
.
RUnlock
()
<-
ready
continue
retry
}
break
}
// zwatcher is not currently trying to pause OS cache uploads.
// check if this block was already invalidated by zwatcher.
// if so don't upload the block into OS cache.
f
.
loadMu
.
Lock
()
loading_
:=
f
.
loading
[
blk
]
f
.
loadMu
.
Unlock
()
if
loading
!=
loading_
{
head
.
zconnMu
.
RUnlock
()
return
}
oid
:=
f
.
zfile
.
POid
()
// signal to zwatcher not to run while we are performing the upload.
// upload with released zconnMu so that zwatcher can lock it even if to
// check inflightOSCacheUploads status.
atomic
.
AddInt32
(
&
head
.
inflightOSCacheUploads
,
+
1
)
head
.
zconnMu
.
RUnlock
()
st
:=
gfsconn
.
FileNotifyStoreCache
(
f
.
Inode
(),
blk
*
f
.
blksize
,
loading
.
blkdata
)
f
.
loadMu
.
Lock
()
bug
:=
(
loading
!=
f
.
loading
[
blk
])
if
!
bug
{
delete
(
f
.
loading
,
blk
)
}
f
.
loadMu
.
Unlock
()
// signal to zwatcher that we are done and it can continue.
atomic
.
AddInt32
(
&
head
.
inflightOSCacheUploads
,
-
1
)
if
bug
{
panic
(
fmt
.
Sprintf
(
"BUG: bigfile %s: blk %d: f.loading mutated while uploading data to pagecache"
,
oid
,
blk
))
}
if
st
==
fuse
.
OK
{
return
}
// pagecache update failed, but it must not (we verified on startup that
// pagecache control is supported by kernel). We can correctly live on
// with the error, but data access will be likely very slow. Tell user
// about the problem.
log
.
Errorf
(
"BUG: bigfile %s: blk %d: -> pagecache: %s (ignoring, but reading from bigfile will be very slow)"
,
oid
,
blk
,
st
)
}
// FIXME groot/gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment