Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Levin Zimmermann
wendelin.core
Commits
b15ad994
Commit
b15ad994
authored
Oct 27, 2024
by
Levin Zimmermann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
9157390d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
21 deletions
+61
-21
wcfs/misc.go
wcfs/misc.go
+1
-6
wcfs/wcfs.go
wcfs/wcfs.go
+60
-15
No files found.
wcfs/misc.go
View file @
b15ad994
...
@@ -37,7 +37,6 @@ import (
...
@@ -37,7 +37,6 @@ import (
"github.com/shirou/gopsutil/v4/process"
"github.com/shirou/gopsutil/v4/process"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/fuse/nodefs"
"github.com/hanwen/go-fuse/v2/fuse/nodefs"
"github.com/elastic/go-sysinfo"
"github.com/pkg/errors"
"github.com/pkg/errors"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xerr"
...
@@ -548,7 +547,7 @@ func panicf(format string, argv ...interface{}) {
...
@@ -548,7 +547,7 @@ func panicf(format string, argv ...interface{}) {
//
//
// NOTE: starting from go1.23 it, via os.FindProcess, uses pidfd which avoids potential
// NOTE: starting from go1.23 it, via os.FindProcess, uses pidfd which avoids potential
// race of later signalling to pid of already long-gone and replaced process.
// race of later signalling to pid of already long-gone and replaced process.
func
findAliveProcess
(
pid
int
)
(
_
*
sysinfo
.
type
s
.
Process
,
err
error
)
{
func
findAliveProcess
(
pid
int
)
(
_
*
o
s
.
Process
,
err
error
)
{
defer
xerr
.
Contextf
(
&
err
,
"findAlive pid%d"
,
pid
)
defer
xerr
.
Contextf
(
&
err
,
"findAlive pid%d"
,
pid
)
proc
,
err
:=
os
.
FindProcess
(
pid
)
proc
,
err
:=
os
.
FindProcess
(
pid
)
...
@@ -565,10 +564,6 @@ func findAliveProcess(pid int) (_ *sysinfo.types.Process, err error) {
...
@@ -565,10 +564,6 @@ func findAliveProcess(pid int) (_ *sysinfo.types.Process, err error) {
proc
.
Release
()
proc
.
Release
()
return
nil
,
syscall
.
ESRCH
return
nil
,
syscall
.
ESRCH
}
}
proc
,
err
:=
sysinfo
.
Process
(
proc
.
Pid
)
if
err
!=
nil
{
return
nil
,
err
}
return
proc
,
nil
return
proc
,
nil
}
}
...
...
wcfs/wcfs.go
View file @
b15ad994
...
@@ -516,6 +516,8 @@ import (
...
@@ -516,6 +516,8 @@ import (
"github.com/johncgriffin/overflow"
"github.com/johncgriffin/overflow"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/fuse/nodefs"
"github.com/hanwen/go-fuse/v2/fuse/nodefs"
"github.com/elastic/go-sysinfo"
"github.com/elastic/go-sysinfo/types"
"github.com/pkg/errors"
"github.com/pkg/errors"
"lab.nexedi.com/nexedi/wendelin.core/wcfs/internal/xzodb"
"lab.nexedi.com/nexedi/wendelin.core/wcfs/internal/xzodb"
...
@@ -697,7 +699,7 @@ type WatchLink struct {
...
@@ -697,7 +699,7 @@ type WatchLink struct {
down
chan
struct
{}
// ready after shutdown completes
down
chan
struct
{}
// ready after shutdown completes
pinWG
sync
.
WaitGroup
// all pin handlers are accounted here
pinWG
sync
.
WaitGroup
// all pin handlers are accounted here
client
*
os
.
Process
// client that opened the WatchLink
client
Client
// client that opened the WatchLink
}
}
// Watch represents watching for changes to 1 BigFile over particular watch link.
// Watch represents watching for changes to 1 BigFile over particular watch link.
...
@@ -742,6 +744,24 @@ type Stats struct {
...
@@ -742,6 +744,24 @@ type Stats struct {
pinkill
atomic
.
Int64
// # of times a client was killed due to badly handling pin
pinkill
atomic
.
Int64
// # of times a client was killed due to badly handling pin
}
}
// Client represents a client of WCFS server.
type
Client
struct
{
proc
*
os
.
Process
// Holds process to send signals to
info
types
.
ProcessInfo
// Provides information about process
user
types
.
UserInfo
// Provides information about the user that owns the client process
}
func
(
client
Client
)
Format
()
string
{
return
fmt
.
Sprintf
(
"process(PID=%v;exe=%v;UID=%v)"
,
client
.
info
.
PID
,
client
.
info
.
Exe
,
client
.
user
.
UID
)
}
func
debugClient
(
client
Client
,
format
string
,
argv
...
interface
{})
{
if
!
log
.
V
(
2
)
{
return
}
log
.
InfoDepth
(
1
,
fmt
.
Sprintf
(
client
.
Format
()
+
": "
+
format
,
argv
...
))
}
// -------- ZODB cache control --------
// -------- ZODB cache control --------
...
@@ -1595,11 +1615,8 @@ func (w *Watch) __pin(ctx context.Context, blk int64, rev zodb.Tid) (err error)
...
@@ -1595,11 +1615,8 @@ func (w *Watch) __pin(ctx context.Context, blk int64, rev zodb.Tid) (err error)
// continue to provide correct uncorrupted data to it. The filesystem is
// continue to provide correct uncorrupted data to it. The filesystem is
// switched to EIO mode in such case.
// switched to EIO mode in such case.
func
(
wlink
*
WatchLink
)
badPinKill
(
reason
error
)
{
func
(
wlink
*
WatchLink
)
badPinKill
(
reason
error
)
{
pid
:=
wlink
.
client
.
Pid
logf
:=
func
(
format
string
,
argv
...
any
)
{
logf
:=
func
(
format
string
,
argv
...
any
)
{
emsg
:=
fmt
.
Sprintf
(
"pid%d: "
,
pid
)
emsg
:=
wlink
.
client
.
Format
()
+
": "
+
fmt
.
Sprintf
(
format
,
argv
...
)
emsg
+=
fmt
.
Sprintf
(
format
,
argv
...
)
log
.
Error
(
emsg
)
log
.
Error
(
emsg
)
}
}
logf
(
"client failed to handle pin notification correctly and timely in %s: %s"
,
groot
.
pinTimeout
,
reason
)
logf
(
"client failed to handle pin notification correctly and timely in %s: %s"
,
groot
.
pinTimeout
,
reason
)
...
@@ -1619,7 +1636,7 @@ func (wlink *WatchLink) badPinKill(reason error) {
...
@@ -1619,7 +1636,7 @@ func (wlink *WatchLink) badPinKill(reason error) {
func
(
wlink
*
WatchLink
)
_badPinKill
()
error
{
func
(
wlink
*
WatchLink
)
_badPinKill
()
error
{
client
:=
wlink
.
client
client
:=
wlink
.
client
pid
:=
client
.
Pid
pid
:=
client
.
proc
.
Pid
// time budget for pin + wait + fatal-notify + kill = pinTimeout + 1 + 1/3·pinTimeout
// time budget for pin + wait + fatal-notify + kill = pinTimeout + 1 + 1/3·pinTimeout
// < 2 ·pinTimeout if pinTimeout > 3/2
// < 2 ·pinTimeout if pinTimeout > 3/2
...
@@ -1640,12 +1657,12 @@ func (wlink *WatchLink) _badPinKill() error {
...
@@ -1640,12 +1657,12 @@ func (wlink *WatchLink) _badPinKill() error {
// siginfo structure. It would be good if we can mimic that behaviour to a
// siginfo structure. It would be good if we can mimic that behaviour to a
// reasonable extent if possible."
// reasonable extent if possible."
log
.
Errorf
(
"pid%d: <- SIGBUS"
,
pid
)
log
.
Errorf
(
"pid%d: <- SIGBUS"
,
pid
)
err
:=
client
.
Signal
(
syscall
.
SIGBUS
)
err
:=
client
.
proc
.
Signal
(
syscall
.
SIGBUS
)
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
ok
,
err
:=
waitProcessEnd
(
ctx1
,
client
)
ok
,
err
:=
waitProcessEnd
(
ctx1
,
client
.
proc
)
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
ctx1
.
Err
())
{
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
ctx1
.
Err
())
{
return
err
return
err
}
}
...
@@ -1655,12 +1672,12 @@ func (wlink *WatchLink) _badPinKill() error {
...
@@ -1655,12 +1672,12 @@ func (wlink *WatchLink) _badPinKill() error {
log
.
Errorf
(
"pid%d: is still alive after SIGBUS"
,
pid
)
log
.
Errorf
(
"pid%d: is still alive after SIGBUS"
,
pid
)
log
.
Errorf
(
"pid%d: <- SIGKILL"
,
pid
)
log
.
Errorf
(
"pid%d: <- SIGKILL"
,
pid
)
err
=
client
.
Signal
(
syscall
.
SIGKILL
)
err
=
client
.
proc
.
Signal
(
syscall
.
SIGKILL
)
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
ok
,
err
=
waitProcessEnd
(
ctx2
,
client
)
ok
,
err
=
waitProcessEnd
(
ctx2
,
client
.
proc
)
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
ctx2
.
Err
())
{
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
ctx2
.
Err
())
{
return
err
return
err
}
}
...
@@ -1773,6 +1790,7 @@ func (f *BigFile) readPinWatchers(ctx context.Context, blk int64, blkrevMax zodb
...
@@ -1773,6 +1790,7 @@ func (f *BigFile) readPinWatchers(ctx context.Context, blk int64, blkrevMax zodb
// It sends "pin" notifications; final "ok" or "error" must be sent by caller.
// It sends "pin" notifications; final "ok" or "error" must be sent by caller.
func
(
wlink
*
WatchLink
)
setupWatch
(
ctx
context
.
Context
,
foid
zodb
.
Oid
,
at
zodb
.
Tid
)
(
err
error
)
{
func
(
wlink
*
WatchLink
)
setupWatch
(
ctx
context
.
Context
,
foid
zodb
.
Oid
,
at
zodb
.
Tid
)
(
err
error
)
{
defer
xerr
.
Contextf
(
&
err
,
"setup watch f<%s> @%s"
,
foid
,
at
)
defer
xerr
.
Contextf
(
&
err
,
"setup watch f<%s> @%s"
,
foid
,
at
)
debugClient
(
wlink
.
client
,
"setup watch oid=%v ; tid=%v"
,
foid
,
at
)
head
:=
wlink
.
head
head
:=
wlink
.
head
bfdir
:=
head
.
bfdir
bfdir
:=
head
.
bfdir
...
@@ -1990,6 +2008,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
...
@@ -1990,6 +2008,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
return
err
// should not fail
return
err
// should not fail
}
}
debugClient
(
wlink
.
client
,
"successfully setup watch oid=%v ; tid=%v"
,
foid
,
at
)
return
nil
return
nil
}
}
...
@@ -2005,14 +2024,32 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
...
@@ -2005,14 +2024,32 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
// TODO(?) check flags
// TODO(?) check flags
head
:=
wnode
.
head
head
:=
wnode
.
head
// remember our client who opened the watchlink.
// remember
process of
our client who opened the watchlink.
// We will need to kill the client if it will be e.g. slow to respond to pin notifications.
// We will need to kill the client if it will be e.g. slow to respond to pin notifications.
client
,
err
:=
findAliveProcess
(
int
(
fctx
.
Caller
.
Pid
))
proc
,
err
:=
findAliveProcess
(
int
(
fctx
.
Caller
.
Pid
))
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
// fetch info of client process for logging
syslog_proc
,
err
:=
sysinfo
.
Process
(
proc
.
Pid
)
if
err
!=
nil
{
return
nil
,
err
}
proc_info
,
err
:=
syslog_proc
.
Info
()
if
err
!=
nil
{
return
nil
,
err
}
user_info
,
err
:=
syslog_proc
.
User
()
if
err
!=
nil
{
return
nil
,
err
}
client
:=
Client
{
proc
,
proc_info
,
user_info
}
serveCtx
,
serveCancel
:=
context
.
WithCancel
(
context
.
TODO
()
/*TODO ctx of wcfs running*/
)
serveCtx
,
serveCancel
:=
context
.
WithCancel
(
context
.
TODO
()
/*TODO ctx of wcfs running*/
)
debugClient
(
client
,
"Open WatchLink"
)
wlink
:=
&
WatchLink
{
wlink
:=
&
WatchLink
{
sk
:
NewFileSock
(),
sk
:
NewFileSock
(),
id
:
atomic
.
AddInt32
(
&
wnode
.
idNext
,
+
1
),
id
:
atomic
.
AddInt32
(
&
wnode
.
idNext
,
+
1
),
...
@@ -2029,7 +2066,7 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
...
@@ -2029,7 +2066,7 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
head
.
wlinkTab
[
wlink
]
=
struct
{}{}
head
.
wlinkTab
[
wlink
]
=
struct
{}{}
head
.
wlinkMu
.
Unlock
()
head
.
wlinkMu
.
Unlock
()
go
wlink
.
serve
(
serveCtx
)
go
wlink
.
serve
(
serveCtx
,
int
(
fctx
.
Caller
.
Pid
)
)
return
wlink
.
sk
.
File
(),
nil
return
wlink
.
sk
.
File
(),
nil
}
}
...
@@ -2042,6 +2079,7 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
...
@@ -2042,6 +2079,7 @@ func (wnode *WatchNode) open(flags uint32, fctx *fuse.Context) (_ nodefs.File, e
//
//
// NOTE shutdown can be invoked under atMu.R from pin.
// NOTE shutdown can be invoked under atMu.R from pin.
func
(
wlink
*
WatchLink
)
shutdown
(
reason
error
)
{
func
(
wlink
*
WatchLink
)
shutdown
(
reason
error
)
{
debugClient
(
wlink
.
client
,
"shutdown watchlink"
)
wlink
.
down1
.
Do
(
func
()
{
wlink
.
down1
.
Do
(
func
()
{
// mark wlink as down; this signals serve loop to exit and cancels all in-progress pins
// mark wlink as down; this signals serve loop to exit and cancels all in-progress pins
wlink
.
serveCancel
()
wlink
.
serveCancel
()
...
@@ -2079,10 +2117,14 @@ func (wlink *WatchLink) shutdown(reason error) {
...
@@ -2079,10 +2117,14 @@ func (wlink *WatchLink) shutdown(reason error) {
// serve serves client initiated watch requests and routes client replies to
// serve serves client initiated watch requests and routes client replies to
// wcfs initiated pin requests.
// wcfs initiated pin requests.
func
(
wlink
*
WatchLink
)
serve
(
ctx
context
.
Context
)
{
func
(
wlink
*
WatchLink
)
serve
(
ctx
context
.
Context
,
clientpid
int
)
{
debugClient
(
wlink
.
client
,
"start serving"
)
err
:=
wlink
.
_serve
(
ctx
)
err
:=
wlink
.
_serve
(
ctx
)
if
err
!=
nil
{
if
err
!=
nil
{
debugClient
(
wlink
.
client
,
"stop serving client with error"
)
log
.
Error
(
err
)
log
.
Error
(
err
)
}
else
{
debugClient
(
wlink
.
client
,
"stop serving client without error"
)
}
}
}
}
...
@@ -2091,6 +2133,8 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
...
@@ -2091,6 +2133,8 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
// final watchlink cleanup is done on serve exit
// final watchlink cleanup is done on serve exit
defer
func
()
{
defer
func
()
{
debugClient
(
wlink
.
client
,
"cleanup watchlink"
)
// unregister all watches created on this wlink
// unregister all watches created on this wlink
wlink
.
byfileMu
.
Lock
()
wlink
.
byfileMu
.
Lock
()
for
_
,
w
:=
range
wlink
.
byfile
{
for
_
,
w
:=
range
wlink
.
byfile
{
...
@@ -2115,7 +2159,7 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
...
@@ -2115,7 +2159,7 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
}
}
// release client process
// release client process
wlink
.
client
.
Release
()
wlink
.
client
.
proc
.
Release
()
}()
}()
// watch handlers are spawned in dedicated workgroup
// watch handlers are spawned in dedicated workgroup
...
@@ -2223,6 +2267,7 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
...
@@ -2223,6 +2267,7 @@ func (wlink *WatchLink) _serve(ctx context.Context) (err error) {
// returned error comes without full error prefix.
// returned error comes without full error prefix.
func
(
wlink
*
WatchLink
)
handleWatch
(
ctx
context
.
Context
,
stream
uint64
,
msg
string
)
(
err
error
)
{
func
(
wlink
*
WatchLink
)
handleWatch
(
ctx
context
.
Context
,
stream
uint64
,
msg
string
)
(
err
error
)
{
defer
xerr
.
Contextf
(
&
err
,
"%d"
,
stream
)
defer
xerr
.
Contextf
(
&
err
,
"%d"
,
stream
)
debugClient
(
wlink
.
client
,
"handleWatch: %v"
,
msg
)
err
=
wlink
.
_handleWatch
(
ctx
,
msg
)
err
=
wlink
.
_handleWatch
(
ctx
,
msg
)
reply
:=
"ok"
reply
:=
"ok"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment