Commit 89d653c0 authored by Kirill Smelkov's avatar Kirill Smelkov

wcfs: Implement protection against faulty client + related fixes and improvements

The WCFS documentation specifies [1]:

- - - 8> - - - 8> - - -

If a client, on purpose or due to a bug or being stopped, is slow to respond
with ack to file invalidation notification, it creates a problem because the
server will become blocked waiting for pin acknowledgments, and thus all
other clients, that try to work with the same file, will get stuck.

[...]

Lacking OS primitives to change address space of another process and not
being able to work it around with ptrace in userspace, wcfs takes approach
to kill a slow client on 30 seconds timeout by default.

- - - <8 - - - <8 - - -

Until now, however, this protection was not actually implemented, so a
single faulty client could freeze the whole system. This work implements
it: faulty clients are now killed after the timeout, or on any other
misbehaviour in their pin handlers.

Working on this topic also resulted in several fixes and improvements
around isolation protocol implementation on the server side.

See individual patches for details.

[1] https://lab.nexedi.com/nexedi/wendelin.core/blob/38dde766/wcfs/wcfs.go#L186-208

Co-authored-by: Levin Zimmermann <levin.zimmermann@nexedi.com>

/reviewed-on nexedi/wendelin.core!18
parents 79e6f7b9 1fcef9c9
module lab.nexedi.com/nexedi/wendelin.core/wcfs module lab.nexedi.com/nexedi/wendelin.core/wcfs
go 1.14 go 1.19
require ( require (
github.com/golang/glog v1.0.0 github.com/golang/glog v1.0.0
...@@ -8,11 +8,34 @@ require ( ...@@ -8,11 +8,34 @@ require (
github.com/johncgriffin/overflow v0.0.0-20211019200055-46fa312c352c github.com/johncgriffin/overflow v0.0.0-20211019200055-46fa312c352c
github.com/kisielk/og-rek v1.2.0 github.com/kisielk/og-rek v1.2.0
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.8.4 github.com/shirou/gopsutil/v4 v4.24.8
github.com/stretchr/testify v1.9.0
lab.nexedi.com/kirr/go123 v0.0.0-20230822135329-95433de34faf lab.nexedi.com/kirr/go123 v0.0.0-20230822135329-95433de34faf
lab.nexedi.com/kirr/neo/go v0.0.0-20240723085959-839ee634bd66 lab.nexedi.com/kirr/neo/go v0.0.0-20240723085959-839ee634bd66
) )
require (
crawshaw.io/sqlite v0.3.2 // indirect
github.com/DataDog/czlib v0.0.0-20210322182103-8087f4e14ae7 // indirect
github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fsnotify/fsnotify v1.5.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/philhofer/fwd v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/shamaton/msgpack v1.2.1 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/someonegg/gocontainer v1.0.0 // indirect
github.com/tinylib/msgp v1.1.6 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/sys v0.24.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
// we use kirr/go-fuse@y/nodefs-cancel // we use kirr/go-fuse@y/nodefs-cancel
// see https://github.com/hanwen/go-fuse/pull/343 for details // see https://github.com/hanwen/go-fuse/pull/343 for details
replace github.com/hanwen/go-fuse/v2 v2.4.2 => lab.nexedi.com/kirr/go-fuse/v2 v2.4.2-0.20231211215333-9f9ad4a1c7cc replace github.com/hanwen/go-fuse/v2 v2.4.2 => lab.nexedi.com/kirr/go-fuse/v2 v2.4.2-0.20231211215333-9f9ad4a1c7cc
This diff is collapsed.
# Copyright (C) 2019-2021 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -33,7 +33,7 @@ from posix.types cimport off_t ...@@ -33,7 +33,7 @@ from posix.types cimport off_t
from cpython.exc cimport PyErr_SetFromErrno from cpython.exc cimport PyErr_SetFromErrno
from golang cimport chan, pychan, select, panic, topyexc, cbool from golang cimport chan, pychan, select, panic, topyexc, cbool, structZ
from golang cimport sync, time from golang cimport sync, time
# _tWCFS is pyx part of tWCFS. # _tWCFS is pyx part of tWCFS.
...@@ -53,16 +53,15 @@ cdef class _tWCFS: ...@@ -53,16 +53,15 @@ cdef class _tWCFS:
# but pin handler is failing one way or another - select will wake-up # but pin handler is failing one way or another - select will wake-up
# but, if _abort_ontimeout uses GIL, won't continue to run trying to lock # but, if _abort_ontimeout uses GIL, won't continue to run trying to lock
# GIL -> deadlock. # GIL -> deadlock.
def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan nogilready not None): def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan timeoutch not None, pychan nogilready not None):
cdef chan[double] timeoutch = time.after(dt)
emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second) emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second)
cdef char *_emsg1 = emsg1 cdef char *_emsg1 = emsg1
with nogil: with nogil:
# tell main thread that we entered nogil world # tell main thread that we entered nogil world
nogilready.chan_structZ().close() nogilready.chan_structZ().close()
t.__abort_ontimeout(dt, timeoutch, fdabort, _emsg1) t.__abort_ontimeout(timeoutch.chan_structZ(), fdabort, _emsg1)
cdef void __abort_ontimeout(_tWCFS t, double dt, chan[double] timeoutch, cdef void __abort_ontimeout(_tWCFS t, chan[structZ] timeoutch,
int fdabort, const char *emsg1) nogil except +topyexc: int fdabort, const char *emsg1) nogil except +topyexc:
_ = select([ _ = select([
timeoutch.recvs(), # 0 timeoutch.recvs(), # 0
......
// Copyright (C) 2018-2021 Nexedi SA and Contributors. // Copyright (C) 2018-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -25,17 +25,21 @@ import ( ...@@ -25,17 +25,21 @@ import (
"fmt" "fmt"
"io" "io"
"math" "math"
"os"
"strconv" "strconv"
"strings" "strings"
"sync/atomic" "sync/atomic"
"syscall" "syscall"
"time"
log "github.com/golang/glog" log "github.com/golang/glog"
"github.com/shirou/gopsutil/v4/process"
"github.com/hanwen/go-fuse/v2/fuse" "github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/fuse/nodefs" "github.com/hanwen/go-fuse/v2/fuse/nodefs"
"github.com/pkg/errors" "github.com/pkg/errors"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xio" "lab.nexedi.com/kirr/go123/xio"
"lab.nexedi.com/kirr/neo/go/zodb" "lab.nexedi.com/kirr/neo/go/zodb"
...@@ -321,11 +325,16 @@ func NewFileSock() *FileSock { ...@@ -321,11 +325,16 @@ func NewFileSock() *FileSock {
// The handle should be given to kernel as result of a file open, for that file // The handle should be given to kernel as result of a file open, for that file
// to be connected to the socket. // to be connected to the socket.
func (sk *FileSock) File() nodefs.File { func (sk *FileSock) File() nodefs.File {
return WithOpenStreamFlags(sk.file)
}
// WithOpenStreamFlags wraps file handle with FUSE flags needed when opening stream IO.
func WithOpenStreamFlags(file nodefs.File) nodefs.File {
// nonseekable & directio for opened file to have streaming semantic as // nonseekable & directio for opened file to have streaming semantic as
// if it was a socket. FOPEN_STREAM is used so that both read and write // if it was a socket. FOPEN_STREAM is used so that both read and write
// could be run simultaneously: git.kernel.org/linus/10dce8af3422 // could be run simultaneously: git.kernel.org/linus/10dce8af3422
return &nodefs.WithFlags{ return &nodefs.WithFlags{
File: sk.file, File: file,
FuseFlags: fuse.FOPEN_STREAM | fuse.FOPEN_NONSEEKABLE | fuse.FOPEN_DIRECT_IO, FuseFlags: fuse.FOPEN_STREAM | fuse.FOPEN_NONSEEKABLE | fuse.FOPEN_DIRECT_IO,
} }
} }
...@@ -428,7 +437,14 @@ func (f *skFile) Release() { ...@@ -428,7 +437,14 @@ func (f *skFile) Release() {
} }
// ---- parsing ---- // fatalEIO switches filesystem into EIO mode and terminates the program.
func fatalEIO() {
	// log.Fatal exits the program; once the server process is gone, every
	// access to the previously mounted filesystem fails with ENOTCONN.
	const msg = "switching filesystem to EIO mode"
	log.Fatal(msg)
}
// ---- parsing / formatting ----
// parseWatchFrame parses line going through /head/watch into (stream, msg) // parseWatchFrame parses line going through /head/watch into (stream, msg)
// //
...@@ -489,6 +505,17 @@ func parseWatch(msg string) (oid zodb.Oid, at zodb.Tid, err error) { ...@@ -489,6 +505,17 @@ func parseWatch(msg string) (oid zodb.Oid, at zodb.Tid, err error) {
return oid, at, nil return oid, at, nil
} }
// isoRevstr formats revision the way the isolation protocol spells it.
//
// The result is the standard ZODB string form of rev, with the single
// exception of zodb.TidMax, which is rendered as "head".
func isoRevstr(rev zodb.Tid) string {
	switch rev {
	case zodb.TidMax:
		return "head"
	default:
		return rev.String()
	}
}
// ---- make df happy (else it complains "function not supported") ---- // ---- make df happy (else it complains "function not supported") ----
func (root *Root) StatFs() *fuse.StatfsOut { func (root *Root) StatFs() *fuse.StatfsOut {
...@@ -515,3 +542,87 @@ func (root *Root) StatFs() *fuse.StatfsOut { ...@@ -515,3 +542,87 @@ func (root *Root) StatFs() *fuse.StatfsOut {
func panicf(format string, argv ...interface{}) { func panicf(format string, argv ...interface{}) {
panic(fmt.Sprintf(format, argv...)) panic(fmt.Sprintf(format, argv...))
} }
// findAliveProcess lookups process by pid and makes sure it is alive.
//
// On success the returned process handle is owned by the caller.
// If the process exists no more, or is a zombie, the error wraps syscall.ESRCH.
// On any failure the handle obtained from os.FindProcess is released before
// returning, so no process handle is left behind.
//
// NOTE: starting from go1.23 it, via os.FindProcess, uses pidfd which avoids potential
// race of later signalling to pid of already long-gone and replaced process.
func findAliveProcess(pid int) (_ *os.Process, err error) {
	defer xerr.Contextf(&err, "findAlive pid%d", pid)

	proc, err := os.FindProcess(pid)
	if err != nil {
		return nil, err
	}

	// verify that found process is actually good because
	// os.FindProcess returns "done" stub instead of an error
	alive, err := isProcessAlive(proc)
	if err != nil {
		// the handle is not handed to the caller -> do not leak it.
		// Release error is deliberately ignored: the liveness error is
		// the one that matters to the caller.
		proc.Release()
		return nil, err
	}
	if !alive {
		proc.Release()
		return nil, syscall.ESRCH
	}

	return proc, nil
}
// isProcessAlive returns whether process is alive or not.
//
// A process counts as alive when its pid exists and it is not a zombie.
//
// (false, nil) is returned only when the process is known to be gone or to be
// a zombie. Any other failure - for example EPERM when signalling, or an error
// while querying process status - is returned as an error, so that callers do
// not mistake "could not check" for "process ended".
func isProcessAlive(proc *os.Process) (_ bool, err error) {
	defer xerr.Contextf(&err, "isAlive pid%d", proc.Pid)

	// verify that proc's pid exists
	// proc.Signal(0) returns ok even for zombie, but zombie is not alive
	err = proc.Signal(syscall.Signal(0))
	if err != nil {
		// "no such process" and "process already finished" are the only
		// errors that prove the process is gone. Everything else must be
		// reported instead of being silently treated as "not alive".
		if errors.Is(err, syscall.ESRCH) || errors.Is(err, os.ErrProcessDone) {
			return false, nil
		}
		return false, err
	}

	// pid exists. Check if proc is not zombie
	gproc, err := process.NewProcess(int32(proc.Pid))
	if err != nil {
		return false, err
	}
	statusv, err := gproc.Status()
	if err != nil {
		return false, err
	}
	for _, status := range statusv {
		if status == process.Zombie {
			return false, nil
		}
	}

	return true, nil
}
// waitProcessEnd blocks until proc is no longer alive or ctx is done.
//
// It polls isProcessAlive every 100ms and so, contrary to os.Process.Wait,
// does not require the caller to be a parent of proc.
//
// It returns (true, nil) once the process is found not alive, (false, ctx.Err())
// if ctx is done first, and (false, err) if the liveness check itself fails.
func waitProcessEnd(ctx context.Context, proc *os.Process) (_ bool, err error) {
	defer xerr.Contextf(&err, "waitEnd pid%d", proc.Pid)

	const pollInterval = 100 * time.Millisecond
	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	for {
		running, e := isProcessAlive(proc)
		switch {
		case e != nil:
			return false, e
		case !running:
			return true, nil
		}

		// still running -> wait for the next poll or for cancellation
		select {
		case <-ticker.C:
			continue
		case <-ctx.Done():
			return false, ctx.Err()
		}
	}
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment