Commit 6a785da2 authored by Jakob Unterwurzacher's avatar Jakob Unterwurzacher Committed by Han-Wen Nienhuys

fs: TestFsstress: add rename, link ; reduce runtime to 1 sec

With rename and link, it hangs within 20ms, so we can reduce the
runtime.

Also add deadlock detection & statistics printout.

A good run looks like this:

	$ TMPDIR=/var/tmp go test -run TestFsstress -v
	=== RUN   TestFsstress
	    simple_test.go:601: Operation statistics:
	    simple_test.go:604:      creat:    33 ok, 132978 fail,  0 hung
	    simple_test.go:604:       link:    85 ok,  37196 fail,  0 hung
	    simple_test.go:604:      mkdir:    60 ok, 155273 fail,  0 hung
	    simple_test.go:604:     mkfifo:    45 ok, 161937 fail,  0 hung
	    simple_test.go:604: mknod_fifo:    55 ok, 154459 fail,  0 hung
	    simple_test.go:604:  mknod_reg:    77 ok, 180614 fail,  0 hung
	    simple_test.go:604: mknod_sock:    83 ok, 139479 fail,  0 hung
	    simple_test.go:604:    readdir:    13 ok,      0 fail,  0 hung
	    simple_test.go:604:     remove:   180 ok,     40 fail,  0 hung
	    simple_test.go:604:     rename:   646 ok,    137 fail,  0 hung
	    simple_test.go:604:      rmdir:    48 ok,  48342 fail,  0 hung
	    simple_test.go:604:    symlink:    30 ok, 119954 fail,  0 hung
	    simple_test.go:604:     unlink:   150 ok,   6121 fail,  0 hung

	--- PASS: TestFsstress (1.02s)
	PASS
	ok  	github.com/hanwen/go-fuse/v2/fs	1.022s

A bad run like this:

	go-fuse/fs$ TMPDIR=/var/tmp go test -run TestFsstress -v
	=== RUN   TestFsstress
	    simple_test.go:592: timeout waiting for goroutines to exit (deadlocked?)
	    simple_test.go:601: Operation statistics:
	    simple_test.go:604:      creat:     0 ok,     44 fail, 10 hung
	    simple_test.go:604:       link:     0 ok,      6 fail, 10 hung
	    simple_test.go:604:      mkdir:     1 ok,    283 fail, 10 hung
	    simple_test.go:604:     mkfifo:     1 ok,    140 fail, 10 hung
	    simple_test.go:604: mknod_fifo:     1 ok,   1246 fail, 10 hung
	    simple_test.go:604:  mknod_reg:     0 ok,     19 fail, 10 hung
	    simple_test.go:604: mknod_sock:     0 ok,    110 fail, 10 hung
	    simple_test.go:604:    readdir:     1 ok,      0 fail,  1 hung
	    simple_test.go:604:     remove:     1 ok,      0 fail, 10 hung
	    simple_test.go:604:     rename:     0 ok,     12 fail, 10 hung
	    simple_test.go:604:      rmdir:     1 ok,    346 fail, 10 hung
	    simple_test.go:604:    symlink:     1 ok,   1456 fail, 10 hung
	    simple_test.go:604:     unlink:     2 ok,     15 fail, 10 hung
	    simple_test.go:51: /usr/bin/fusermount: failed to unmount /var/tmp/TestFsstress302646914/mnt: Device or resource busy
	         (code exit status 1)

	--- FAIL: TestFsstress (2.30s)
	FAIL

	[hangs]

Change-Id: I9eb0ce02eb1df766be257d6a87de53c6ffa6d21e
parent e5a9167d
......@@ -14,8 +14,10 @@ import (
"path/filepath"
"reflect"
"runtime"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"testing"
"time"
......@@ -400,6 +402,13 @@ func TestOpenDirectIO(t *testing.T) {
// TestFsstress is loosely modeled after xfstest's fsstress. It performs rapid
// parallel removes / creates / readdirs. Coupled with inode reuse, this test
// used to deadlock go-fuse quite quickly.
//
// Note: Run as
//
// TMPDIR=/var/tmp go test -run TestFsstress
//
// to make sure the backing filesystem is ext4. /tmp is tmpfs on modern Linux
// distributions, and tmpfs does not reuse inode numbers, hiding the problem.
func TestFsstress(t *testing.T) {
tc := newTestCase(t, &testOptions{suppressDebug: true, attrCache: true, entryCache: true})
defer tc.Clean()
......@@ -409,11 +418,14 @@ func TestFsstress(t *testing.T) {
defer runtime.GOMAXPROCS(old)
}
const concurrency = 10
var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
ops := map[string]func(string) error{
// operations taking 1 path argument
ops1 := map[string]func(string) error{
"mkdir": func(p string) error { return syscall.Mkdir(p, 0700) },
"rmdir": func(p string) error { return syscall.Rmdir(p) },
"mknod_reg": func(p string) error { return syscall.Mknod(p, 0700|syscall.S_IFREG, 0) },
"remove": os.Remove,
"unlink": syscall.Unlink,
......@@ -429,13 +441,60 @@ func TestFsstress(t *testing.T) {
return err
},
}
// operations taking 2 path arguments
ops2 := map[string]func(string, string) error{
"rename": syscall.Rename,
"link": syscall.Link,
}
type opStats struct {
ok *int64
fail *int64
hung *int64
}
stats := make(map[string]opStats)
// pathN() returns something like /var/tmp/TestFsstress/TestFsstress.4
pathN := func(n int) string {
return fmt.Sprintf("%s/%s.%d", tc.mntDir, t.Name(), n)
}
opLoop := func(k string, n int) {
defer wg.Done()
op := ops[k]
op := ops1[k]
for {
p := fmt.Sprintf("%s/%s.%d", tc.mntDir, t.Name(), n)
op(p)
p := pathN(1)
atomic.AddInt64(stats[k].hung, 1)
err := op(p)
atomic.AddInt64(stats[k].hung, -1)
if err != nil {
atomic.AddInt64(stats[k].fail, 1)
} else {
atomic.AddInt64(stats[k].ok, 1)
}
select {
case <-ctx.Done():
return
default:
}
}
}
op2Loop := func(k string, n int) {
defer wg.Done()
op := ops2[k]
n2 := (n + 1) % concurrency
for {
p1 := pathN(n)
p2 := pathN(n2)
atomic.AddInt64(stats[k].hung, 1)
err := op(p1, p2)
atomic.AddInt64(stats[k].hung, -1)
if err != nil {
atomic.AddInt64(stats[k].fail, 1)
} else {
atomic.AddInt64(stats[k].ok, 1)
}
select {
case <-ctx.Done():
return
......@@ -444,18 +503,22 @@ func TestFsstress(t *testing.T) {
}
}
readdirLoop := func() {
readdirLoop := func(k string) {
defer wg.Done()
for {
atomic.AddInt64(stats[k].hung, 1)
f, err := os.Open(tc.mntDir)
if err != nil {
panic(err)
}
_, err = f.Readdir(0)
if err != nil {
panic(err)
atomic.AddInt64(stats[k].fail, 1)
} else {
atomic.AddInt64(stats[k].ok, 1)
}
f.Close()
atomic.AddInt64(stats[k].hung, -1)
select {
case <-ctx.Done():
return
......@@ -464,16 +527,39 @@ func TestFsstress(t *testing.T) {
}
}
for i := 1; i < 10; i++ {
for k := range ops {
// prepare stats map
var allOps []string
for k := range ops1 {
allOps = append(allOps, k)
}
for k := range ops2 {
allOps = append(allOps, k)
}
allOps = append(allOps, "readdir")
for _, k := range allOps {
var i1, i2, i3 int64
stats[k] = opStats{ok: &i1, fail: &i2, hung: &i3}
}
// spawn worker goroutines
for i := 0; i < concurrency; i++ {
for k := range ops1 {
wg.Add(1)
go opLoop(k, i)
}
for k := range ops2 {
wg.Add(1)
go op2Loop(k, i)
}
}
{
k := "readdir"
wg.Add(1)
go readdirLoop()
go readdirLoop(k)
}
// spawn ls loop
//
// An external "ls" loop has a destructive effect that I am unable to
// reproduce through in-process operations.
if strings.ContainsAny(tc.mntDir, "'\\") {
......@@ -488,12 +574,42 @@ func TestFsstress(t *testing.T) {
}
defer cmd.Process.Kill()
// Run the test for 10 seconds
time.Sleep(10 * time.Second)
// Run the test for 1 second. If it deadlocks, it usually does within 20ms.
time.Sleep(1 * time.Second)
cancel()
// waitTimeout waits for the waitgroup for the specified max timeout.
// Returns true if waiting timed out.
waitTimeout := func(wg *sync.WaitGroup, timeout time.Duration) bool {
c := make(chan struct{})
go func() {
defer close(c)
wg.Wait()
}()
select {
case <-c:
return false // completed normally
case <-time.After(timeout):
return true // timed out
}
}
if waitTimeout(&wg, time.Second) {
t.Errorf("timeout waiting for goroutines to exit (deadlocked?)")
}
// Print operation statistics
var keys []string
for k := range stats {
keys = append(keys, k)
}
sort.Strings(keys)
t.Logf("Operation statistics:")
for _, k := range keys {
v := stats[k]
t.Logf("%10s: %5d ok, %6d fail, %2d hung", k, *v.ok, *v.fail, *v.hung)
}
}
func init() {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment