Commit 904ef0cc authored by Kirill Smelkov's avatar Kirill Smelkov Committed by Han-Wen Nienhuys

fuse/test: Fix TestFopenKeepCache

This test was flaky even before ce2558b4 (fuse/test: disable
TestFopenKeepCache) because after file change it was reporting to kernel
both different size (before=6, after=5) and potentially different mtime.
The kernel is known to invalidate file's data cache on size change, and
also to invalidate file's data cache on mtime change if
CAP_AUTO_INVAL_DATA was negotiated at FUSE handshake. Until recently
go-fuse was always using CAP_AUTO_INVAL_DATA mode.

The test was somehow passing on kernels < Linux 4.20 due to the fact
that the write was coming soon after previous lookup/getattr and thus
likely before attributes timeout. The kernel was still seeing old mtime
and size and was not invalidating file cache. If I add just "sleep
enough time for file attributes to expire..." + followup stat from the
patch, the test fails reliably even on older kernels where it used to be
passing:

	=== RUN   TestFopenKeepCache
	17:44:33.397329 rx 1: INIT i0 {7.27 Ra 0x20000 SPLICE_READ,HANDLE_KILLPRIV,IOCTL_DIR,READDIRPLUS,PARALLEL_DIROPS,ABORT_ERROR,POSIX_LOCKS,DONT_MASK,SPLICE_WRITE,SPLICE_MOVE,ASYNC_READ,FLOCK_LOCKS,WRITEBACK_CACHE,POSIX
	17:44:33.397413 tx 1:     OK, {7.23 Ra 0x20000 ASYNC_READ,NO_OPEN_SUPPORT,BIG_WRITES,AUTO_INVAL_DATA,READDIRPLUS,PARALLEL_DIROPS 0/0 Wr 0x10000 Tg 0x0}
	17:44:33.398589 rx 2: LOOKUP i1 [".go-fuse-epoll-hack"] 20b
	17:44:33.398644 tx 2:     2=no such file or directory, {i0 g0 tE=0s tA=0s {M00 SZ=0 L=0 0:0 B0*0 i0:0 A 0.000000 M 0.000000 C 0.000000}}
	17:44:33.398686 rx 3: CREATE i1 {0100100 [WRONLY,TRUNC,CREAT,0x8000] (022)} [".go-fuse-epoll-hack"] 20b
	17:44:33.398710 tx 3:     OK, {i18446744073709551615 g0 {M0100644 SZ=0 L=1 0:0 B0*0 i0:18446744073709551615 A 0.000000 M 0.000000 C 0.000000} &{18446744073709551615 0 0}}
	17:44:33.398745 rx 4: POLL i18446744073709551615
	17:44:33.398753 tx 4:     38=function not implemented
	17:44:33.399446 rx 5: FLUSH i18446744073709551615 {Fh 18446744073709551615}
	17:44:33.399466 tx 5:     5=input/output error
	17:44:33.399540 rx 6: RELEASE i18446744073709551615 {Fh 18446744073709551615 WRONLY,0x8000  L0}
	17:44:33.399548 tx 6:     5=input/output error
	17:44:33.399567 rx 7: LOOKUP i1 ["file.txt"] 9b
	17:44:33.399648 tx 7:     OK, {i3 g2 tE=1s tA=0.01s {M0100644 SZ=6 L=1 1000:1000 B8*4096 i0:841351 A 1552833873.399069 M 1552833873.399069 C 1552833873.399069}}
	17:44:33.399936 rx 8: OPEN i3 {O_RDONLY,0x8000}
	17:44:33.399976 tx 8:     OK, {Fh 2 CACHE}
	17:44:33.400045 rx 9: READ i3 {Fh 2 [0 +4096)  L 0 NONBLOCK,0x8000}
	17:44:33.400065 tx 9:     OK,  4096b data (fd data)
	17:44:33.400185 rx 10: GETATTR i3 {Fh 2}
	17:44:33.400261 tx 10:     OK, {tA=0.01s {M0100644 SZ=6 L=1 1000:1000 B8*4096 i0:841351 A 1552833873.399069 M 1552833873.399069 C 1552833873.399069}}
	17:44:33.400296 rx 11: FLUSH i3 {Fh 2}
	17:44:33.400305 tx 11:     OK
	17:44:33.400324 rx 12: RELEASE i3 {Fh 2 NONBLOCK,0x8000  L0}
	17:44:33.400334 tx 12:     OK

		sleep here

	17:44:33.500843 rx 13: GETATTR i3 {Fh 0}
	17:44:33.500939 tx 13:     OK, {tA=0.01s {M0100644 SZ=5 L=1 1000:1000 B8*4096 i0:841351 A 1552833873.399069 M 1552833873.399069 C 1552833873.399069}}
	17:44:33.501118 rx 14: OPEN i3 {O_RDONLY,0x8000}
	17:44:33.501195 tx 14:     OK, {Fh 2 CACHE}
	17:44:33.501468 rx 15: READ i3 {Fh 2 [0 +4096)  L 0 NONBLOCK,0x8000}
	17:44:33.501500 tx 15:     OK,  4096b data (fd data)
	17:44:33.501582 rx 16: GETATTR i3 {Fh 2}
	17:44:33.501625 tx 16:     OK, {tA=0.01s {M0100644 SZ=5 L=1 1000:1000 B8*4096 i0:841351 A 1552833873.499071 M 1552833873.399069 C 1552833873.399069}}
	17:44:33.502176 rx 17: FLUSH i3 {Fh 2}
	17:44:33.502210 tx 17:     OK
	17:44:33.502268 rx 18: RELEASE i3 {Fh 2 NONBLOCK,0x8000  L0}
	17:44:33.502296 tx 18:     OK
	17:44:33.547469 received ENODEV (unmount request), thread exiting
	17:44:33.547471 received ENODEV (unmount request), thread exiting
	17:44:33.547469 received ENODEV (unmount request), thread exiting
	--- FAIL: TestFopenKeepCache (0.15s)
	    cache_test.go:147: ReadFile: got "after", want cached "before"

In other words the test was racy and was passing only due to likely
conditions to win a race in particular environment. Here is example debug trace
when that particular conditions are met:

	17:52:00.119419 rx 7: LOOKUP i1 ["file.txt"] 9b
	17:52:00.119818 tx 7:     OK, {i3 g2 tE=1s tA=0.01s {M0100644 SZ=6 L=1 1000:1000 B8*4096 i0:853832 A 1552834320.116131 M 1552834320.116131 C 1552834320.116131}}
	17:52:00.122865 rx 8: OPEN i3 {O_RDONLY,0x8000}
	17:52:00.122889 tx 8:     OK, {Fh 2 CACHE}
	17:52:00.122933 rx 9: READ i3 {Fh 2 [0 +4096)  L 0 NONBLOCK,0x8000}
	17:52:00.122957 tx 9:     OK,  4096b data (fd data)
	17:52:00.123014 rx 10: GETATTR i3 {Fh 2}
	17:52:00.123031 tx 10:     OK, {tA=0.01s {M0100644 SZ=6 L=1 1000:1000 B8*4096 i0:853832 A 1552834320.116131 M 1552834320.116131 C 1552834320.116131}}
	17:52:00.123050 rx 11: FLUSH i3 {Fh 2}
	17:52:00.123056 tx 11:     OK
	17:52:00.123071 rx 12: RELEASE i3 {Fh 2 NONBLOCK,0x8000  L0}
	17:52:00.123082 tx 12:     OK

	17:52:00.123105 rx 13: OPEN i3 {O_RDONLY,0x8000} 		<-- NOTE: OPEN, but no GETATTR around
	17:52:00.123124 tx 13:     OK, {Fh 2 CACHE}
	17:52:00.123146 rx 14: FLUSH i3 {Fh 2}
	17:52:00.123152 tx 14:     OK
	17:52:00.123164 rx 15: RELEASE i3 {Fh 2 NONBLOCK,0x8000  L0}
	17:52:00.123183 tx 0:     NOTIFY_INVAL_ENTRY, {parent i1 sz 8} "file.txt"
	17:52:00.123186 tx 15:     OK

However starting from Linux 4.20 the kernel started to always issue
GETATTR request around second OPEN, for example:

	18:34:22.323238 rx 26: LOOKUP i1 ["file.txt"] 9b
	18:34:22.323309 tx 26:     OK, {i3 g2 tE=1s tA=1s {M0100644 SZ=6 L=1 1000:1000 B8*4096 i0:1531145 A 1550252062.321237 M 1550252062.321237 C 1550252062.321237}}
	18:34:22.323339 rx 28: OPEN i3 {O_RDONLY,0x8000}
	18:34:22.323384 tx 28:     OK, {Fh 2 CACHE}
	18:34:22.323441 rx 30: READ i3 {Fh 2 [0 +4096)  L 0 NONBLOCK,0x8000}
	18:34:22.323477 tx 30:     OK,  4096b data (fd data)
	18:34:22.323534 rx 32: FLUSH i3 {Fh 2}
	18:34:22.323546 tx 32:     OK
	18:34:22.323577 rx 34: RELEASE i3 {Fh 2 NONBLOCK,0x8000  L0}
	18:34:22.323594 tx 34:     OK
	18:34:22.323611 rx 36: OPEN i3 {O_RDONLY,0x8000}		<-- NOTE: OPEN with GETATTR around
	18:34:22.323636 tx 36:     OK, {Fh 2 CACHE}
	18:34:22.323661 rx 38: GETATTR i3 {Fh 0}
	18:34:22.323684 tx 38:     OK, {tA=1s {M0100644 SZ=5 L=1 1000:1000 B8*4096 i0:1531145 A 1550252062.322237 M 1550252062.322237 C 1550252062.322237}}
	18:34:22.323729 rx 40: READ i3 {Fh 2 [0 +4096)  L 0 NONBLOCK,0x8000}
	18:34:22.323740 tx 40:     OK,  4096b data (fd data)

which almost always triggers conditions to invalidate data cache on kernel side
(different size and different mtime).

The kernel is not doing anything wrong here - it is allowed to issue
GETATTR request at any time. It is thus only a kernel behaviour change,
still being valid from FUSE protocol point of view, not a kernel
regression.

-> Fix the test

- by disabling CAP_AUTO_INVAL_DATA via ExplicitDataCacheControl. This
  should stop kernel from dropping data cache on mtime change;
- by using the same size for before and after states. This avoid hitting
  cache being dropped when kernel sees file size being changed.

Make the test more picky trying to hit the pain points:

- make sure that mtime of file.txt at before and after states are
  different. Without added sleep the panic on mtime δ == 0 is triggered
  reliably on my notebook.
- issue explicit stat before second open to force kernel to
  relookup/regetattr the file and reread the attributes.

Hopefully finally fixes https://github.com/hanwen/go-fuse/issues/168.
parent 0d80af72
......@@ -6,11 +6,13 @@ package test
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"runtime"
"sync"
"testing"
"time"
"github.com/hanwen/go-fuse/fuse"
"github.com/hanwen/go-fuse/fuse/nodefs"
......@@ -44,9 +46,17 @@ func setupCacheTest(t *testing.T) (string, *pathfs.PathNodeFs, func()) {
}
pfs := pathfs.NewPathNodeFs(fs, &pathfs.PathNodeFsOptions{Debug: testutil.VerboseTest()})
mntOpts := &fuse.MountOptions{
// ask kernel not to invalidate file data automatically
ExplicitDataCacheControl: true,
Debug: testutil.VerboseTest(),
}
opts := nodefs.NewOptions()
opts.AttrTimeout = 10*time.Millisecond
opts.Debug = testutil.VerboseTest()
state, _, err := nodefs.MountRoot(dir+"/mnt", pfs.Root(), opts)
state, _, err := nodefs.Mount(dir+"/mnt", pfs.Root(), mntOpts, opts)
if err != nil {
t.Fatalf("MountNodeFileSystem failed: %v", err)
}
......@@ -67,10 +77,6 @@ func TestFopenKeepCache(t *testing.T) {
t.Skip("FOPEN_KEEP_CACHE is broken on Darwin.")
}
// Failing on 4.20.5-200.fc29.x86_64.
// reported as https://github.com/libfuse/libfuse/issues/362
t.Skip("started failing on linux")
wd, pathfs, clean := setupCacheTest(t)
defer clean()
......@@ -87,16 +93,50 @@ func TestFopenKeepCache(t *testing.T) {
t.Fatal(err)
}
}
// xstat stats path and fails on error
xstat := func(path string) os.FileInfo {
st, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
return st
}
// XXX Linux FUSE client automatically invalidates cache of a file if it sees size change.
// As workaround we keep len(before) == len(after) to avoid that codepath.
// See https://github.com/hanwen/go-fuse/pull/273 for details.
//
// TODO use len(before) != len(after) if kernel supports precise control over data cache.
before := "before"
after := "after"
after := "afterX"
if len(before) != len(after) {
panic("len(before) != len(after)")
}
xwriteFile(wd+"/orig/file.txt", before)
mtimeBefore := xstat(wd + "/orig/file.txt").ModTime()
c := xreadFile(wd + "/mnt/file.txt")
if c != before {
t.Fatalf("ReadFile: got %q, want %q", c, before)
}
// sleep a bit to make sure mtime of file for before and after are different
time.Sleep(20*time.Millisecond)
xwriteFile(wd+"/orig/file.txt", after)
mtimeAfter := xstat(wd + "/orig/file.txt").ModTime()
if δ := mtimeAfter.Sub(mtimeBefore); δ == 0 {
panic(fmt.Sprintf("mtime(orig/before) == mtime(orig/after)"))
}
// sleep enough time for file attributes to expire; restat the file after.
// this forces kernel client to relookup/regetattr the file and reread the attributes.
//
// this way we make sure the kernel knows updated size/mtime before we
// try to read the file next time.
time.Sleep(100*time.Millisecond)
_ = xstat(wd + "/mnt/file.txt")
c = xreadFile(wd + "/mnt/file.txt")
if c != before {
t.Fatalf("ReadFile: got %q, want cached %q", c, before)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment