Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
bcc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
bcc
Commits
4c2b5388
Commit
4c2b5388
authored
Aug 30, 2016
by
Brendan Gregg
Committed by
GitHub
Aug 30, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #677 from markdrayton/fileslower
fileslower: use de->d_name.name, add filtering
parents
4ebb7cf1
74347318
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
92 additions
and
55 deletions
+92
-55
man/man8/fileslower.8
man/man8/fileslower.8
+4
-1
man/man8/filetop.8
man/man8/filetop.8
+6
-2
tools/fileslower.py
tools/fileslower.py
+33
-20
tools/fileslower_example.txt
tools/fileslower_example.txt
+2
-1
tools/filetop.py
tools/filetop.py
+38
-21
tools/filetop_example.txt
tools/filetop_example.txt
+9
-10
No files found.
man/man8/fileslower.8
View file @
4c2b5388
...
...
@@ -2,7 +2,7 @@
.SH NAME
fileslower \- Trace slow synchronous file reads and writes.
.SH SYNOPSIS
.B fileslower [\-h] [\-p PID] [min_ms]
.B fileslower [\-h] [\-p PID] [
-a] [
min_ms]
.SH DESCRIPTION
This script uses kernel dynamic tracing of synchronous reads and writes
at the VFS interface, to identify slow file reads and writes for any file
...
...
@@ -30,6 +30,9 @@ CONFIG_BPF and bcc.
\-p PID
Trace this PID only.
.TP
\-a
Include non-regular file types in output (sockets, FIFOs, etc).
.TP
min_ms
Minimum I/O latency (duration) to trace, in milliseconds. Default is 10 ms.
.SH EXAMPLES
...
...
man/man8/filetop.8
View file @
4c2b5388
...
...
@@ -8,7 +8,8 @@ This is top for files.
This traces file reads and writes, and prints a per-file summary every
interval (by default, 1 second). The summary is sorted on the highest read
throughput (Kbytes).
throughput (Kbytes). By default only IO on regular files is shown. The -a
option will list all file types (sokets, FIFOs, etc).
This uses in-kernel eBPF maps to store per process summaries for efficiency.
...
...
@@ -29,6 +30,9 @@ Since this uses BPF, only the root user can use this tool.
CONFIG_BPF and bcc.
.SH OPTIONS
.TP
\-a
Include non-regular file types (sockets, FIFOs, etc).
.TP
\-C
Don't clear the screen.
.TP
...
...
tools/fileslower.py
View file @
4c2b5388
...
...
@@ -4,7 +4,7 @@
# fileslower Trace slow synchronous file reads and writes.
# For Linux, uses BCC, eBPF.
#
# USAGE: fileslower [-h] [-p PID] [min_ms]
# USAGE: fileslower [-h] [-p PID] [
-a] [
min_ms]
#
# This script uses kernel dynamic tracing of synchronous reads and writes
# at the VFS interface, to identify slow file reads and writes for any file
...
...
@@ -32,7 +32,6 @@ from __future__ import print_function
from
bcc
import
BPF
import
argparse
import
ctypes
as
ct
import
signal
import
time
# arguments
...
...
@@ -45,19 +44,17 @@ parser = argparse.ArgumentParser(
description
=
"Trace slow synchronous file reads and writes"
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
epilog
=
examples
)
parser
.
add_argument
(
"-p"
,
"--pid"
,
parser
.
add_argument
(
"-p"
,
"--pid"
,
type
=
int
,
metavar
=
"PID"
,
dest
=
"tgid"
,
help
=
"trace this PID only"
)
parser
.
add_argument
(
"-a"
,
"--all-files"
,
action
=
"store_true"
,
help
=
"include non-regular file types (sockets, FIFOs, etc)"
)
parser
.
add_argument
(
"min_ms"
,
nargs
=
"?"
,
default
=
'10'
,
help
=
"minimum I/O duration to trace, in ms (default 10)"
)
args
=
parser
.
parse_args
()
min_ms
=
int
(
args
.
min_ms
)
pid
=
args
.
p
id
tgid
=
args
.
tg
id
debug
=
0
# signal handler
def
signal_ignore
(
signal
,
frame
):
print
()
# define BPF program
bpf_text
=
"""
#include <uapi/linux/ptrace.h>
...
...
@@ -72,6 +69,8 @@ enum trace_mode {
struct val_t {
u32 sz;
u64 ts;
u32 name_len;
// de->d_name.name may point to de->d_iname so limit len accordingly
char name[DNAME_INLINE_LEN];
char comm[TASK_COMM_LEN];
};
...
...
@@ -81,6 +80,7 @@ struct data_t {
u32 pid;
u32 sz;
u64 delta_us;
u32 name_len;
char name[DNAME_INLINE_LEN];
char comm[TASK_COMM_LEN];
};
...
...
@@ -92,22 +92,25 @@ BPF_PERF_OUTPUT(events);
static int trace_rw_entry(struct pt_regs *ctx, struct file *file,
char __user *buf, size_t count)
{
u32 pid;
pid = bpf_get_current_pid_tgid();
if (FILTER)
u32 tgid = bpf_get_current_pid_tgid() >> 32;
if (TGID_FILTER)
return 0;
u32 pid = bpf_get_current_pid_tgid();
// skip I/O lacking a filename
struct dentry *de = file->f_path.dentry;
if (de->d_iname[0] == 0)
int mode = file->f_inode->i_mode;
if (de->d_name.len == 0 || TYPE_FILTER)
return 0;
// store size and timestamp by pid
struct val_t val = {};
val.sz = count;
val.ts = bpf_ktime_get_ns();
__builtin_memcpy(&val.name, de->d_iname, sizeof(val.name));
val.name_len = de->d_name.len;
bpf_probe_read(&val.name, sizeof(val.name), (void *)de->d_name.name);
bpf_get_current_comm(&val.comm, sizeof(val.comm));
entryinfo.update(&pid, &val);
...
...
@@ -153,6 +156,7 @@ static int trace_rw_return(struct pt_regs *ctx, int type)
data.pid = pid;
data.sz = valp->sz;
data.delta_us = delta_us;
data.name_len = valp->name_len;
bpf_probe_read(&data.name, sizeof(data.name), valp->name);
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
events.perf_submit(ctx, &data, sizeof(data));
...
...
@@ -172,15 +176,20 @@ int trace_write_return(struct pt_regs *ctx)
"""
bpf_text
=
bpf_text
.
replace
(
'MIN_US'
,
str
(
min_ms
*
1000
))
if
args
.
p
id
:
bpf_text
=
bpf_text
.
replace
(
'
FILTER'
,
'pid != %s'
%
p
id
)
if
args
.
tg
id
:
bpf_text
=
bpf_text
.
replace
(
'
TGID_FILTER'
,
'tgid != %d'
%
tg
id
)
else
:
bpf_text
=
bpf_text
.
replace
(
'FILTER'
,
'0'
)
bpf_text
=
bpf_text
.
replace
(
'TGID_FILTER'
,
'0'
)
if
args
.
all_files
:
bpf_text
=
bpf_text
.
replace
(
'TYPE_FILTER'
,
'0'
)
else
:
bpf_text
=
bpf_text
.
replace
(
'TYPE_FILTER'
,
'!S_ISREG(mode)'
)
if
debug
:
print
(
bpf_text
)
# initialize BPF
b
=
BPF
(
text
=
bpf_text
)
b
=
BPF
(
text
=
bpf_text
,
)
# I'd rather trace these via new_sync_read/new_sync_write (which used to be
# do_sync_read/do_sync_write), but those became static. So trace these from
...
...
@@ -205,6 +214,7 @@ class Data(ct.Structure):
(
"pid"
,
ct
.
c_uint
),
(
"sz"
,
ct
.
c_uint
),
(
"delta_us"
,
ct
.
c_ulonglong
),
(
"name_len"
,
ct
.
c_uint
),
(
"name"
,
ct
.
c_char
*
DNAME_INLINE_LEN
),
(
"comm"
,
ct
.
c_char
*
TASK_COMM_LEN
),
]
...
...
@@ -216,7 +226,7 @@ mode_s = {
# header
print
(
"Tracing sync read/writes slower than %d ms"
%
min_ms
)
print
(
"%-8s %-14s %-6s %1s %-7s %7s %s"
%
(
"TIME(s)"
,
"COMM"
,
"
P
ID"
,
"D"
,
print
(
"%-8s %-14s %-6s %1s %-7s %7s %s"
%
(
"TIME(s)"
,
"COMM"
,
"
T
ID"
,
"D"
,
"BYTES"
,
"LAT(ms)"
,
"FILENAME"
))
start_ts
=
time
.
time
()
...
...
@@ -225,10 +235,13 @@ def print_event(cpu, data, size):
event
=
ct
.
cast
(
data
,
ct
.
POINTER
(
Data
)).
contents
ms
=
float
(
event
.
delta_us
)
/
1000
name
=
event
.
name
if
event
.
name_len
>
DNAME_INLINE_LEN
:
name
=
name
[:
-
3
]
+
"..."
print
(
"%-8.3f %-14.14s %-6s %1s %-7s %7.2f %s"
%
(
time
.
time
()
-
start_ts
,
event
.
comm
,
event
.
pid
,
mode_s
[
event
.
mode
],
event
.
sz
,
ms
,
event
.
name
))
event
.
sz
,
ms
,
name
))
b
[
"events"
].
open_perf_buffer
(
print_event
)
while
1
:
...
...
tools/fileslower_example.txt
View file @
4c2b5388
...
...
@@ -105,7 +105,7 @@ locks, run queue latency, etc. These can be explored using other commands.
USAGE message:
# ./fileslower -h
usage: fileslower
[-h] [-p PID
] [min_ms]
usage: fileslower
.py [-h] [-p PID] [-a
] [min_ms]
Trace slow synchronous file reads and writes
...
...
@@ -115,6 +115,7 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-p PID, --pid PID trace this PID only
-a, --all-files include non-regular file types
examples:
./fileslower # trace sync file I/O slower than 10 ms (default)
...
...
tools/filetop.py
View file @
4c2b5388
...
...
@@ -32,11 +32,13 @@ parser = argparse.ArgumentParser(
description
=
"File reads and writes by process"
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
epilog
=
examples
)
parser
.
add_argument
(
"-a"
,
"--all-files"
,
action
=
"store_true"
,
help
=
"include non-regular file types (sockets, FIFOs, etc)"
)
parser
.
add_argument
(
"-C"
,
"--noclear"
,
action
=
"store_true"
,
help
=
"don't clear the screen"
)
parser
.
add_argument
(
"-r"
,
"--maxrows"
,
default
=
20
,
help
=
"maximum rows to print, default 20"
)
parser
.
add_argument
(
"-p"
,
"--pid"
,
parser
.
add_argument
(
"-p"
,
"--pid"
,
type
=
int
,
metavar
=
"PID"
,
dest
=
"tgid"
,
help
=
"trace this PID only"
)
parser
.
add_argument
(
"interval"
,
nargs
=
"?"
,
default
=
1
,
help
=
"output interval, in seconds"
)
...
...
@@ -61,13 +63,13 @@ bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
#define MAX_FILE_LEN 32
// the key for the output summary
struct info_t {
u32 pid;
char name[TASK_COMM_LEN];
char file[MAX_FILE_LEN];
u32 name_len;
char comm[TASK_COMM_LEN];
// de->d_name.name may point to de->d_iname so limit len accordingly
char name[DNAME_INLINE_LEN];
char type;
};
...
...
@@ -84,22 +86,23 @@ BPF_HASH(counts, struct info_t, struct val_t);
static int do_entry(struct pt_regs *ctx, struct file *file,
char __user *buf, size_t count, int is_read)
{
u32 pid;
pid = bpf_get_current_pid_tgid();
if (FILTER)
u32 tgid = bpf_get_current_pid_tgid() >> 32;
if (TGID_FILTER)
return 0;
u32 pid = bpf_get_current_pid_tgid();
// skip I/O lacking a filename
struct dentry *de = file->f_path.dentry;
if (de->d_iname[0] == 0)
int mode = file->f_inode->i_mode;
if (de->d_name.len == 0 || TYPE_FILTER)
return 0;
// store counts and sizes by pid & file
struct info_t info = {.pid = pid};
bpf_get_current_comm(&info.
name, sizeof(info.name
));
__builtin_memcpy(&info.file, de->d_iname, sizeof(info.file))
;
int mode = file->f_inode->i_mode
;
bpf_get_current_comm(&info.
comm, sizeof(info.comm
));
info.name_len = de->d_name.len
;
bpf_probe_read(&info.name, sizeof(info.name), (void *)de->d_name.name)
;
if (S_ISREG(mode)) {
info.type = 'R';
} else if (S_ISSOCK(mode)) {
...
...
@@ -134,17 +137,28 @@ int trace_write_entry(struct pt_regs *ctx, struct file *file,
}
"""
if
args
.
pid
:
bpf_text
=
bpf_text
.
replace
(
'FILTER'
,
'pid != %s'
%
args
.
pid
)
if
args
.
tgid
:
bpf_text
=
bpf_text
.
replace
(
'TGID_FILTER'
,
'tgid != %d'
%
args
.
tgid
)
else
:
bpf_text
=
bpf_text
.
replace
(
'TGID_FILTER'
,
'0'
)
if
args
.
all_files
:
bpf_text
=
bpf_text
.
replace
(
'TYPE_FILTER'
,
'0'
)
else
:
bpf_text
=
bpf_text
.
replace
(
'FILTER'
,
'0'
)
bpf_text
=
bpf_text
.
replace
(
'TYPE_FILTER'
,
'!S_ISREG(mode)'
)
if
debug
:
print
(
bpf_text
)
# initialize BPF
b
=
BPF
(
text
=
bpf_text
)
b
.
attach_kprobe
(
event
=
"__vfs_read"
,
fn_name
=
"trace_read_entry"
)
b
.
attach_kprobe
(
event
=
"__vfs_write"
,
fn_name
=
"trace_write_entry"
)
try
:
b
.
attach_kprobe
(
event
=
"__vfs_write"
,
fn_name
=
"trace_write_entry"
)
except
:
# older kernels don't have __vfs_write so try vfs_write instead
b
.
attach_kprobe
(
event
=
"vfs_write"
,
fn_name
=
"trace_write_entry"
)
DNAME_INLINE_LEN
=
32
# linux/dcache.h
print
(
'Tracing... Output every %d secs. Hit Ctrl-C to end'
%
interval
)
...
...
@@ -163,19 +177,22 @@ while 1:
print
()
with
open
(
loadavg
)
as
stats
:
print
(
"%-8s loadavg: %s"
%
(
strftime
(
"%H:%M:%S"
),
stats
.
read
()))
print
(
"%-6s %-16s %-6s %-6s %-7s %-7s %1s %s"
%
(
"
P
ID"
,
"COMM"
,
print
(
"%-6s %-16s %-6s %-6s %-7s %-7s %1s %s"
%
(
"
T
ID"
,
"COMM"
,
"READS"
,
"WRITES"
,
"R_Kb"
,
"W_Kb"
,
"T"
,
"FILE"
))
# by-
P
ID output
# by-
T
ID output
counts
=
b
.
get_table
(
"counts"
)
line
=
0
for
k
,
v
in
reversed
(
sorted
(
counts
.
items
(),
key
=
lambda
counts
:
counts
[
1
].
rbytes
)):
name
=
k
.
name
if
k
.
name_len
>
DNAME_INLINE_LEN
:
name
=
name
[:
-
3
]
+
"..."
# print line
print
(
"%-6d %-16s %-6d %-6d %-7d %-7d %1s %s"
%
(
k
.
pid
,
k
.
name
,
print
(
"%-6d %-16s %-6d %-6d %-7d %-7d %1s %s"
%
(
k
.
pid
,
k
.
comm
,
v
.
reads
,
v
.
writes
,
v
.
rbytes
/
1024
,
v
.
wbytes
/
1024
,
k
.
type
,
k
.
fil
e
))
nam
e
))
line
+=
1
if
line
>=
maxrows
:
...
...
tools/filetop_example.txt
View file @
4c2b5388
...
...
@@ -24,11 +24,9 @@ PID COMM READS WRITES R_Kb W_Kb T FILE
26628 ld 18 0 72 0 R hibernate.o
26628 ld 16 0 64 0 R suspend.o
26628 ld 16 0 64 0 R snapshot.o
26630 cat 1 0 64 0 O null
26628 ld 16 0 64 0 R qos.o
26628 ld 13 0 52 0 R main.o
26628 ld 12 0 52 0 R swap.o
12421 sshd 3 0 48 0 O ptmx
[...]
This shows various files read and written during a Linux kernel build. The
...
...
@@ -40,7 +38,8 @@ While not printed, the average read and write size can be calculated by
dividing R_Kb by READS, and the same for writes.
The "T" column indicates the type of the file: "R" for regular files, "S" for
sockets, and "O" for other (including pipes).
sockets, and "O" for other (including pipes). By default only regular files are
shown; use the -a option to show all file types.
This script works by tracing the vfs_read() and vfs_write() functions using
kernel dynamic tracing, which instruments explicit read and write calls. If
...
...
@@ -65,7 +64,6 @@ Tracing... Output every 1 secs. Hit Ctrl-C to end
PID COMM READS WRITES R_Kb W_Kb T FILE
32672 cksum 5006 0 320384 0 R data1
12296 sshd 2 0 32 0 O ptmx
809 run 2 0 8 0 R nsswitch.conf
811 run 2 0 8 0 R nsswitch.conf
804 chown 2 0 8 0 R nsswitch.conf
...
...
@@ -89,13 +87,13 @@ PID COMM READS WRITES R_Kb W_Kb T FILE
848 run 2 0 8 0 R nsswitch.conf
[...]
This output shows a cksum command reading data1.
Note that
This output shows a cksum command reading data1.
An optional interval and optional count can also be added to the end of the
command line. For example, for 1 second interval, and 3 summaries in total:
# ./filetop -Cr 5 1 3
# ./filetop -Cr 5
-a
1 3
Tracing... Output every 1 secs. Hit Ctrl-C to end
08:08:20 loadavg: 0.30 0.42 0.31 3/282 5187
...
...
@@ -126,15 +124,15 @@ PID COMM READS WRITES R_Kb W_Kb T FILE
5227 run 2 0 8 0 R nsswitch.conf
Detaching...
This example
has caught heavy socket I/O from an sshd process, showing up as
non-regular file types (the "O" for other, and "S" for socket, in the type
column: "T").
This example
shows the -a option to include all file types. It caught heavy
socket I/O from an sshd process, showing up as non-regular file types (the "O"
for other, and "S" for socket, in the type column: "T").
USAGE message:
# ./filetop -h
usage: filetop
[-h
] [-C] [-r MAXROWS] [-p PID] [interval] [count]
usage: filetop
.py [-h] [-a
] [-C] [-r MAXROWS] [-p PID] [interval] [count]
File reads and writes by process
...
...
@@ -144,6 +142,7 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-a, --all-files include non-regular file types (sockets, FIFOs, etc)
-C, --noclear don't clear the screen
-r MAXROWS, --maxrows MAXROWS
maximum rows to print, default 20
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment