Commit 330e4f2b authored by Brendan Gregg's avatar Brendan Gregg

Merge pull request #514 from mcaleavya/master

updated opensnoop to use bpf_perf_output
parents 751ac54f 3c446c7d
...@@ -12,6 +12,10 @@ on startup. ...@@ -12,6 +12,10 @@ on startup.
This works by tracing the kernel sys_open() function using dynamic tracing, and This works by tracing the kernel sys_open() function using dynamic tracing, and
will need updating to match any changes to this function. will need updating to match any changes to this function.
This makes use of a Linux 4.5 feature (bpf_perf_event_output());
for kernels older than 4.5, see the version under tools/old,
which uses an older mechanism.
Since this uses BPF, only the root user can use this tool. Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS .SH REQUIREMENTS
CONFIG_BPF and bcc. CONFIG_BPF and bcc.
......
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# opensnoop Trace open() syscalls.
# For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: opensnoop [-h] [-t] [-x] [-p PID]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 17-Sep-2015 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF
import argparse
# arguments
examples = """examples:
./opensnoop # trace all open() syscalls
./opensnoop -t # include timestamps
./opensnoop -x # only show failed opens
./opensnoop -p 181 # only trace PID 181
"""
parser = argparse.ArgumentParser(
description="Trace open() syscalls",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
help="include timestamp on output")
parser.add_argument("-x", "--failed", action="store_true",
help="only show failed opens")
parser.add_argument("-p", "--pid",
help="trace this PID only")
args = parser.parse_args()
debug = 0
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
BPF_HASH(args_filename, u32, const char *);
int kprobe__sys_open(struct pt_regs *ctx, const char __user *filename)
{
u32 pid = bpf_get_current_pid_tgid();
FILTER
args_filename.update(&pid, &filename);
return 0;
};
int kretprobe__sys_open(struct pt_regs *ctx)
{
const char **filenamep;
int ret = ctx->ax;
u32 pid = bpf_get_current_pid_tgid();
filenamep = args_filename.lookup(&pid);
if (filenamep == 0) {
// missed entry
return 0;
}
bpf_trace_printk("%d %s\\n", ret, *filenamep);
args_filename.delete(&pid);
return 0;
}
"""
if args.pid:
bpf_text = bpf_text.replace('FILTER',
'if (pid != %s) { return 0; }' % args.pid)
else:
bpf_text = bpf_text.replace('FILTER', '')
if debug:
print(bpf_text)
# initialize BPF
b = BPF(text=bpf_text)
# header
if args.timestamp:
print("%-14s" % ("TIME(s)"), end="")
print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
start_ts = 0
# format output
while 1:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
(ret_s, filename) = msg.split(" ", 1)
ret = int(ret_s)
if (args.failed and (ret >= 0)):
continue
# split return value into FD and errno columns
if ret >= 0:
fd_s = ret
err = 0
else:
fd_s = "-1"
err = - ret
# print columns
if args.timestamp:
if start_ts == 0:
start_ts = ts
print("%-14.9f" % (ts - start_ts), end="")
print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename))
...@@ -10,10 +10,12 @@ ...@@ -10,10 +10,12 @@
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# #
# 17-Sep-2015 Brendan Gregg Created this. # 17-Sep-2015 Brendan Gregg Created this.
# 29-Apr-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
from __future__ import print_function from __future__ import print_function
from bcc import BPF from bcc import BPF
import argparse import argparse
import ctypes as ct
# arguments # arguments
examples = """examples: examples = """examples:
...@@ -38,32 +40,67 @@ debug = 0 ...@@ -38,32 +40,67 @@ debug = 0
# define BPF program # define BPF program
bpf_text = """ bpf_text = """
#include <uapi/linux/ptrace.h> #include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
u64 ts;
char comm[TASK_COMM_LEN];
const char *fname;
};
struct data_t {
u32 pid;
u64 ts;
u64 delta;
int ret;
char comm[TASK_COMM_LEN];
char fname[NAME_MAX];
};
BPF_HASH(args_filename, u32, const char *); BPF_HASH(args_filename, u32, const char *);
BPF_HASH(infotmp, u32, struct val_t);
BPF_PERF_OUTPUT(events);
int kprobe__sys_open(struct pt_regs *ctx, const char __user *filename) int trace_entry(struct pt_regs *ctx, const char __user *filename)
{ {
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid(); u32 pid = bpf_get_current_pid_tgid();
FILTER FILTER
args_filename.update(&pid, &filename); if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
val.fname = filename;
infotmp.update(&pid, &val);
}
return 0; return 0;
}; };
int kretprobe__sys_open(struct pt_regs *ctx) int trace_return(struct pt_regs *ctx)
{ {
const char **filenamep;
int ret = ctx->ax;
u32 pid = bpf_get_current_pid_tgid(); u32 pid = bpf_get_current_pid_tgid();
struct val_t *valp;
struct data_t data = {};
u64 tsp = bpf_ktime_get_ns();
filenamep = args_filename.lookup(&pid); valp = infotmp.lookup(&pid);
if (filenamep == 0) { if (valp == 0) {
// missed entry // missed entry
return 0; return 0;
} }
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
bpf_trace_printk("%d %s\\n", ret, *filenamep); bpf_probe_read(&data.fname, sizeof(data.fname), (void *)valp->fname);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
data.ts = tsp / 1000;
data.ret = ctx->ax;
events.perf_submit(ctx, &data, sizeof(data));
infotmp.delete(&pid);
args_filename.delete(&pid); args_filename.delete(&pid);
return 0; return 0;
...@@ -79,34 +116,68 @@ if debug: ...@@ -79,34 +116,68 @@ if debug:
# initialize BPF # initialize BPF
b = BPF(text=bpf_text) b = BPF(text=bpf_text)
b.attach_kprobe(event="sys_open", fn_name="trace_entry")
b.attach_kretprobe(event="sys_open", fn_name="trace_return")
TASK_COMM_LEN = 16 # linux/sched.h
NAME_MAX = 255 # linux/limits.h
class Data(ct.Structure):
_fields_ = [
("pid", ct.c_ulonglong),
("ts", ct.c_ulonglong),
("delta", ct.c_ulonglong),
("ret", ct.c_int),
("comm", ct.c_char * TASK_COMM_LEN),
("fname", ct.c_char * NAME_MAX)
]
start_ts = 0
prev_ts = 0
delta = 0
# header # header
if args.timestamp: if args.timestamp:
print("%-14s" % ("TIME(s)"), end="") print("%-14s" % ("TIME(s)"), end="")
print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH")) print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
start_ts = 0 # process event
def print_event(cpu, data, size):
# format output event = ct.cast(data, ct.POINTER(Data)).contents
while 1: global start_ts
(task, pid, cpu, flags, ts, msg) = b.trace_fields() global prev_ts
(ret_s, filename) = msg.split(" ", 1) global delta
global cont
ret = int(ret_s)
if (args.failed and (ret >= 0)):
continue
# split return value into FD and errno columns # split return value into FD and errno columns
if ret >= 0: if event.ret >= 0:
fd_s = ret fd_s = event.ret
err = 0 err = 0
else: else:
fd_s = "-1" fd_s = -1
err = - ret err = - event.ret
if start_ts == 0:
prev_ts = start_ts
if start_ts == 1:
delta = float(delta) + (event.ts - prev_ts)
if (args.failed and (event.ret >= 0)):
start_ts = 1
prev_ts = event.ts
return
# print columns
if args.timestamp: if args.timestamp:
if start_ts == 0: print("%-14.9f" % (delta / 1000000), end="")
start_ts = ts
print("%-14.9f" % (ts - start_ts), end="") print("%-6d %-16s %4d %3d %s" % (event.pid, event.comm,
print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename)) fd_s, err, event.fname))
prev_ts = event.ts
start_ts = 1
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while 1:
b.kprobe_poll()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment