Commit f50ca1fa authored by Brenden Blanco

Merge pull request #316 from brendangregg/master

offcputime improvements
parents d940b296 670a6aa8
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
.SH NAME .SH NAME
offcputime \- Summarize off-CPU time by kernel stack trace. Uses Linux eBPF/bcc. offcputime \- Summarize off-CPU time by kernel stack trace. Uses Linux eBPF/bcc.
.SH SYNOPSIS .SH SYNOPSIS
.B offcputime [\-h] [\-p PID] [\-i INTERVAL] [\-T] [duration] .B offcputime [\-h] [\-u] [\-p PID] [\-i INTERVAL] [\-T] [duration]
.SH DESCRIPTION .SH DESCRIPTION
This program shows kernel stack traces and task names that were blocked and This program shows kernel stack traces and task names that were blocked and
"off-CPU", and the total duration they were blocked: their "off-CPU time". "off-CPU", and the total duration they were blocked: their "off-CPU time".
...@@ -33,6 +33,9 @@ CONFIG_BPF and bcc. ...@@ -33,6 +33,9 @@ CONFIG_BPF and bcc.
\-h \-h
Print usage message. Print usage message.
.TP .TP
\-u
Only trace user threads (not kernel threads).
.TP
\-v \-v
Show raw addresses. Show raw addresses.
.TP .TP
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# offcputime Summarize off-CPU time by kernel stack trace # offcputime Summarize off-CPU time by kernel stack trace
# For Linux, uses BCC, eBPF. # For Linux, uses BCC, eBPF.
# #
# USAGE: offcputime [-h] [-p PID] [-i INTERVAL] [-T] [duration] # USAGE: offcputime [-h] [-u] [-p PID] [-i INTERVAL] [-T] [duration]
# #
# The current implementation uses an unrolled loop for x86_64, and was written # The current implementation uses an unrolled loop for x86_64, and was written
# as a proof of concept. This implementation should be replaced in the future # as a proof of concept. This implementation should be replaced in the future
...@@ -27,12 +27,15 @@ examples = """examples: ...@@ -27,12 +27,15 @@ examples = """examples:
./offcputime # trace off-CPU stack time until Ctrl-C ./offcputime # trace off-CPU stack time until Ctrl-C
./offcputime 5 # trace for 5 seconds only ./offcputime 5 # trace for 5 seconds only
./offcputime -f 5 # 5 seconds, and output in folded format ./offcputime -f 5 # 5 seconds, and output in folded format
./offcputime -u # don't include kernel threads (user only)
./offcputime -p 185 # trace for PID 185 only ./offcputime -p 185 # trace for PID 185 only
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Summarize off-CPU time by kernel stack trace", description="Summarize off-CPU time by kernel stack trace",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
parser.add_argument("-u", "--useronly", action="store_true",
help="user threads only (no kernel threads)")
parser.add_argument("-p", "--pid", parser.add_argument("-p", "--pid",
help="trace this PID only") help="trace this PID only")
parser.add_argument("-v", "--verbose", action="store_true", parser.add_argument("-v", "--verbose", action="store_true",
...@@ -46,12 +49,15 @@ folded = args.folded ...@@ -46,12 +49,15 @@ folded = args.folded
duration = int(args.duration) duration = int(args.duration)
debug = 0 debug = 0
maxdepth = 20 # and MAXDEPTH maxdepth = 20 # and MAXDEPTH
if args.pid and args.useronly:
print("ERROR: use either -p or -u.")
exit()
# signal handler # signal handler
def signal_ignore(signal, frame): def signal_ignore(signal, frame):
print() print()
# load BPF program # define BPF program
bpf_text = """ bpf_text = """
#include <uapi/linux/ptrace.h> #include <uapi/linux/ptrace.h>
#include <linux/sched.h> #include <linux/sched.h>
...@@ -82,32 +88,32 @@ static u64 get_frame(u64 *bp) { ...@@ -82,32 +88,32 @@ static u64 get_frame(u64 *bp) {
return 0; return 0;
} }
int offcpu(struct pt_regs *ctx) { int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
u32 pid = bpf_get_current_pid_tgid(); u32 pid;
u64 ts = bpf_ktime_get_ns(); u64 ts, *tsp;
FILTER
start.update(&pid, &ts);
return 0;
}
int oncpu(struct pt_regs *ctx) { // record previous thread sleep time
u32 pid = bpf_get_current_pid_tgid(); if (FILTER) {
FILTER pid = prev->pid;
u64 ts = bpf_ktime_get_ns(); ts = bpf_ktime_get_ns();
struct key_t key = {}; start.update(&pid, &ts);
u64 zero = 0, *val, bp = 0, *tsp, delta; }
int depth = 0;
// calculate delta time // calculate current thread's delta time
pid = bpf_get_current_pid_tgid();
tsp = start.lookup(&pid); tsp = start.lookup(&pid);
if (tsp == 0) if (tsp == 0)
return 0; // missed start return 0; // missed start or filtered
delta = bpf_ktime_get_ns() - *tsp; u64 delta = bpf_ktime_get_ns() - *tsp;
start.delete(&pid); start.delete(&pid);
delta = delta / 1000; delta = delta / 1000;
if (delta < MINBLOCK_US) if (delta < MINBLOCK_US)
return 0; return 0;
// create map key
u64 zero = 0, *val, bp = 0;
int depth = 0;
struct key_t key = {};
bpf_get_current_comm(&key.name, sizeof(key.name)); bpf_get_current_comm(&key.name, sizeof(key.name));
bp = ctx->bp; bp = ctx->bp;
...@@ -141,14 +147,17 @@ out: ...@@ -141,14 +147,17 @@ out:
} }
""" """
if args.pid: if args.pid:
bpf_text = bpf_text.replace('FILTER', filter = 'pid == %s' % args.pid
'if (pid != %s) { return 0; }' % (args.pid)) elif args.useronly:
filter = '!(prev->flags & PF_KTHREAD)'
else: else:
bpf_text = bpf_text.replace('FILTER', '') filter = '1'
bpf_text = bpf_text.replace('FILTER', filter)
if debug: if debug:
print(bpf_text) print(bpf_text)
# initialize BPF
b = BPF(text=bpf_text) b = BPF(text=bpf_text)
b.attach_kprobe(event="schedule", fn_name="offcpu")
b.attach_kprobe(event="finish_task_switch", fn_name="oncpu") b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
matched = b.num_open_kprobes() matched = b.num_open_kprobes()
if matched == 0: if matched == 0:
......
...@@ -723,8 +723,8 @@ creating your "off-CPU time flame graphs". ...@@ -723,8 +723,8 @@ creating your "off-CPU time flame graphs".
USAGE message: USAGE message:
./offcputime --help # ./offcputime -h
usage: offcputime [-h] [-p PID] [-v] [-f] [duration] usage: offcputime [-h] [-u] [-p PID] [-v] [-f] [duration]
Summarize off-CPU time by kernel stack trace Summarize off-CPU time by kernel stack trace
...@@ -733,6 +733,7 @@ positional arguments: ...@@ -733,6 +733,7 @@ positional arguments:
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-u, --useronly user threads only (no kernel threads)
-p PID, --pid PID trace this PID only -p PID, --pid PID trace this PID only
-v, --verbose show raw addresses -v, --verbose show raw addresses
-f, --folded output folded format -f, --folded output folded format
...@@ -741,4 +742,5 @@ examples: ...@@ -741,4 +742,5 @@ examples:
./offcputime # trace off-CPU stack time until Ctrl-C ./offcputime # trace off-CPU stack time until Ctrl-C
./offcputime 5 # trace for 5 seconds only ./offcputime 5 # trace for 5 seconds only
./offcputime -f 5 # 5 seconds, and output in folded format ./offcputime -f 5 # 5 seconds, and output in folded format
./offcputime -u # don't include kernel threads (user only)
./offcputime -p 185 # trace for PID 185 only ./offcputime -p 185 # trace for PID 185 only
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment