Commit 4bb6d7fe authored by Paul Chaignon, committed by Sasha Goldshtein

u* tools: automatically detect the language (#1067)

* cc: bcc function to detect the language of a process

bcc_procutils_language looks into /proc/$pid/cmdline, /proc/$pid/exe,
and /proc/$pid/maps to determine the language.
Python wrapper takes a list of candidate languages; if the detected
language is not part of the list, None is returned.

* u* tools: automatically detect the language

Uses the detect_language bcc helper. The -l switch can override the
detected language. In uthreads and ucalls, the language can be
set to 'none' to trace pthreads and syscalls respectively.

All tools use the -l switch to set the language, for consistency.
parent 00f662db
...@@ -485,3 +485,59 @@ bool bcc_procutils_exit_mountns(struct ns_cookie *nc) { ...@@ -485,3 +485,59 @@ bool bcc_procutils_exit_mountns(struct ns_cookie *nc) {
return rc; return rc;
} }
/* Detects the following languages + C. */
const char *languages[] = {"java", "python", "ruby", "php", "node"};
const char *language_c = "c";
/* Derived from the table so the count cannot drift when a language is added. */
const int nb_languages = sizeof(languages) / sizeof(languages[0]);

/*
 * Guess the language runtime of the process `pid`.
 *
 * Two sources are consulted, in order:
 *   1. the resolved path of /proc/<pid>/exe: the first entry of `languages`
 *      that appears as a substring of the path wins;
 *   2. the mapped files listed in /proc/<pid>/maps: the first mapping whose
 *      name contains "/lib<language>" wins.
 * If neither yields a match but a mapping named "libc-*" or "libc.*" was
 * seen, "c" is returned.
 *
 * Returns a pointer to one of the static strings above (the caller must not
 * free it), or NULL when nothing was recognised or the maps file could not
 * be opened.
 */
const char *bcc_procutils_language(int pid) {
  /* 32 bytes hold "/proc/<pid>/maps" for any int value, including negative. */
  char procfilename[32], line[4096], pathname[32], *str;
  const char *found = NULL;
  FILE *procfile;
  int i, ret;

  /* Look for clues in the absolute path to the executable. */
  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/exe", (long)pid);
  if (realpath(procfilename, line)) {
    for (i = 0; i < nb_languages; i++)
      if (strstr(line, languages[i]))
        return languages[i];
  }

  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/maps", (long)pid);
  procfile = fopen(procfilename, "r");
  if (!procfile)
    return NULL;

  /* Look for clues in memory mappings. */
  bool libc = false;
  do {
    char perm[8], dev[16];
    unsigned long long begin, end, size;
    long long inode;

    /* Field widths bound the %s conversions to the buffers above. */
    ret = fscanf(procfile, "%llx-%llx %7s %llx %15s %lld", &begin, &end, perm,
                 &size, dev, &inode);
    /* The rest of the line is the (possibly empty) mapping name. */
    if (!fgets(line, sizeof(line), procfile))
      break;
    if (ret != 6)
      continue;

    char *mapname = line;
    char *newline = strchr(line, '\n');
    if (newline)
      newline[0] = '\0';
    /* Cast: passing a negative char to isspace() is undefined behaviour. */
    while (isspace((unsigned char)mapname[0]))
      mapname++;

    /* Remember libc, but keep scanning: a language runtime takes priority. */
    if (!libc && (str = strstr(mapname, "libc")) &&
        (str[4] == '-' || str[4] == '.'))
      libc = true;

    for (i = 0; i < nb_languages; i++) {
      snprintf(pathname, sizeof(pathname), "/lib%s", languages[i]);
      if (strstr(mapname, pathname)) {
        found = languages[i];
        break;
      }
    }
    /* Leave through the common exit so the maps file is always closed. */
    if (found)
      break;
  } while (ret && ret != EOF);

  fclose(procfile);
  if (found)
    return found;

  /* Return C as the language if libc was found and nothing else. */
  return libc ? language_c : NULL;
}
...@@ -41,6 +41,7 @@ int bcc_procutils_each_ksym(bcc_procutils_ksymcb callback, void *payload); ...@@ -41,6 +41,7 @@ int bcc_procutils_each_ksym(bcc_procutils_ksymcb callback, void *payload);
void bcc_procutils_free(const char *ptr); void bcc_procutils_free(const char *ptr);
bool bcc_procutils_enter_mountns(int pid, struct ns_cookie *nc); bool bcc_procutils_enter_mountns(int pid, struct ns_cookie *nc);
bool bcc_procutils_exit_mountns(struct ns_cookie *nc); bool bcc_procutils_exit_mountns(struct ns_cookie *nc);
const char *bcc_procutils_language(int pid);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -134,6 +134,8 @@ lib.bcc_procutils_which_so.restype = ct.POINTER(ct.c_char) ...@@ -134,6 +134,8 @@ lib.bcc_procutils_which_so.restype = ct.POINTER(ct.c_char)
lib.bcc_procutils_which_so.argtypes = [ct.c_char_p, ct.c_int] lib.bcc_procutils_which_so.argtypes = [ct.c_char_p, ct.c_int]
lib.bcc_procutils_free.restype = None lib.bcc_procutils_free.restype = None
lib.bcc_procutils_free.argtypes = [ct.c_void_p] lib.bcc_procutils_free.argtypes = [ct.c_void_p]
lib.bcc_procutils_language.restype = ct.POINTER(ct.c_char)
lib.bcc_procutils_language.argtypes = [ct.c_int]
lib.bcc_resolve_symname.restype = ct.c_int lib.bcc_resolve_symname.restype = ct.c_int
lib.bcc_resolve_symname.argtypes = [ lib.bcc_resolve_symname.argtypes = [
......
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import ctypes as ct
from .libbcc import lib
def _read_cpu_range(path): def _read_cpu_range(path):
cpus = [] cpus = []
...@@ -31,3 +34,8 @@ def get_online_cpus(): ...@@ -31,3 +34,8 @@ def get_online_cpus():
def get_possible_cpus(): def get_possible_cpus():
return _read_cpu_range('/sys/devices/system/cpu/possible') return _read_cpu_range('/sys/devices/system/cpu/possible')
def detect_language(candidates, pid):
    """Return the detected language of process *pid* if it is a candidate.

    Calls the ``bcc_procutils_language`` C helper and converts its result
    to a Python string.

    Args:
        candidates: container of language names the caller can handle.
        pid: id of the process to inspect.

    Returns:
        The detected language name when it is in *candidates*; otherwise
        ``None``. ``None`` is also returned when the helper detects nothing.
    """
    res = lib.bcc_procutils_language(pid)
    # A NULL pointer from the helper shows up as None here; calling
    # .decode() on it would raise AttributeError instead of returning None.
    raw = ct.cast(res, ct.c_char_p).value
    if raw is None:
        return None
    language = raw.decode()
    return language if language in candidates else None
...@@ -36,6 +36,12 @@ using namespace std; ...@@ -36,6 +36,12 @@ using namespace std;
static pid_t spawn_child(void *, bool, bool, int (*)(void *)); static pid_t spawn_child(void *, bool, bool, int (*)(void *));
// Runs language detection on the test process itself and expects "c".
TEST_CASE("language detection", "[c_api]") {
  const char *detected = bcc_procutils_language(getpid());
  REQUIRE(detected);
  REQUIRE(string(detected) == "c");
}
TEST_CASE("shared object resolution", "[c_api]") { TEST_CASE("shared object resolution", "[c_api]") {
char *libm = bcc_procutils_which_so("m", 0); char *libm = bcc_procutils_which_so("m", 0);
REQUIRE(libm); REQUIRE(libm);
......
...@@ -311,14 +311,14 @@ class SmokeTests(TestCase): ...@@ -311,14 +311,14 @@ class SmokeTests(TestCase):
def test_ucalls(self): def test_ucalls(self):
# This attaches a large number (300+) kprobes, which can be slow, # This attaches a large number (300+) kprobes, which can be slow,
# so use an increased timeout value. # so use an increased timeout value.
self.run_with_int("ucalls.py -S %d" % os.getpid(), self.run_with_int("ucalls.py -l none -S %d" % os.getpid(),
timeout=30, kill_timeout=30) timeout=30, kill_timeout=30)
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4") @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uflow(self): def test_uflow(self):
# The Python installed on the Ubuntu buildbot doesn't have USDT # The Python installed on the Ubuntu buildbot doesn't have USDT
# probes, so we can't run uflow. # probes, so we can't run uflow.
# self.run_with_int("uflow.py python %d" % os.getpid()) # self.run_with_int("uflow.py -l python %d" % os.getpid())
pass pass
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4") @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
...@@ -329,7 +329,7 @@ class SmokeTests(TestCase): ...@@ -329,7 +329,7 @@ class SmokeTests(TestCase):
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4") @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uobjnew(self): def test_uobjnew(self):
self.run_with_int("uobjnew.py c %d" % os.getpid()) self.run_with_int("uobjnew.py -l c %d" % os.getpid())
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4") @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_ustat(self): def test_ustat(self):
......
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
# Copyright (c) Catalysts GmbH # Copyright (c) Catalysts GmbH
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
from bcc.utils import get_online_cpus from bcc.utils import get_online_cpus, detect_language
import multiprocessing import multiprocessing
import unittest import unittest
import os
class TestUtils(unittest.TestCase): class TestUtils(unittest.TestCase):
def test_get_online_cpus(self): def test_get_online_cpus(self):
...@@ -13,6 +14,10 @@ class TestUtils(unittest.TestCase): ...@@ -13,6 +14,10 @@ class TestUtils(unittest.TestCase):
self.assertEqual(len(online_cpus), num_cores) self.assertEqual(len(online_cpus), num_cores)
def test_detect_language(self):
    # The test process is the Python interpreter, so detection on our own
    # pid is expected to report "python".
    candidates = ["java", "ruby", "php", "node", "c", "python"]
    own_pid = os.getpid()
    self.assertEqual(detect_language(candidates, own_pid), "python")
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -14,8 +14,11 @@ ...@@ -14,8 +14,11 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from bcc import BPF, USDT from bcc import BPF, USDT, utils
from time import sleep from time import sleep
import os
languages = ["java", "python", "ruby", "php"]
examples = """examples: examples = """examples:
./ucalls -l java 185 # trace Java calls and print statistics on ^C ./ucalls -l java 185 # trace Java calls and print statistics on ^C
...@@ -34,8 +37,7 @@ parser = argparse.ArgumentParser( ...@@ -34,8 +37,7 @@ parser = argparse.ArgumentParser(
parser.add_argument("pid", type=int, help="process id to attach to") parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("interval", type=int, nargs='?', parser.add_argument("interval", type=int, nargs='?',
help="print every specified number of seconds") help="print every specified number of seconds")
parser.add_argument("-l", "--language", parser.add_argument("-l", "--language", choices=languages + ["none"],
choices=["java", "python", "ruby", "php"],
help="language to trace (if none, trace syscalls only)") help="language to trace (if none, trace syscalls only)")
parser.add_argument("-T", "--top", type=int, parser.add_argument("-T", "--top", type=int,
help="number of most frequent/slow calls to print") help="number of most frequent/slow calls to print")
...@@ -49,10 +51,14 @@ parser.add_argument("-m", "--milliseconds", action="store_true", ...@@ -49,10 +51,14 @@ parser.add_argument("-m", "--milliseconds", action="store_true",
help="report times in milliseconds (default is microseconds)") help="report times in milliseconds (default is microseconds)")
args = parser.parse_args() args = parser.parse_args()
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
# We assume that the entry and return probes have the same arguments. This is # We assume that the entry and return probes have the same arguments. This is
# the case for Java, Python, Ruby, and PHP. If there's a language where it's # the case for Java, Python, Ruby, and PHP. If there's a language where it's
# not the case, we will need to build a custom correlator from entry to exit. # not the case, we will need to build a custom correlator from entry to exit.
if args.language == "java": if language == "java":
# TODO for JVM entries, we actually have the real length of the class # TODO for JVM entries, we actually have the real length of the class
# and method strings in arg3 and arg5 respectively, so we can insert # and method strings in arg3 and arg5 respectively, so we can insert
# the null terminator in its proper position. # the null terminator in its proper position.
...@@ -60,27 +66,29 @@ if args.language == "java": ...@@ -60,27 +66,29 @@ if args.language == "java":
return_probe = "method__return" return_probe = "method__return"
read_class = "bpf_usdt_readarg(2, ctx, &clazz);" read_class = "bpf_usdt_readarg(2, ctx, &clazz);"
read_method = "bpf_usdt_readarg(4, ctx, &method);" read_method = "bpf_usdt_readarg(4, ctx, &method);"
elif args.language == "python": elif language == "python":
entry_probe = "function__entry" entry_probe = "function__entry"
return_probe = "function__return" return_probe = "function__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);" # filename really read_class = "bpf_usdt_readarg(1, ctx, &clazz);" # filename really
read_method = "bpf_usdt_readarg(2, ctx, &method);" read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "ruby": elif language == "ruby":
# TODO Also probe cmethod__entry and cmethod__return with same arguments # TODO Also probe cmethod__entry and cmethod__return with same arguments
entry_probe = "method__entry" entry_probe = "method__entry"
return_probe = "method__return" return_probe = "method__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);" read_class = "bpf_usdt_readarg(1, ctx, &clazz);"
read_method = "bpf_usdt_readarg(2, ctx, &method);" read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "php": elif language == "php":
entry_probe = "function__entry" entry_probe = "function__entry"
return_probe = "function__return" return_probe = "function__return"
read_class = "bpf_usdt_readarg(4, ctx, &clazz);" read_class = "bpf_usdt_readarg(4, ctx, &clazz);"
read_method = "bpf_usdt_readarg(1, ctx, &method);" read_method = "bpf_usdt_readarg(1, ctx, &method);"
elif not args.language: elif not language or language == "none":
if not args.syscalls: if not args.syscalls:
print("Nothing to do; use -S to trace syscalls.") print("Nothing to do; use -S to trace syscalls.")
exit(1) exit(1)
entry_probe, return_probe, read_class, read_method = ("", "", "", "") entry_probe, return_probe, read_class, read_method = ("", "", "", "")
if language:
language = None
program = """ program = """
#include <linux/ptrace.h> #include <linux/ptrace.h>
...@@ -213,11 +221,11 @@ int syscall_return(struct pt_regs *ctx) { ...@@ -213,11 +221,11 @@ int syscall_return(struct pt_regs *ctx) {
""".replace("READ_CLASS", read_class) \ """.replace("READ_CLASS", read_class) \
.replace("READ_METHOD", read_method) \ .replace("READ_METHOD", read_method) \
.replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \ .replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \
.replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \ .replace("DEFINE_NOLANG", "#define NOLANG" if not language else "") \
.replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \ .replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \
.replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "") .replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "")
if args.language: if language:
usdt = USDT(pid=args.pid) usdt = USDT(pid=args.pid)
usdt.enable_probe_or_bail(entry_probe, "trace_entry") usdt.enable_probe_or_bail(entry_probe, "trace_entry")
if args.latency: if args.latency:
...@@ -278,7 +286,7 @@ def clear_data(): ...@@ -278,7 +286,7 @@ def clear_data():
exit_signaled = False exit_signaled = False
print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." % print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none")) (args.pid, language or "none"))
while True: while True:
try: try:
sleep(args.interval or 99999999) sleep(args.interval or 99999999)
......
...@@ -12,7 +12,7 @@ argdist, biotop, fileslower, and others. ...@@ -12,7 +12,7 @@ argdist, biotop, fileslower, and others.
For example, to trace method call latency in a Java application: For example, to trace method call latency in a Java application:
# ucalls -L -l java $(pidof java) # ucalls -L $(pidof java)
Tracing calls in process 26877 (language: java)... Ctrl-C to quit. Tracing calls in process 26877 (language: java)... Ctrl-C to quit.
METHOD # CALLS TIME (us) METHOD # CALLS TIME (us)
...@@ -48,7 +48,7 @@ Detaching kernel probes, please wait... ...@@ -48,7 +48,7 @@ Detaching kernel probes, please wait...
To print only the top 5 methods and report times in milliseconds (the default To print only the top 5 methods and report times in milliseconds (the default
is microseconds): is microseconds):
# ucalls -l python -mT 5 $(pidof python) # ucalls -mT 5 $(pidof python)
Tracing calls in process 26914 (language: python)... Ctrl-C to quit. Tracing calls in process 26914 (language: python)... Ctrl-C to quit.
METHOD # CALLS METHOD # CALLS
...@@ -60,7 +60,8 @@ METHOD # CALLS ...@@ -60,7 +60,8 @@ METHOD # CALLS
USAGE message: USAGE message:
# ./ucalls.py -h # ./ucalls.py -h
usage: ucalls.py [-h] [-l {java,python,ruby,php}] [-T TOP] [-L] [-S] [-v] [-m] usage: ucalls.py [-h] [-l {java,python,ruby,php,none}] [-T TOP] [-L] [-S] [-v]
[-m]
pid [interval] pid [interval]
Summarize method calls in high-level languages. Summarize method calls in high-level languages.
...@@ -71,7 +72,7 @@ positional arguments: ...@@ -71,7 +72,7 @@ positional arguments:
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-l {java,python,ruby,php}, --language {java,python,ruby,php} -l {java,python,ruby,php,none}, --language {java,python,ruby,php,none}
language to trace (if none, trace syscalls only) language to trace (if none, trace syscalls only)
-T TOP, --top TOP number of most frequent/slow calls to print -T TOP, --top TOP number of most frequent/slow calls to print
-L, --latency record method latency from enter to exit (except -L, --latency record method latency from enter to exit (except
......
...@@ -13,21 +13,24 @@ ...@@ -13,21 +13,24 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from bcc import BPF, USDT from bcc import BPF, USDT, utils
import ctypes as ct import ctypes as ct
import time import time
import os
languages = ["java", "python", "ruby", "php"]
examples = """examples: examples = """examples:
./uflow java 185 # trace Java method calls in process 185 ./uflow -l java 185 # trace Java method calls in process 185
./uflow ruby 1344 # trace Ruby method calls in process 1344 ./uflow -l ruby 134 # trace Ruby method calls in process 134
./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods ./uflow -M indexOf -l java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' python 180 # trace only REPL-defined methods ./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Trace method execution flow in high-level languages.", description="Trace method execution flow in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
parser.add_argument("language", choices=["java", "python", "ruby", "php"], parser.add_argument("-l", "--language", choices=languages,
help="language to trace") help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to") parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-M", "--method", parser.add_argument("-M", "--method",
...@@ -113,21 +116,25 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return): ...@@ -113,21 +116,25 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return):
usdt = USDT(pid=args.pid) usdt = USDT(pid=args.pid)
if args.language == "java": language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
if language == "java":
enable_probe("method__entry", "java_entry", enable_probe("method__entry", "java_entry",
"bpf_usdt_readarg(2, ctx, &clazz);", "bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=False) "bpf_usdt_readarg(4, ctx, &method);", is_return=False)
enable_probe("method__return", "java_return", enable_probe("method__return", "java_return",
"bpf_usdt_readarg(2, ctx, &clazz);", "bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=True) "bpf_usdt_readarg(4, ctx, &method);", is_return=True)
elif args.language == "python": elif language == "python":
enable_probe("function__entry", "python_entry", enable_probe("function__entry", "python_entry",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really "bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=False) "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
enable_probe("function__return", "python_return", enable_probe("function__return", "python_return",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really "bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=True) "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "ruby": elif language == "ruby":
enable_probe("method__entry", "ruby_entry", enable_probe("method__entry", "ruby_entry",
"bpf_usdt_readarg(1, ctx, &clazz);", "bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=False) "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
...@@ -140,13 +147,16 @@ elif args.language == "ruby": ...@@ -140,13 +147,16 @@ elif args.language == "ruby":
enable_probe("cmethod__return", "ruby_creturn", enable_probe("cmethod__return", "ruby_creturn",
"bpf_usdt_readarg(1, ctx, &clazz);", "bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=True) "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "php": elif language == "php":
enable_probe("function__entry", "php_entry", enable_probe("function__entry", "php_entry",
"bpf_usdt_readarg(4, ctx, &clazz);", "bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=False) "bpf_usdt_readarg(1, ctx, &method);", is_return=False)
enable_probe("function__return", "php_return", enable_probe("function__return", "php_return",
"bpf_usdt_readarg(4, ctx, &clazz);", "bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=True) "bpf_usdt_readarg(1, ctx, &method);", is_return=True)
else:
print("No language detected; use -l to trace a language.")
exit(1)
if args.verbose: if args.verbose:
print(usdt.get_text()) print(usdt.get_text())
...@@ -154,7 +164,7 @@ if args.verbose: ...@@ -154,7 +164,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt]) bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing method calls in %s process %d... Ctrl-C to quit." % print("Tracing method calls in %s process %d... Ctrl-C to quit." %
(args.language, args.pid)) (language, args.pid))
print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD")) print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD"))
class CallEvent(ct.Structure): class CallEvent(ct.Structure):
......
...@@ -10,7 +10,7 @@ method invocations. ...@@ -10,7 +10,7 @@ method invocations.
For example, trace all Ruby method calls in a specific process: For example, trace all Ruby method calls in a specific process:
# ./uflow ruby 27245 # ./uflow -l ruby 27245
Tracing method calls in ruby process 27245... Ctrl-C to quit. Tracing method calls in ruby process 27245... Ctrl-C to quit.
CPU PID TID TIME(us) METHOD CPU PID TID TIME(us) METHOD
3 27245 27245 4.536 <- IO.gets 3 27245 27245 4.536 <- IO.gets
...@@ -34,7 +34,7 @@ and the <- and -> arrows indicate the direction of the event (exit or entry). ...@@ -34,7 +34,7 @@ and the <- and -> arrows indicate the direction of the event (exit or entry).
Often, the amount of output can be overwhelming. You can filter specific Often, the amount of output can be overwhelming. You can filter specific
classes or methods. For example, trace only methods from the Thread class: classes or methods. For example, trace only methods from the Thread class:
# ./uflow -C java/lang/Thread java $(pidof java) # ./uflow -C java/lang/Thread $(pidof java)
Tracing method calls in java process 27722... Ctrl-C to quit. Tracing method calls in java process 27722... Ctrl-C to quit.
CPU PID TID TIME(us) METHOD CPU PID TID TIME(us) METHOD
3 27722 27731 3.144 -> java/lang/Thread.<init> 3 27722 27731 3.144 -> java/lang/Thread.<init>
...@@ -88,17 +88,18 @@ thread running on the same CPU. ...@@ -88,17 +88,18 @@ thread running on the same CPU.
USAGE message: USAGE message:
# ./uflow -h # ./uflow -h
usage: uflow.py [-h] [-M METHOD] [-C CLAZZ] [-v] {java,python,ruby,php} pid usage: uflow.py [-h] [-l {java,python,ruby,php}] [-M METHOD] [-C CLAZZ] [-v]
pid
Trace method execution flow in high-level languages. Trace method execution flow in high-level languages.
positional arguments: positional arguments:
{java,python,ruby,php}
language to trace
pid process id to attach to pid process id to attach to
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-l {java,python,ruby,php}, --language {java,python,ruby,php}
language to trace
-M METHOD, --method METHOD -M METHOD, --method METHOD
trace only calls to methods starting with this prefix trace only calls to methods starting with this prefix
-C CLAZZ, --class CLAZZ -C CLAZZ, --class CLAZZ
...@@ -107,7 +108,7 @@ optional arguments: ...@@ -107,7 +108,7 @@ optional arguments:
purposes) purposes)
examples: examples:
./uflow java 185 # trace Java method calls in process 185 ./uflow -l java 185 # trace Java method calls in process 185
./uflow ruby 1344 # trace Ruby method calls in process 1344 ./uflow -l ruby 134 # trace Ruby method calls in process 134
./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods ./uflow -M indexOf -l java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' python 180 # trace only REPL-defined methods ./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
...@@ -13,20 +13,23 @@ ...@@ -13,20 +13,23 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from bcc import BPF, USDT from bcc import BPF, USDT, utils
import ctypes as ct import ctypes as ct
import time import time
import os
languages = ["java", "python", "ruby", "node"]
examples = """examples: examples = """examples:
./ugc java 185 # trace Java GCs in process 185 ./ugc -l java 185 # trace Java GCs in process 185
./ugc ruby 1344 -m # trace Ruby GCs reporting in ms ./ugc -l ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 java 185 # trace only Java GCs longer than 10ms ./ugc -M 10 -l java 185 # trace only Java GCs longer than 10ms
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Summarize garbage collection events in high-level languages.", description="Summarize garbage collection events in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
parser.add_argument("language", choices=["java", "python", "ruby", "node"], parser.add_argument("-l", "--language", choices=languages,
help="language to trace") help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to") parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-v", "--verbose", action="store_true", parser.add_argument("-v", "--verbose", action="store_true",
...@@ -111,10 +114,14 @@ int trace_%s(struct pt_regs *ctx) { ...@@ -111,10 +114,14 @@ int trace_%s(struct pt_regs *ctx) {
probes = [] probes = []
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
# #
# Java # Java
# #
if args.language == "java": if language == "java":
# Oddly, the gc__begin/gc__end probes don't really have any useful # Oddly, the gc__begin/gc__end probes don't really have any useful
# information, while the mem__pool* ones do. There's also a bunch of # information, while the mem__pool* ones do. There's also a bunch of
# probes described in the hotspot_gc*.stp file which aren't there # probes described in the hotspot_gc*.stp file which aren't there
...@@ -145,7 +152,7 @@ if args.language == "java": ...@@ -145,7 +152,7 @@ if args.language == "java":
# #
# Python # Python
# #
elif args.language == "python": elif language == "python":
begin_save = """ begin_save = """
int gen = 0; int gen = 0;
bpf_usdt_readarg(1, ctx, &gen); bpf_usdt_readarg(1, ctx, &gen);
...@@ -166,7 +173,7 @@ elif args.language == "python": ...@@ -166,7 +173,7 @@ elif args.language == "python":
# #
# Ruby # Ruby
# #
elif args.language == "ruby": elif language == "ruby":
# Ruby GC probes do not have any additional information available. # Ruby GC probes do not have any additional information available.
probes.append(Probe("gc__mark__begin", "gc__mark__end", probes.append(Probe("gc__mark__begin", "gc__mark__end",
"", "", lambda _: "GC mark stage")) "", "", lambda _: "GC mark stage"))
...@@ -175,7 +182,7 @@ elif args.language == "ruby": ...@@ -175,7 +182,7 @@ elif args.language == "ruby":
# #
# Node # Node
# #
elif args.language == "node": elif language == "node":
end_save = """ end_save = """
u32 gc_type = 0; u32 gc_type = 0;
bpf_usdt_readarg(1, ctx, &gc_type); bpf_usdt_readarg(1, ctx, &gc_type);
...@@ -188,6 +195,11 @@ elif args.language == "node": ...@@ -188,6 +195,11 @@ elif args.language == "node":
[desc for desc, val in descs.items() [desc for desc, val in descs.items()
if e.field1 & val != 0]))) if e.field1 & val != 0])))
else:
print("No language detected; use -l to trace a language.")
exit(1)
for probe in probes: for probe in probes:
program += probe.generate() program += probe.generate()
probe.attach() probe.attach()
...@@ -198,7 +210,7 @@ if args.verbose: ...@@ -198,7 +210,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt]) bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing garbage collections in %s process %d... Ctrl-C to quit." % print("Tracing garbage collections in %s process %d... Ctrl-C to quit." %
(args.language, args.pid)) (language, args.pid))
time_col = "TIME (ms)" if args.milliseconds else "TIME (us)" time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
print("%-8s %-8s %-40s" % ("START", time_col, "DESCRIPTION")) print("%-8s %-8s %-40s" % ("START", time_col, "DESCRIPTION"))
......
...@@ -8,7 +8,7 @@ the GC event is also provided. ...@@ -8,7 +8,7 @@ the GC event is also provided.
For example, to trace all garbage collection events in a specific Node process: For example, to trace all garbage collection events in a specific Node process:
# ugc node $(pidof node) # ugc $(pidof node)
Tracing garbage collections in node process 30012... Ctrl-C to quit. Tracing garbage collections in node process 30012... Ctrl-C to quit.
START TIME (us) DESCRIPTION START TIME (us) DESCRIPTION
1.500 1181.00 GC scavenge 1.500 1181.00 GC scavenge
...@@ -44,7 +44,7 @@ Occasionally, it might be useful to filter out collections that are very short, ...@@ -44,7 +44,7 @@ Occasionally, it might be useful to filter out collections that are very short,
or display only collections that have a specific description. The -M and -F or display only collections that have a specific description. The -M and -F
switches can be useful for this: switches can be useful for this:
# ugc -F Tenured java $(pidof java) # ugc -F Tenured $(pidof java)
Tracing garbage collections in java process 29907... Ctrl-C to quit. Tracing garbage collections in java process 29907... Ctrl-C to quit.
START TIME (us) DESCRIPTION START TIME (us) DESCRIPTION
0.360 4309.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256 0.360 4309.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
...@@ -52,7 +52,7 @@ START TIME (us) DESCRIPTION ...@@ -52,7 +52,7 @@ START TIME (us) DESCRIPTION
4.648 4139.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256 4.648 4139.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
^C ^C
# ugc -M 1 java $(pidof java) # ugc -M 1 $(pidof java)
Tracing garbage collections in java process 29907... Ctrl-C to quit. Tracing garbage collections in java process 29907... Ctrl-C to quit.
START TIME (us) DESCRIPTION START TIME (us) DESCRIPTION
0.160 3715.00 MarkSweepCompact Code Cache used=287528->3209472 max=173408256->251658240 0.160 3715.00 MarkSweepCompact Code Cache used=287528->3209472 max=173408256->251658240
...@@ -68,18 +68,19 @@ START TIME (us) DESCRIPTION ...@@ -68,18 +68,19 @@ START TIME (us) DESCRIPTION
USAGE message: USAGE message:
# ugc -h # ugc -h
usage: ugc.py [-h] [-v] [-m] [-M MINIMUM] [-F FILTER] usage: ugc.py [-h] [-l {java,python,ruby,node}] [-v] [-m] [-M MINIMUM]
{java,python,ruby,node} pid [-F FILTER]
pid
Summarize garbage collection events in high-level languages. Summarize garbage collection events in high-level languages.
positional arguments: positional arguments:
{java,python,ruby,node}
language to trace
pid process id to attach to pid process id to attach to
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-l {java,python,ruby,node}, --language {java,python,ruby,node}
language to trace
-v, --verbose verbose mode: print the BPF program (for debugging -v, --verbose verbose mode: print the BPF program (for debugging
purposes) purposes)
-m, --milliseconds report times in milliseconds (default is microseconds) -m, --milliseconds report times in milliseconds (default is microseconds)
...@@ -89,6 +90,6 @@ optional arguments: ...@@ -89,6 +90,6 @@ optional arguments:
display only GCs whose description contains this text display only GCs whose description contains this text
examples: examples:
./ugc java 185 # trace Java GCs in process 185 ./ugc -l java 185 # trace Java GCs in process 185
./ugc ruby 1344 -m # trace Ruby GCs reporting in ms ./ugc -l ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 java 185 # trace only Java GCs longer than 10ms ./ugc -M 10 -l java 185 # trace only Java GCs longer than 10ms
...@@ -13,20 +13,24 @@ ...@@ -13,20 +13,24 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from bcc import BPF, USDT from bcc import BPF, USDT, utils
from time import sleep from time import sleep
import os
# C needs to be the last language.
languages = ["java", "ruby", "c"]
examples = """examples: examples = """examples:
./uobjnew java 145 # summarize Java allocations in process 145 ./uobjnew -l java 145 # summarize Java allocations in process 145
./uobjnew c 2020 1 # grab malloc() sizes and print every second ./uobjnew -l c 2020 1 # grab malloc() sizes and print every second
./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations ./uobjnew -l ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size ./uobjnew -l ruby 6712 -S 10 # top 10 Ruby types by total size
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Summarize object allocations in high-level languages.", description="Summarize object allocations in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
parser.add_argument("language", choices=["java", "ruby", "c"], parser.add_argument("-l", "--language", choices=languages,
help="language to trace") help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to") parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("interval", type=int, nargs='?', parser.add_argument("interval", type=int, nargs='?',
...@@ -39,6 +43,10 @@ parser.add_argument("-v", "--verbose", action="store_true", ...@@ -39,6 +43,10 @@ parser.add_argument("-v", "--verbose", action="store_true",
help="verbose mode: print the BPF program (for debugging purposes)") help="verbose mode: print the BPF program (for debugging purposes)")
args = parser.parse_args() args = parser.parse_args()
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
program = """ program = """
#include <linux/ptrace.h> #include <linux/ptrace.h>
...@@ -56,14 +64,14 @@ struct val_t { ...@@ -56,14 +64,14 @@ struct val_t {
}; };
BPF_HASH(allocs, struct key_t, struct val_t); BPF_HASH(allocs, struct key_t, struct val_t);
""".replace("MALLOC_TRACING", "1" if args.language == "c" else "0") """.replace("MALLOC_TRACING", "1" if language == "c" else "0")
usdt = USDT(pid=args.pid) usdt = USDT(pid=args.pid)
# #
# Java # Java
# #
if args.language == "java": if language == "java":
program += """ program += """
int alloc_entry(struct pt_regs *ctx) { int alloc_entry(struct pt_regs *ctx) {
struct key_t key = {}; struct key_t key = {};
...@@ -82,7 +90,7 @@ int alloc_entry(struct pt_regs *ctx) { ...@@ -82,7 +90,7 @@ int alloc_entry(struct pt_regs *ctx) {
# #
# Ruby # Ruby
# #
elif args.language == "ruby": elif language == "ruby":
create_template = """ create_template = """
int THETHING_alloc_entry(struct pt_regs *ctx) { int THETHING_alloc_entry(struct pt_regs *ctx) {
struct key_t key = { .name = "THETHING" }; struct key_t key = { .name = "THETHING" };
...@@ -115,7 +123,7 @@ int object_alloc_entry(struct pt_regs *ctx) { ...@@ -115,7 +123,7 @@ int object_alloc_entry(struct pt_regs *ctx) {
# #
# C # C
# #
elif args.language == "c": elif language == "c":
program += """ program += """
int alloc_entry(struct pt_regs *ctx, size_t size) { int alloc_entry(struct pt_regs *ctx, size_t size) {
struct key_t key = {}; struct key_t key = {};
...@@ -128,18 +136,23 @@ int alloc_entry(struct pt_regs *ctx, size_t size) { ...@@ -128,18 +136,23 @@ int alloc_entry(struct pt_regs *ctx, size_t size) {
} }
""" """
else:
print("No language detected; use -l to trace a language.")
exit(1)
if args.verbose: if args.verbose:
print(usdt.get_text()) print(usdt.get_text())
print(program) print(program)
bpf = BPF(text=program, usdt_contexts=[usdt]) bpf = BPF(text=program, usdt_contexts=[usdt])
if args.language == "c": if language == "c":
bpf.attach_uprobe(name="c", sym="malloc", fn_name="alloc_entry", bpf.attach_uprobe(name="c", sym="malloc", fn_name="alloc_entry",
pid=args.pid) pid=args.pid)
exit_signaled = False exit_signaled = False
print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." % print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none")) (args.pid, language or "none"))
while True: while True:
try: try:
sleep(args.interval or 99999999) sleep(args.interval or 99999999)
...@@ -157,7 +170,7 @@ while True: ...@@ -157,7 +170,7 @@ while True:
data = sorted(data.items(), key=lambda kv: kv[1].total_size) data = sorted(data.items(), key=lambda kv: kv[1].total_size)
print("%-30s %8s %12s" % ("TYPE", "# ALLOCS", "# BYTES")) print("%-30s %8s %12s" % ("TYPE", "# ALLOCS", "# BYTES"))
for key, value in data: for key, value in data:
if args.language == "c": if language == "c":
obj_type = "block size %d" % key.size obj_type = "block size %d" % key.size
else: else:
obj_type = key.name obj_type = key.name
......
...@@ -9,7 +9,7 @@ can in turn cause heavy garbage collection. ...@@ -9,7 +9,7 @@ can in turn cause heavy garbage collection.
For example, trace Ruby object allocations when running some simple commands For example, trace Ruby object allocations when running some simple commands
in irb (the Ruby REPL): in irb (the Ruby REPL):
# ./uobjnew ruby 27245 # ./uobjnew -l ruby 27245
Tracing allocations in process 27245 (language: ruby)... Ctrl-C to quit. Tracing allocations in process 27245 (language: ruby)... Ctrl-C to quit.
TYPE # ALLOCS # BYTES TYPE # ALLOCS # BYTES
...@@ -28,7 +28,7 @@ Plain C/C++ allocations (through "malloc") are also supported. We can't report ...@@ -28,7 +28,7 @@ Plain C/C++ allocations (through "malloc") are also supported. We can't report
the type being allocated, but we can report the object sizes at least. Also, the type being allocated, but we can report the object sizes at least. Also,
print only the top 10 rows by number of bytes allocated: print only the top 10 rows by number of bytes allocated:
# ./uobjnew -S 10 c 27245 # ./uobjnew -S 10 -l c 27245
Tracing allocations in process 27245 (language: c)... Ctrl-C to quit. Tracing allocations in process 27245 (language: c)... Ctrl-C to quit.
TYPE # ALLOCS # BYTES TYPE # ALLOCS # BYTES
...@@ -48,18 +48,19 @@ block size 80 569 45520 ...@@ -48,18 +48,19 @@ block size 80 569 45520
USAGE message: USAGE message:
# ./uobjnew -h # ./uobjnew -h
usage: uobjnew.py [-h] [-C TOP_COUNT] [-S TOP_SIZE] [-v] usage: uobjnew.py [-h] [-l {java,ruby,c}] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
{java,ruby,c} pid [interval] pid [interval]
Summarize object allocations in high-level languages. Summarize object allocations in high-level languages.
positional arguments: positional arguments:
{java,ruby,c} language to trace
pid process id to attach to pid process id to attach to
interval print every specified number of seconds interval print every specified number of seconds
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-l {java,ruby,c}, --language {java,ruby,c}
language to trace
-C TOP_COUNT, --top-count TOP_COUNT -C TOP_COUNT, --top-count TOP_COUNT
number of most frequently allocated types to print number of most frequently allocated types to print
-S TOP_SIZE, --top-size TOP_SIZE -S TOP_SIZE, --top-size TOP_SIZE
...@@ -68,7 +69,7 @@ optional arguments: ...@@ -68,7 +69,7 @@ optional arguments:
purposes) purposes)
examples: examples:
./uobjnew java 145 # summarize Java allocations in process 145 ./uobjnew -l java 145 # summarize Java allocations in process 145
./uobjnew c 2020 1 # grab malloc() sizes and print every second ./uobjnew -l c 2020 1 # grab malloc() sizes and print every second
./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations ./uobjnew -l ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size ./uobjnew -l ruby 6712 -S 10 # top 10 Ruby types by total size
...@@ -13,9 +13,12 @@ ...@@ -13,9 +13,12 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from bcc import BPF, USDT from bcc import BPF, USDT, utils
import ctypes as ct import ctypes as ct
import time import time
import os
languages = ["java"]
examples = """examples: examples = """examples:
./uthreads -l java 185 # trace Java threads in process 185 ./uthreads -l java 185 # trace Java threads in process 185
...@@ -26,7 +29,7 @@ parser = argparse.ArgumentParser( ...@@ -26,7 +29,7 @@ parser = argparse.ArgumentParser(
"high-level languages.", "high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
parser.add_argument("-l", "--language", choices=["java"], parser.add_argument("-l", "--language", choices=languages + ["none"],
help="language to trace (none for pthreads only)") help="language to trace (none for pthreads only)")
parser.add_argument("pid", type=int, help="process id to attach to") parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-v", "--verbose", action="store_true", parser.add_argument("-v", "--verbose", action="store_true",
...@@ -59,7 +62,11 @@ int trace_pthread(struct pt_regs *ctx) { ...@@ -59,7 +62,11 @@ int trace_pthread(struct pt_regs *ctx) {
""" """
usdt.enable_probe_or_bail("pthread_start", "trace_pthread") usdt.enable_probe_or_bail("pthread_start", "trace_pthread")
if args.language == "java": language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
if language == "java":
template = """ template = """
int %s(struct pt_regs *ctx) { int %s(struct pt_regs *ctx) {
char type[] = "%s"; char type[] = "%s";
...@@ -87,7 +94,7 @@ if args.verbose: ...@@ -87,7 +94,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt]) bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing thread events in process %d (language: %s)... Ctrl-C to quit." % print("Tracing thread events in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none")) (args.pid, language or "none"))
print("%-8s %-16s %-8s %-30s" % ("TIME", "ID", "TYPE", "DESCRIPTION")) print("%-8s %-16s %-8s %-30s" % ("TIME", "ID", "TYPE", "DESCRIPTION"))
class ThreadEvent(ct.Structure): class ThreadEvent(ct.Structure):
......
...@@ -39,7 +39,7 @@ instead. ...@@ -39,7 +39,7 @@ instead.
USAGE message: USAGE message:
# ./uthreads -h # ./uthreads -h
usage: uthreads.py [-h] [-l {java}] [-v] pid usage: uthreads.py [-h] [-l {java,none}] [-v] pid
Trace thread creation/destruction events in high-level languages. Trace thread creation/destruction events in high-level languages.
...@@ -48,7 +48,7 @@ positional arguments: ...@@ -48,7 +48,7 @@ positional arguments:
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-l {java}, --language {java} -l {java,none}, --language {java,none}
language to trace (none for pthreads only) language to trace (none for pthreads only)
-v, --verbose verbose mode: print the BPF program (for debugging -v, --verbose verbose mode: print the BPF program (for debugging
purposes) purposes)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment